# Goal
The goal of this program is to obtain a "Regression-Based Law of Energy Efficiency in Wireless Sensor Networks",
whose results were used to write a paper with the same name.
We compare two clustering methods, k-means and grid-based clustering, in terms of energy efficiency.
Nodes are uniformly distributed in a 2D area (L x L), with clusters formed using k-means.
A baseline model uses grid clustering for comparison.
Energy consumption analysis is based on Friis' law, and regression algorithms analyze energy patterns in the network.
An energy efficiency law is determined through regression analysis.
We have a set of 128 nodes placed in a $400\,m \times 400\,m$ region,
according to a uniform probability distribution. Initially, we fixed a set of nodes and compared the clustering performance obtained via K-means, with N = 3, 4.

In [1]:
# RegressionLawEnergeticEfficiencyWSN_V1.2
# updated: 05/27/2024 - mb
#from numpy.random import uniform
#from numpy.random import choice

#importando os pacotes básicos
import numpy as np   # um pacote para vetorização dos dados
import matplotlib.pyplot as plt # um pacote para realizar graficos/plotagem
import pandas as pd #um pacote para tornar os dados tabulares
import seaborn as sns #um pacote para realizar graficos mais sofisticados

In [2]:
#importing the resource to control the generation of pseudo-random numbers
#a resource to create seeds and make data drawing reproducible
from numpy.random import default_rng

In [3]:
# Seeded pseudo-random generator shared by every random draw below, so that
# results and comparisons can be reproduced exactly from run to run.
gera = default_rng(24)

## Setting the parameters of grid partitions and nodes

In [4]:
# side length of the square experiment area (the region is L x L)
L = 200

# number of sensor nodes per realization
N_node = 140

# number of grid partitions along each axis
N_grid = np.array([3,2])

# number of clusters: the product of the per-axis partition counts
N_cluster = N_grid.prod()

# number of realizations (independent random placements of the nodes)
N_realiz = 100

# generating the coordinates of the N_node nodes for every realization
# the generator's uniform method draws each coordinate uniformly between 0 and L
# axes of coord_nos: (node, coordinate [0 = x, 1 = y], realization)
coord_nos = gera.uniform(0,L, size=(N_node,2,N_realiz))


In [5]:
# One coordinate table (columns 'x' and 'y') per realization, built with pandas.
# Both methods start from the same node positions but keep separate tables,
# because each method later appends its own columns.
def _coord_table(realization):
    """Return a fresh DataFrame with the node positions of one realization."""
    return pd.DataFrame({'x': coord_nos[:, 0, realization],
                         'y': coord_nos[:, 1, realization]})


df_kmeans = [_coord_table(r) for r in range(N_realiz)]
df_grid = [_coord_table(r) for r in range(N_realiz)]

In [None]:
grid_index

In [None]:
# Grid "centroids" for each realization: grid_index[i] shifted by half a cell
# and scaled by the per-axis cell size L/N_grid.
# NOTE(review): grid_index is not defined in the visible cells - presumably it holds
# 1-based per-axis cell indices, one row per cluster; confirm against the cell that builds it.
centroid_grid = [(grid_index[i] - 0.5) * (L/N_grid) for i in range(N_realiz)]

In [None]:
# Distance from every node to the K-Means centroid of its own cluster,
# computed for each realization and stored in the 'c_to_node' column.
for rea in range(N_realiz):
    nodes = df_kmeans[rea]
    own_centroid = centroid_kmeans[rea][nodes['label']]  # one centroid row per node, picked by label
    offset = own_centroid - nodes[['x', 'y']].to_numpy()
    nodes['c_to_node'] = np.linalg.norm(offset, axis=1)

In [16]:
## Defining DataFrame for K-Means Method

In [None]:
df_kmeans[4]

In [None]:
# Distance from every node to the grid "centroid" of its own cell,
# computed for each realization and stored in the 'c_to_node' column.
for rea in range(N_realiz):
    nodes = df_grid[rea]
    own_centroid = centroid_grid[rea][nodes['label']]  # one centroid row per node, picked by label
    offset = own_centroid - nodes[['x', 'y']].to_numpy()
    nodes['c_to_node'] = np.linalg.norm(offset, axis=1)

In [None]:
df_grid[4]

In [None]:
# Store, next to every node, the coordinates of the K-Means centroid it is assigned to.
for rea in range(N_realiz):
    nodes = df_kmeans[rea]
    own_centroid = centroid_kmeans[rea][nodes['label']]  # gathered once, reused for both columns
    nodes['x_centroid'] = own_centroid[:, 0]
    nodes['y_centroid'] = own_centroid[:, 1]

In [None]:
# Store, next to every node, the coordinates of the grid "centroid" it is assigned to.
for rea in range(N_realiz):
    nodes = df_grid[rea]
    own_centroid = centroid_grid[rea][nodes['label']]  # gathered once, reused for both columns
    nodes['x_centroid'] = own_centroid[:, 0]
    nodes['y_centroid'] = own_centroid[:, 1]

In [15]:
### defining data frame for method K-Means

In [None]:
df_kmeans[3]

In [None]:
df_grid[3]

In [None]:
### finding the indices of the closest nodes

In [None]:
# For each realization and each cluster label, the row label of the node with
# the smallest 'c_to_node' (the node closest to the K-Means centroid).
indice_kmeans = []
for rea in range(N_realiz):
    nodes = df_kmeans[rea]
    closest = []
    for clu in range(N_cluster):
        members = nodes[nodes['label'] == clu]
        closest.append(members['c_to_node'].idxmin())
    indice_kmeans.append(closest)

In [None]:
# For each realization and each cluster label, the row label of the node with
# the smallest 'c_to_node' (the node closest to the grid "centroid").
indice_grid = []
for rea in range(N_realiz):
    nodes = df_grid[rea]
    closest = []
    for clu in range(N_cluster):
        members = nodes[nodes['label'] == clu]
        closest.append(members['c_to_node'].idxmin())
    indice_grid.append(closest)

In [None]:
indice_kmeans

### creating data frames for each realization and cluster

In [None]:
# Split every realization's table into one DataFrame per cluster label:
# df_rc_*[realization][cluster].
# .copy() makes each per-cluster table independent of the table it was filtered
# from, so the columns added to it in later cells are plain assignments and do
# not trigger pandas' SettingWithCopyWarning.
df_rc_kmeans = [[df_kmeans[j][df_kmeans[j]['label']==i].copy() for i in range(N_cluster)] for j in range(N_realiz)]
df_rc_grid = [[df_grid[j][df_grid[j]['label']==i].copy() for i in range(N_cluster)] for j in range(N_realiz)]

In [None]:
# realization = 33 (the thirty-fourth realization)
# cluster = 3 (the fourth cluster)
df_rc_grid[33][3]

In [None]:
# inserting head coordinate columns into DataFrames for each realization and each cluster
# (the head of a cluster is the node whose row label is stored in indice_kmeans)
for j in range(N_realiz):
    for i in range(N_cluster):
        cluster_df = df_rc_kmeans[j][i]
        head_row = cluster_df.loc[indice_kmeans[j][i]]
        # look the coordinates up by column name: integer keys on a
        # label-indexed Series are deprecated in pandas
        cluster_df['x_head'] = head_row['x']
        cluster_df['y_head'] = head_row['y']

In [None]:
# inserting head coordinate columns into DataFrames for each realization and each cluster
# (the head of a cluster is the node whose row label is stored in indice_grid)
for j in range(N_realiz):
    for i in range(N_cluster):
        cluster_df = df_rc_grid[j][i]
        head_row = cluster_df.loc[indice_grid[j][i]]
        # look the coordinates up by column name: integer keys on a
        # label-indexed Series are deprecated in pandas
        cluster_df['x_head'] = head_row['x']
        cluster_df['y_head'] = head_row['y']

In [None]:
df_rc_kmeans[0][1]

In [None]:
df_rc_grid[0][1]

In [None]:
def _head_distance(cluster_df):
    """Return the Euclidean distance from each row's (x, y) to its (x_head, y_head)."""
    delta = cluster_df[['x_head', 'y_head']].to_numpy() - cluster_df[['x', 'y']].to_numpy()
    return np.linalg.norm(delta, axis=1)


# Distance from every node to the head of its own cluster, for both methods.
for rea in range(N_realiz):
    for clu in range(N_cluster):
        df_rc_kmeans[rea][clu]['h_to_node'] = _head_distance(df_rc_kmeans[rea][clu])
        df_rc_grid[rea][clu]['h_to_node'] = _head_distance(df_rc_grid[rea][clu])

In [None]:
df_rc_grid[0][1]

Let's analyze the average values of the "c_to_node" and "h_to_node" columns. However, we ignore the row of the head node itself.

In [None]:
# Per-cluster K-Means tables without the head node's own row.
# The head is removed by its row label (indice_kmeans) rather than by comparing
# x values: this avoids the deprecated positional lookup `.loc[idx][0]` and
# cannot remove another node that happens to share the head's x coordinate.
# drop() returns a new table, so columns added later stay out of df_rc_kmeans.
df_rc_kmeans_no_head = [[df_rc_kmeans[j][i].drop(index=indice_kmeans[j][i]) for i in range(N_cluster)] for j in range(N_realiz)]

In [None]:
# Per-cluster K-Means tables holding only the head node's row.
# The head is selected by its row label (indice_kmeans) rather than by comparing
# x values, which avoids the deprecated positional lookup `.loc[idx][0]`.
# The list indexer keeps the result a one-row DataFrame; .copy() makes it
# independent so columns added later are plain assignments.
df_rc_kmeans_heads = [[df_rc_kmeans[j][i].loc[[indice_kmeans[j][i]]].copy() for i in range(N_cluster)] for j in range(N_realiz)]

In [None]:
df_rc_kmeans_heads[66][1]

In [None]:
# One single-cluster K-Means estimator per realization. They are fitted in a
# later cell on the cluster heads' coordinates, and their centres become the
# sinks used for the head-to-sink distance.
obj_kmeans = [KMeans(n_clusters=1, n_init=20) for _ in range(N_realiz)]


In [None]:
len(df_rc_kmeans_heads)

In [None]:
[df_rc_kmeans_heads[0][j][['x','y']] for j in range(N_cluster)]

In [None]:
# Collect the (x, y) of every cluster head per realization and fit that
# realization's single-cluster estimator on them.
ateste = []
for rea in range(N_realiz):
    head_xy = np.array([df_rc_kmeans_heads[rea][clu][['x','y']] for clu in range(N_cluster)])
    ateste.append(head_xy)
    # flatten to one row per cluster before fitting
    obj_kmeans[rea].fit(head_xy.reshape(N_cluster, -1))

In [None]:
# Sink position of each realization: the fitted cluster centre(s) of that
# realization's estimator (used below as the sink for the K-Means heads).
sinks = [obj_kmeans[i].cluster_centers_ for i in range(N_realiz)]

In [None]:
len(sinks)

In [None]:
# Stack the per-realization sink arrays and show them with one row per realization.
# The row count follows N_realiz instead of a hard-coded 100, so the cell keeps
# working when the number of realizations is changed.
sinks_np = np.array(sinks)
sinks_np.reshape(N_realiz,-1)

In [None]:
sinks[0].reshape(-1).shape, np.array([1.,1.]).shape

In [None]:
data_df = pd.DataFrame([3, 1, 2, 4], index=['a', 'b', 'c', 'd']).transpose()
data_df

In [None]:
points_df = pd.DataFrame([3.5, 0.5, 1.75, 4.25], index=['a', 'b', 'c', 'd']).transpose()

plt.figure()
sns.barplot(data=data_df)
sns.scatterplot(data=points_df.T, legend=False, zorder=10)

In [None]:
# Head-to-sink distance for the K-Means heads.
# sink_corner selects where the sink is: the corner (L, L) when True,
# otherwise the fitted sink of the realization (sinks[realization]).
sink_corner = False

for rea in range(N_realiz):
    target = np.array([L, L]) if sink_corner else sinks[rea]
    for clu in range(N_cluster):
        heads = df_rc_kmeans_heads[rea][clu]
        dife_k = np.array(heads[['x_head','y_head']]) - target
        heads['h_to_sink'] = np.linalg.norm(dife_k, axis=1)

In [None]:
# Per-cluster grid tables without the head node's own row.
# The head is removed by its row label (indice_grid) rather than by comparing
# x values: this avoids the deprecated positional lookup `.loc[idx][0]` and
# cannot remove another node that happens to share the head's x coordinate.
# drop() returns a new table, so columns added later stay out of df_rc_grid.
df_rc_grid_no_head = [[df_rc_grid[j][i].drop(index=indice_grid[j][i]) for i in range(N_cluster)] for j in range(N_realiz)]

In [None]:
# Per-cluster grid tables holding only the head node's row.
# The head is selected by its row label (indice_grid) rather than by comparing
# x values, which avoids the deprecated positional lookup `.loc[idx][0]`.
# The list indexer keeps the result a one-row DataFrame; .copy() makes it
# independent so columns added later are plain assignments.
df_rc_grid_heads = [[df_rc_grid[j][i].loc[[indice_grid[j][i]]].copy() for i in range(N_cluster)] for j in range(N_realiz)]

In [None]:
# Head-to-sink distance for the grid heads.
# The sink is the corner (L, L) when sink_corner is set, otherwise the centre
# of the area (L/2, L/2); it is the same point for every realization.
target = np.array([L, L]) if sink_corner else np.array([L/2., L/2.])

for rea in range(N_realiz):
    for clu in range(N_cluster):
        heads = df_rc_grid_heads[rea][clu]
        dife_g = np.array(heads[['x_head','y_head']]) - target
        heads['h_to_sink'] = np.linalg.norm(dife_g, axis=1)
        

In [None]:
df_rc_grid_heads[66][3]

In [None]:
df_rc_kmeans_heads[66][3]

## let's create the transmitted power column

To do so, we must:
* calculate the constant  $k = \left(\frac{4 \pi}{\lambda}\right)^2 P_r$,
* where
* $\lambda$ is the wavelength and $P_r$ is the power received by the head node (constant $-70$ dBm, i.e. $10^{-7}$ mW).

* square the distances (which are in the "h_to_node" column)



In [None]:
# Constants of the transmitted-power formula P_t = k * d**2.
c = 299792458.0          # speed of light
freq = 2.4e9             # carrier frequency
P_r = 1e-7               # received power: -70 dBm expressed in milliwatts
comprimento = c / freq   # wavelength
k = P_r * (4 * np.pi / comprimento) ** 2

In [None]:
comprimento , k

In [None]:
# Transmitted power of every non-head node: k times the squared distance to its head.
for rea in range(N_realiz):
    for clu in range(N_cluster):
        for table in (df_rc_grid_no_head[rea][clu], df_rc_kmeans_no_head[rea][clu]):
            table['P_t'] = k * (table['h_to_node'] ** 2)

In [None]:
# Transmitted power of every head: k times the squared distance to the sink.
for rea in range(N_realiz):
    for clu in range(N_cluster):
        for table in (df_rc_grid_heads[rea][clu], df_rc_kmeans_heads[rea][clu]):
            table['P_t'] = k * (table['h_to_sink'] ** 2)

In [None]:
df_rc_grid_heads[66][3]

In [None]:
df_rc_grid_no_head[66][3]['P_t'].sum()

# analysis of average distances

## sum over nodes

In [None]:

def _distance_sum_per_cluster(tables, column):
    """Return, for each realization, the list of per-cluster sums of `column`."""
    return [[tables[rea][clu][column].sum() for clu in range(N_cluster)]
            for rea in range(N_realiz)]


# Summed distances over the non-head nodes, per realization and per cluster.
mpath_grid_c_to_n = _distance_sum_per_cluster(df_rc_grid_no_head, 'c_to_node')
mpath_grid_h_to_n = _distance_sum_per_cluster(df_rc_grid_no_head, 'h_to_node')

mpath_kmeans_c_to_n = _distance_sum_per_cluster(df_rc_kmeans_no_head, 'c_to_node')
mpath_kmeans_h_to_n = _distance_sum_per_cluster(df_rc_kmeans_no_head, 'h_to_node')


In [None]:
# One row per realization, one column per cluster ("cg0", "cg1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of grid partitions changes.
path_grid_c_to_n = pd.DataFrame(mpath_grid_c_to_n,columns=[f"cg{i}" for i in range(N_cluster)])

In [None]:
# One row per realization, one column per cluster ("ck0", "ck1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of clusters changes.
path_kmeans_c_to_n = pd.DataFrame(mpath_kmeans_c_to_n,columns=[f"ck{i}" for i in range(N_cluster)])

In [None]:
# One row per realization, one column per cluster ("hg0", "hg1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of grid partitions changes.
path_grid_h_to_n = pd.DataFrame(mpath_grid_h_to_n,columns=[f"hg{i}" for i in range(N_cluster)])

In [None]:
# One row per realization, one column per cluster ("hk0", "hk1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of clusters changes.
path_kmeans_h_to_n = pd.DataFrame(mpath_kmeans_h_to_n,columns=[f"hk{i}" for i in range(N_cluster)])

In [None]:

path_kmeans_c_to_n.mean().mean(), path_kmeans_c_to_n.mean().std()

In [None]:
path_kmeans_c_to_n.mean()

In [None]:
path_kmeans_c_to_n.mean().hist(), path_grid_c_to_n.mean().hist()

In [None]:
path_grid_c_to_n.mean().mean(), path_grid_c_to_n.mean().std()

# Analysis of average distances

In [None]:
sns.histplot([path_grid_c_to_n["cg0"],path_kmeans_c_to_n["ck0"]], bins = 50, kde = True)

In [None]:
sns.histplot([path_grid_h_to_n["hg0"],path_kmeans_h_to_n["hk0"]], bins = 50, kde = True)

In [None]:
sns.histplot([path_grid_c_to_n["cg0"],path_kmeans_c_to_n["ck0"],path_grid_h_to_n["hg0"],path_kmeans_h_to_n["hk0"]], bins = 50, kde = True)

# analysis of average powers

## sum over nodes

In [None]:
def _power_sum_per_cluster(tables):
    """Return, for each realization, the list of per-cluster sums of the 'P_t' column."""
    return [[tables[rea][clu]['P_t'].sum() for clu in range(N_cluster)]
            for rea in range(N_realiz)]


# Summed transmitted power of the non-head nodes, per realization and per cluster.
mpot_grid = _power_sum_per_cluster(df_rc_grid_no_head)
mpot_kmeans = _power_sum_per_cluster(df_rc_kmeans_no_head)

# Summed transmitted power of the heads (towards the sink).
mpot_grid_sink = _power_sum_per_cluster(df_rc_grid_heads)
mpot_kmeans_sink = _power_sum_per_cluster(df_rc_kmeans_heads)

In [None]:
# One row per realization, one column per cluster ("pg0", "pg1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of grid partitions changes.
pot_grid = pd.DataFrame(mpot_grid,columns=[f"pg{i}" for i in range(N_cluster)])

In [None]:
# One row per realization, one column per cluster ("pk0", "pk1", ...).
# Column names are generated from N_cluster so the table is still valid when
# the number of clusters changes.
pot_kmeans = pd.DataFrame(mpot_kmeans,columns=[f"pk{i}" for i in range(N_cluster)])

In [None]:
pot_grid

# Analysis of average powers

In [None]:
# Overlay the per-cluster power distributions of the grid method.
# The series come from the table's own columns, so the plot follows the number
# of clusters instead of a fixed list of six column names.
sns.histplot([pot_grid[col] for col in pot_grid.columns], bins = 50, kde = True)

In [None]:
# Overlay the per-cluster power distributions of the K-Means method.
# The series come from the table's own columns, so the plot follows the number
# of clusters instead of a fixed list of six column names.
sns.histplot([pot_kmeans[col] for col in pot_kmeans.columns], bins = 50, kde = True)

In [None]:
# N_cluster = 6
pot_grid.mean().sum(), pot_kmeans.mean().sum()

## do the regression of the efficiency law and the efficiency comparison law

In [None]:
print(pot_grid.mean())
print(pot_kmeans.mean())

In [None]:
# total power over all realizations and all clusters (grid, K-Means),
# for the current value of N_cluster
pot_grid.sum().sum(), pot_kmeans.sum().sum()

# calculating the difference between average values

In [None]:
#for i in range(N_cluster):
  #  print(df_cluster_no_head[i]['c_to_node'].mean()-df_cluster_no_head[i]['h_to_node'].mean())

In [4]:
#df_cluster1['c_to_node'].mean()- df_cluster1['h_to_node'].mean(),df_cluster1['c_to_node'].min()

In [None]:
#df_cluster2['c_to_node'].mean()-df_cluster2['h_to_node'].mean(), df_cluster2['c_to_node'].min()

In [None]:
#df_cluster3['c_to_node'].mean()-df_cluster3['h_to_node'].mean(), df_cluster3['c_to_node'].min()

In [None]:
#df_cluster

In [None]:
#df_cluster0['P_t'] = k * (df_cluster0['h_to_node']**2)
#df_cluster1['P_t'] = k * (df_cluster1['h_to_node']**2)
#df_cluster2['P_t'] = k * (df_cluster2['h_to_node']**2)
#df_cluster3['P_t'] = k * (df_cluster3['h_to_node']**2)

In [None]:
#df_cluster3

# Generating graphs from the powers transmitted by each node

In [None]:
#for i in range(N_cluster):
   # df_cluster[i].drop(index = indice[i], inplace = True)

In [None]:
# eliminando o próprio cluster head
#df_cluster0.drop(index = indice_0, inplace = True)
#df_cluster1.drop(index = indice_1, inplace = True)
#df_cluster2.drop(index = indice_2, inplace = True)
#df_cluster3.drop(index = indice_3, inplace = True)

In [None]:
#df_cluster3

In [None]:
#df_cluster[0]['P_t'].hist(bins= 10)

In [None]:
#df_cluster[1]['P_t'].hist(bins= 10)

In [None]:
#df_cluster[2]['P_t'].hist(bins= 10)

In [None]:
#df_cluster3['P_t'].hist(bins= 10)

In [None]:
#pot = [df_cluster[i]['P_t'].sum() for i in range(N_cluster)]

In [None]:
#pot = df_cluster[0]['P_t'].sum(),df_cluster[1]['P_t'].sum(),df_cluster[2]['P_t'].sum()

### DISCOVER A GENERAL RULE (the Law) FOR ENERGY EFFICIENCY!
### that is, let's try to infer a mathematical function for efficiency with the variable N_cluster
### preferably done with a regression machine learning algorithm

### Additionally, we can add new input data to suggest new energy efficiency laws.

In [None]:
#np.array(pot).sum()

In [None]:
#plt.bar((0,1,2),pot)