# Testing models for node group finding

In [None]:
import os
import sys
import plotly.express as px
import plotly.graph_objects as go


# Put the directory two levels above the current working directory on the
# import path (only once) so that the `src.*` imports resolve.
module_path = os.path.abspath('./../..')
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

from src.divergence_matrix.DivergenceMatrixProcessor import DivergenceMatrixProcessor
from src.state_comparator.comparator_functions import *

from src.helper_functions import pretty_print
import src.configfile as config

## Plotting data for all 8 sensors

In [None]:
# Names of the eight network nodes that are analysed in this notebook.
SELECTED_NODES = [
    "SenzorComunarzi-NatVech",
    "SenzorCernauti-Sebesului",
    "SenzorChisinau-Titulescu",
    "SenzorComunarzi-castanului",
    "Jonctiune-3974",
    "Jonctiune-J-3",
    "Jonctiune-J-19",
    "Jonctiune-2749",
]

# Build the processor from the pickled divergence matrix (path is relative to
# the current working directory).
matrix_path = "./../../data/divergence_matrix/Divergence_M.pickle"
instance = DivergenceMatrixProcessor(matrix_path)

In [None]:
# Draw one figure per selected node. Every column of the frame returned by
# nodes_which_effect_the_sensors_most becomes its own trace: all of the
# column's values are scattered at a single x position equal to the column
# label, and the frame's index is used as hover text.
for node in SELECTED_NODES:
    print(node)
    # 16.0 is passed through unchanged; its meaning is defined by
    # DivergenceMatrixProcessor (not visible here).
    arr_of_nodes, data_df = instance.nodes_which_effect_the_sensors_most(16.0, node)

    fig = go.Figure()
    for column in data_df.columns:
        series = data_df[column]
        # One x value per point, all equal to the column label.
        x_positions = [column] * len(series)

        fig.add_trace(go.Scattergl(
            x=x_positions,
            y=series,
            text=series.index,
            mode='markers',
            name=column,
        ))

    # The axis titles do not depend on the column, so set them once per
    # figure instead of once per trace.
    fig.update_layout(
        xaxis_title="Seconds",
        yaxis_title="Pressure difference",
    )

    fig.show()

## Testing different clustering/statistical approaches
Kernel density estimation and Jenks natural breaks optimization should be used for the analysis

In [None]:
# Column label (used below to select one column of test_data_df) and the node
# whose data is examined in the following cells.
time_of_day = 36000
sensor_node = "Jonctiune-J-19"

# Fetch the node array and the data frame for the chosen node; 16.0 is the
# same first argument used for the per-node plots.
node_arr, test_data_df = instance.nodes_which_effect_the_sensors_most(
    16.0,
    sensor_node,
)

# Trailing expression so the notebook renders the frame.
test_data_df

### Kernel density estimation



In [None]:
# Kernel density estimate of the column selected by `time_of_day`. Using the
# variable instead of a hard-coded 36000 keeps this plot in step with the
# Jenks-breaks cells, which select the same column through `time_of_day`.
test_data_df[time_of_day].plot.kde()

### Jenks natural breaks optimization


https://stats.stackexchange.com/questions/143974/jenks-natural-breaks-in-python-how-to-find-the-optimum-number-of-breaks    
https://github.com/mthh/jenkspy

In [None]:
# Sorted values of the selected column, as a NumPy array for jenkspy.
# sort_values() returns a new Series, so test_data_df itself is left untouched.
# Calling .sort() on the array returned by .values can instead reorder the
# frame's own data in place (detaching values from their index labels) or
# fail when that array is read-only.
break_data = test_data_df[time_of_day].sort_values().to_numpy()
display(break_data)
display(test_data_df[time_of_day].mean())

In [None]:
import jenkspy

# Split the sorted values into three classes with Jenks natural breaks.
# NOTE(review): newer jenkspy releases appear to name this keyword
# `n_classes`; confirm against the installed version before upgrading.
breaks = jenkspy.jenks_breaks(break_data, nb_class=3)

fig = go.Figure()

# Histogram of the same column the breaks were computed from. This must be
# test_data_df (the frame fetched for `sensor_node`), not `data_df`, which is
# whatever frame the per-node plotting loop left behind for its last node.
fig.add_trace(go.Histogram(x=test_data_df[time_of_day]))
print(max(break_data))

# Mark every break value with a dashed green vertical line; yref='paper' with
# y0=0 and y1=1.0 makes the line span the full plot height.
for break_value in breaks:
    fig.add_shape(
        go.layout.Shape(
            type='line', xref='x', yref='paper',
            x0=break_value, y0=0, x1=break_value, y1=1.0,
            line=dict(
                color="green",
                width=3,
                dash="dash",
            ),
            name=str(break_value),
        )
    )

fig.show()
display(str(breaks))