# Divergence matrix data and hour correlation

In [None]:
import os
import numpy as np
import sys
# Add the directory two levels above the working directory to sys.path
# (presumably the project root, so that the `src` imports below resolve).
module_path = os.path.abspath(os.path.join('./../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.divergence_matrix.DivergenceMatrixProcessor import DivergenceMatrixProcessor
from src.state_comparator.comparator_functions import *

# Processor built from the pickled divergence matrix path; every cell below queries this instance.
divergence_instance = DivergenceMatrixProcessor("./../../data/divergence_matrix/Divergence_M.pickle")

Divergence data in its original form.

In [None]:
# Sensor node inspected in the single-sensor cells below.
sensor = "SenzorComunarzi-NatVech"

# Divergence data for a leak value of 4.00 on that node
# (units of the leak value are not visible here — TODO confirm).
df_main = divergence_instance.extract_df_with_specific_leak_on_one_node(4.00, sensor)
display(df_main)

In [None]:
# Kernel density estimate of column 3600 (presumably the first hour, expressed in seconds — confirm).
df_main[3600].plot.kde()

In [None]:
# Histogram of the same column (3600) using 12 bins, drawn semi-transparent.
df_main[3600].plot.hist(bins=12, alpha=0.5)

In [None]:
# Overlaid 12-bin histograms of every column of df_main on one wide figure.
df_main.plot.hist(bins=12, alpha=0.5, figsize=(20,10))

## Example of hour correlation

In [None]:
# calculate_column_correlation returns two tables for the given leak value (16.0) and sensor;
# they are kept here as the "order" and "basic" correlation frames and shown side by side.
df_order_corr, df_basic_corr = divergence_instance.calculate_column_correlation(16.0, sensor)
display(df_order_corr, df_basic_corr)

### Visualization for deliverable

In [None]:
import plotly
import plotly.graph_objects as go

def plot_3d_hour_correlation(sensor_name, leak_amount=16.0):
    """Show a 3D scatter plot of the pairwise hour correlation for one sensor.

    The second table returned by
    ``divergence_instance.calculate_column_correlation`` is looked up for
    every ordered pair of distinct hours. Hours are addressed by their offset
    in seconds (3600, 7200, ..., 86400) and plotted as hour numbers 1..24.

    Args:
        sensor_name: Sensor/node name forwarded to
            ``calculate_column_correlation``.
        leak_amount: Leak value forwarded to ``calculate_column_correlation``.
            Defaults to 16.0, the value that was previously hard-coded.
    """
    seconds_per_hour = 3600
    hour_offsets = range(seconds_per_hour, 24 * seconds_per_hour + 1, seconds_per_hour)

    # Only the second returned table is plotted; the first one is not needed here.
    _, corr_df = divergence_instance.calculate_column_correlation(leak_amount, sensor_name)

    # One point per ordered pair of different hours; the diagonal is skipped.
    points = [
        (first // seconds_per_hour, second // seconds_per_hour, corr_df.at[first, second])
        for first in hour_offsets
        for second in hour_offsets
        if first != second
    ]
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    z = [point[2] for point in points]

    fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z, mode='markers')])
    # Axes of a 3D trace live under `layout.scene`; the top-level
    # `xaxis_title` / `yaxis_title` only affect 2D cartesian axes, so all
    # three titles have to be set inside `scene` to be displayed.
    fig.update_layout(
        title="Hour correlation",
        scene=dict(
            xaxis=dict(title="Hour of the day 1"),
            yaxis=dict(title="Hour of the day 2"),
            zaxis=dict(title="Correlation between x and y"),
        ),
    )
    fig.show()
    
# Sensor/node names for which the 3D hour-correlation plot can be generated.
sensors = ["SenzorComunarzi-NatVech", "SenzorCernauti-Sebesului", "SenzorChisinau-Titulescu", 
           "SenzorComunarzi-castanului", "Jonctiune-3974", "Jonctiune-J-3", "Jonctiune-J-19", 
           "Jonctiune-2749"]

# Disabled by default; uncomment to render one 3D plot per sensor:
# for sensor_i in sensors:
#     print(sensor_i)
#     plot_3d_hour_correlation(sensor_i)

## Calculation for all of the sensors

In [None]:
# Sensor/node names whose hour-correlation tables are averaged in this cell.
sensors = ["SenzorComunarzi-NatVech", "SenzorCernauti-Sebesului", "SenzorChisinau-Titulescu",
           "SenzorComunarzi-castanului", "Jonctiune-3974", "Jonctiune-J-3", "Jonctiune-J-19", "Jonctiune-2749"]

# Sum the per-sensor tables element-wise, then divide by the number of sensors
# to obtain the mean "order" and "basic" correlation tables.
order_df = None
basic_df = None
# The loop variable is intentionally not called `sensor`, so the notebook-level
# `sensor` chosen in an earlier cell is not silently overwritten.
for sensor_name in sensors:
    print(sensor_name)
    df_order_corr_temp, df_basic_corr_temp = divergence_instance.calculate_column_correlation(16.0, sensor_name)

    if order_df is None:
        # First sensor: start both running sums from its tables.
        order_df = df_order_corr_temp
        basic_df = df_basic_corr_temp
    else:
        order_df = order_df.add(df_order_corr_temp)
        basic_df = basic_df.add(df_basic_corr_temp)

order_df = order_df.div(len(sensors))
basic_df = basic_df.div(len(sensors))

# Blank out the diagonal with NaN (not 0): an hour compared with itself carries
# no information, and NaN entries are skipped by the means computed afterwards.
for averaged_df in (order_df, basic_df):
    for i in range(min(averaged_df.shape)):
        averaged_df.iat[i, i] = np.nan

In [None]:
# Show both sensor-averaged correlation tables.
display(order_df, basic_df)

In [None]:
# Column-wise mean of each averaged table; pandas skips NaN entries by default.
display(order_df.mean(), basic_df.mean())

### Visualization of element changes in the groups

In [None]:
# Show the averaged "basic" table, then chart the per-column mean of each
# averaged table as a percentage.
display(basic_df)
average_corr_basic_df = basic_df.mean().round(5)
average_corr_order_df = order_df.mean().round(5)

# (series to plot, figure title, y-axis label) for each chart, in display order.
chart_specs = (
    (
        average_corr_basic_df,
        "Average correlation between hours and nodes within main group",
        "Percent of the same nodes in the group",
    ),
    (
        average_corr_order_df,
        "Hour correlation",
        "Percent of nodes that are the same in the group",
    ),
)

for hourly_average, chart_title, y_axis_label in chart_specs:
    # Index labels are integer-divided by 3600 to get the hour number
    # (presumably they are offsets in seconds — confirm).
    x_i = [label // 3600 for label in hourly_average.index]
    trace = go.Scatter(x=x_i, y=hourly_average.values * 100, mode='lines+markers')
    fig = go.Figure(data=[trace])
    fig.update_layout(
        title=chart_title,
        xaxis_title="Hour of the day",
        yaxis_title=y_axis_label,
    )
    fig.show()

In [None]:
"""fig = go.Figure(data=go.Scatter(
    x=[i[0] for i in correlation_tup_arr], 
    y=[i[2] for i in correlation_tup_arr], 
    mode='markers'))

x = list(set([i[0] for i in correlation_tup_arr]))
y = list(set([i[1] for i in correlation_tup_arr]))
z = df_basic_corr.values # [i for i in correlation_tup_arr]
# print(x, y, z)
fig = go.Figure(data=[go.Surface(z=z, x=x, y=y)])""";