# In [2]:  (Jupyter cell prompt left over from the notebook export)
import plotly.express as px
import plotly.io as pio
import numpy as np
from statistic.Statistic import DataStatistic
from statistic.calc_covariation import *
from data.get_attr_data import get_attributes_data
from data.get_attr_data import get_other_attr_keys
from statistic.data_supplement import supplement_data
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# Default renderer plotly uses when a figure's show() is called.
# NOTE(review): "plotly_mimetype" targets notebook-style front ends - confirm it
# matches the environment this script is run in.
pio.renderers.default = "plotly_mimetype"
# Item count that main() hands to supplement_data() for every attribute
# (presumably the expected number of records in the data file - confirm).
expected_item_count = 1269

def add_line(fig, y_value, st_x, end_x, value_name: str, row, col):
    """Add a horizontal segment to one subplot of *fig*.

    The segment runs from ``st_x`` to ``end_x`` at constant height ``y_value``
    and is labelled ``value_name``. It is placed in the subplot cell addressed
    by ``row`` / ``col``.
    """
    # Two points with the same y coordinate describe a horizontal line.
    segment = go.Scatter(x=[st_x, end_x], y=[y_value] * 2, name=value_name)
    fig.add_trace(segment, row=row, col=col)

def add_statistic_description(fig, statistic: DataStatistic, row, col):
    """Add a two-column table listing every instance attribute of *statistic*.

    The first column holds the attribute names and the second the matching
    values. The table is placed in the subplot cell addressed by ``row`` /
    ``col``, which must be a table-type cell.
    """
    fields = vars(statistic)
    # Both columns come from the same dict, so the i-th name always lines up
    # with the i-th value.
    names = list(fields.keys())
    values = list(fields.values())

    table = go.Table(
        header={"values": ["statistic item", "value"]},
        cells={"values": [names, values]},
    )
    fig.add_trace(table, row=row, col=col)

def view_statistic_data(statistic: DataStatistic, attribute_name: str, data):
    """Show the values and the statistic summary of a single attribute.

    The figure has two stacked cells: a scatter plot of ``data`` (the values of
    attribute ``attribute_name``) with the first and third quantile drawn as
    horizontal lines, and below it a table with the contents of ``statistic``.
    """
    cell_specs = [[{"type": "xy"}], [{"type": "table"}]]
    graphic = make_subplots(rows=2, cols=1, specs=cell_specs)

    # Values are plotted against their 1-based position in ``data``.
    n_points = len(data)
    positions = np.linspace(1, n_points, n_points)
    points = go.Scatter(x=positions, y=data, name=attribute_name, mode='markers')
    graphic.add_trace(points, row=1, col=1)

    # Quantile lines span the whole x range of the scatter plot.
    first_x, last_x = 1, n_points
    add_line(graphic, statistic.quantile_first, first_x, last_x, "Quantile first", row=1, col=1)
    add_line(graphic, statistic.quantile_third, first_x, last_x, "Quantile third", row=1, col=1)

    # Statistic summary goes into the table cell below the plot.
    add_statistic_description(graphic, statistic, row=2, col=1)

    graphic.update_layout(height=800, title_text="Attribute: " + attribute_name)
    graphic.show()

def add_cov_matrix(cov_matrix, name: str, x_key_list, y_key_list, graphic, row, col):
    """Add *cov_matrix* to *graphic* as a heatmap.

    ``x_key_list`` and ``y_key_list`` supply the axis labels, ``name`` is the
    trace name, and ``row`` / ``col`` address the subplot cell to draw into.
    """
    heatmap_args = {"z": cov_matrix, "x": x_key_list, "y": y_key_list, "name": name}
    graphic.add_trace(go.Heatmap(**heatmap_args), row=row, col=col)

def view_cov_matrices(cov_matrices_dict, all_keys, in_keys, out_keys, other_keys):
    """Show the three matrices from *cov_matrices_dict* side by side as heatmaps.

    The matrices are looked up by the first three entries of
    ``cov_matrices_names`` (per the original notes: "all with all",
    "all with in", "all with out").

    ``all_keys`` must list every attribute key in EXACTLY this order:
    ``in_keys`` + ``out_keys`` + ``other_keys``.
    """
    graphic = make_subplots(rows=1, cols=3)

    # One entry per panel, left to right: (matrix name, x labels, y labels).
    panels = (
        (cov_matrices_names[0], all_keys, all_keys),
        (cov_matrices_names[1], in_keys, np.concatenate((out_keys, other_keys), axis=0)),
        (cov_matrices_names[2], out_keys, np.concatenate((in_keys, other_keys), axis=0)),
    )
    for column, (matrix_name, x_labels, y_labels) in enumerate(panels, start=1):
        add_cov_matrix(cov_matrices_dict[matrix_name], matrix_name,
                       x_labels, y_labels, graphic, row=1, col=column)

    # Wide layout so the three heatmaps stay readable.
    graphic.update_layout(width=2000, height=700, title_text="Corelation matrices")
    graphic.show()


def main():
    """Load the attribute data, compute per-attribute statistics and show the matrices.

    Steps: read the attribute keys and data from the input file, pass every
    attribute's data through ``supplement_data``, build a ``DataStatistic`` per
    attribute, compute the matrices dict and display it.
    """
    input_file_name = "../data/data.txt"

    # Full attribute list, ordered input + output + other; view_cov_matrices
    # relies on exactly this order.
    other_attr_keys = get_other_attr_keys(input_file_name)
    all_attr_names = np.concatenate(
        (input_attributes_keys, output_attributes_keys, other_attr_keys), axis=0
    )

    # Mapping: attribute name -> attribute data.
    all_data_dict = get_attributes_data(input_file_name, all_attr_names)

    # Replace NaN items by the substitute value (done by supplement_data).
    for name in all_attr_names:
        all_data_dict[name] = supplement_data(all_data_dict[name], expected_item_count)

    # Mapping: attribute name -> DataStatistic built from the supplemented data.
    # To inspect one attribute, call
    # view_statistic_data(all_statistic_dict[name], name, all_data_dict[name]).
    all_statistic_dict = {
        name: DataStatistic(all_data_dict[name], name) for name in all_attr_names
    }

    cov_matrices_dict = get_attributes_typical_cart(all_data_dict, other_attr_keys)
    view_cov_matrices(cov_matrices_dict, all_attr_names, input_attributes_keys,
                      output_attributes_keys, other_attr_keys)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()