# In [4]:
import plotly.io as pio
from statistic.Statistic import DataStatistic
from statistic.calc_covariation import *
from data.get_attr_data import get_attributes_data
from data.get_attr_data import get_other_attr_keys
from statistic.data_supplement import supplement_data_dict
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from data.global_data import data_set_file_name
from data.global_data import item_count
import plotly.figure_factory as ff

# Make "plotly_mimetype" the default renderer used when figures are shown.
pio.renderers.default = "plotly_mimetype"

# Number of decimal places to which the correlation coefficients are rounded
# (passed to np.around in plot_cov_matrices).
round_count = 6

def add_line(fig, y_value, st_x, end_x, value_name: str, row, col):
    """Add a horizontal segment at height ``y_value`` to one subplot of ``fig``.

    The segment is a two-point ``go.Scatter`` trace running from ``st_x`` to
    ``end_x``; it is labelled ``value_name`` and placed in the subplot cell
    addressed by ``row`` / ``col``.
    """
    horizontal = go.Scatter(
        x=[st_x, end_x],
        y=[y_value] * 2,  # same height at both ends -> horizontal line
        name=value_name,
    )
    fig.add_trace(horizontal, row=row, col=col)

def add_statistic_description(fig, statistic: DataStatistic, row, col):
    """Add a two-column table listing the attributes of ``statistic`` to ``fig``.

    Each instance attribute of ``statistic`` (every entry of its ``__dict__``)
    becomes one table row: the attribute name in the first column and its
    value in the second. The table is placed in the subplot cell addressed by
    ``row`` / ``col``.
    """
    fields = vars(statistic)
    # Both columns are read from the same dict, so entry i of one list always
    # corresponds to entry i of the other.
    labels = list(fields.keys())
    contents = list(fields.values())

    table = go.Table(
        header={"values": ["statistic item", "value"]},
        cells={"values": [labels, contents]},
    )
    fig.add_trace(table, row=row, col=col)

def plot_statistic_data(statistic: DataStatistic, attribute_name: str, data):
    """Build the summary figure for a single attribute.

    The figure has two rows: on top, a marker scatter of ``data`` (the values
    of attribute ``attribute_name``) plotted against the positions
    1..len(data), with horizontal lines at ``statistic.quantile_first`` and
    ``statistic.quantile_third``; below, a table with the attributes of
    ``statistic``.

    Returns the assembled figure.
    """
    layout_specs = [[{"type": "xy"}], [{"type": "table"}]]
    graphic = make_subplots(rows=2, cols=1, specs=layout_specs)

    # Distribution: one marker per value, x is the 1-based position.
    n_points = len(data)
    positions = np.linspace(1, n_points, n_points)
    distribution = go.Scatter(x=positions, y=data, name=attribute_name, mode='markers')
    graphic.add_trace(distribution, row=1, col=1)

    # Quantile lines span the whole x range of the distribution.
    quantile_lines = (
        ("quantile_first", "Quantile first"),
        ("quantile_third", "Quantile third"),
    )
    for field, label in quantile_lines:
        add_line(graphic, getattr(statistic, field), 1, n_points, label, row=1, col=1)

    # Statistic table goes into the second row.
    add_statistic_description(graphic, statistic, row=2, col=1)

    # Overall size and title.
    graphic.update_layout(height=800, title_text="Attribute: " + attribute_name)
    return graphic


def plot_cov_matrices(cov_matrices_dict, all_keys, in_keys, out_keys, other_keys):
    """Draw the three annotated heatmaps stored in ``cov_matrices_dict``.

    The matrices are looked up under the first three entries of
    ``cov_matrices_names`` and rounded to ``round_count`` decimals before
    plotting:

    * "All with all": ``all_keys`` on both axes. ``all_keys`` must list the
      attribute keys as in_keys + out_keys + other_keys, EXACTLY in this
      order.
    * "All with in":  ``in_keys`` on x, ``out_keys`` + ``other_keys`` on y.
    * "All with out": ``out_keys`` on x, ``in_keys`` + ``other_keys`` on y.

    Returns the figures as a tuple ``(all, in, out)``.
    """
    full_name = cov_matrices_names[0]    # all with all
    with_in_name = cov_matrices_names[1]  # all with in
    with_out_name = cov_matrices_names[2]  # all with out

    # Appearance shared by all three heatmaps.
    heatmap_style = dict(colorscale='Viridis', showscale=True)

    # All with all.
    full_fig = ff.create_annotated_heatmap(
        np.around(cov_matrices_dict[full_name], round_count),
        x=list(all_keys),
        y=list(all_keys),
        **heatmap_style,
    )
    full_fig.update_layout(width=5000, height=5000, title_text="All with all")

    # All with in.
    with_in_fig = ff.create_annotated_heatmap(
        np.around(cov_matrices_dict[with_in_name], round_count),
        x=list(in_keys),
        y=list(np.concatenate((out_keys, other_keys), axis=0)),
        **heatmap_style,
    )
    with_in_fig.update_layout(width=700, height=5000, title_text="All with in")

    # All with out.
    with_out_fig = ff.create_annotated_heatmap(
        np.around(cov_matrices_dict[with_out_name], round_count),
        x=list(out_keys),
        y=list(np.concatenate((in_keys, other_keys), axis=0)),
        **heatmap_style,
    )
    with_out_fig.update_layout(width=800, height=4700, title_text="All with out")

    return full_fig, with_in_fig, with_out_fig

def view_graphic(graphic, view_key: str, name: str, mode: str):
    """Save and/or display ``graphic`` depending on ``view_key``.

    ``view_key`` selects the action:

    * ``"save"``      - write the figure to ``../images/<name>.<mode>``
    * ``"view"``      - show the figure
    * ``"save+view"`` - write the file first, then show the figure

    Any other ``view_key`` does nothing. ``mode`` is used as the file
    extension of the written image.
    """
    destination = "../images/" + name + "." + mode

    wants_save = view_key in ("save", "save+view")
    wants_show = view_key in ("view", "save+view")

    # Saving always happens before showing when both are requested.
    if wants_save:
        graphic.write_image(destination)
    if wants_show:
        graphic.show()


def main():
    """Plot and save per-attribute statistics and the three heatmaps.

    Steps:
      1. Load the data of all attributes (input, output and "other" keys)
         from the configured data set file.
      2. Pass the data through ``supplement_data_dict`` (presumably fills in
         NaN items up to the expected item count - see that helper).
      3. Build a ``DataStatistic`` for every attribute, then plot each one and
         save it as a PDF named after the attribute ("/" replaced by "_").
      4. Compute the matrices with ``get_attributes_typical_cart``, plot them
         and save them as PDFs; the "all with all" figure is also shown.
    """
    # Global settings.
    source_file = data_set_file_name
    n_expected_items = item_count

    # Attribute names, in the order: input keys, output keys, other keys.
    extra_keys = get_other_attr_keys(source_file)
    attr_names = np.concatenate(
        (input_attributes_keys, output_attributes_keys, extra_keys), axis=0
    )

    # attr name -> attr data, then supplemented.
    data_by_attr = get_attributes_data(source_file, attr_names)
    data_by_attr = supplement_data_dict(data_by_attr, n_expected_items)

    # attr name -> DataStatistic; all statistics are built before plotting.
    stats_by_attr = {
        attr: DataStatistic(data_by_attr[attr], attr) for attr in attr_names
    }

    # One PDF per attribute.
    for attr in attr_names:
        attr_stats = stats_by_attr[attr]
        attr_values = data_by_attr[attr]
        figure = plot_statistic_data(attr_stats, attr, attr_values)
        view_graphic(figure, "save", attr.replace("/", "_"), "pdf")

    # Correlation heatmaps.
    cov_matrices = get_attributes_typical_cart(data_by_attr, extra_keys)
    figures = plot_cov_matrices(
        cov_matrices,
        attr_names,
        input_attributes_keys,
        output_attributes_keys,
        extra_keys,
    )
    # (action, file name) for each returned figure, in return order.
    outputs = (
        ("save+view", "correlation_all_with_all"),
        ("save", "correlation_all_with_in"),
        ("save", "correlation_all_with_out"),
    )
    for figure, (action, file_stem) in zip(figures, outputs):
        view_graphic(figure, action, file_stem, "pdf")


# Run the whole plotting pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()