# 5. Modeling - Multi-layer perceptron

In [None]:
#imports
import pandas as pd
import os
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime

#folders
data_folder = "data"

#machine learning
from sklearn.neural_network import MLPClassifier    as mlc
from sklearn.neural_network import MLPRegressor     as mlr

#model scoring
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error


#warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
run_optim = False #runtime: 711 min

In [None]:
#plot styles: plotly colour scales shared by the figures in this notebook
plt_style_c = px.colors.sequential.haline #complex
plt_style_s = px.colors.diverging.Portland #simple

#default plot size in pixels
#NOTE(review): nothing in the visible code reads this dict; scale_show carries the same 1500 x 750 values itself
size = {
    "width" : 1500 ,
    "height" : 750 ,
}

#function for plotting
def scale_show(fig, width = 1500, height = 750):
    """Apply the notebook's default fonts and figure size to a figure and display it.

    fig     : object exposing update_layout(**kwargs) and show() (plotly figures in this notebook)
    width   : figure width in pixels, defaults to the previously hard-coded 1500
    height  : figure height in pixels, defaults to the previously hard-coded 750

    Returns None; the figure is modified in place.
    """

    #fonts and size are independent layout keys, so one update is enough
    fig.update_layout(
        font = dict(size=16),
        title_font = dict(size=20),
        xaxis_title_font = dict(size=18),
        yaxis_title_font = dict(size=18),
        width = width,
        height = height,
    )

    #show
    fig.show()

    return

In [None]:
df = pd.read_csv(os.path.join(data_folder, "df_main.csv"))
df.head().T

## 5.1 Data preparation, Modeling, Architecture tuning

In [None]:
class Base(): #parent

    def __init__(self, df : object, y_col : list, data_folder : str, results_file : str, model_metric : str, n_jobs : int = 1, window : int = 30):

        #save raw df
        self.df_raw         = df.copy()

        #drop forbidden cols
        df = df.copy() #pass by value
        df = Base.__drop_forbidden_cols(df, y_col)

        #set dataframe for refferencing
        self.df             = df.copy() #windowed df, copy because obj is passed by refference

        #get and get x_col and y_col
        self.y_col          = y_col
        self.x_col          = list(df.drop(labels = y_col, axis = 1, inplace = False).columns.to_list())

        #misc params
        self.random_state   = 42
        self.n_jobs         = n_jobs
        self.data_folder    = data_folder
        self.results_file   = os.path.join(data_folder,results_file)
        self.model_metric   = model_metric

        #ann parameters
        self.default_param = {
            "activation"        : "relu",
            "solver"            : "adam",       #stochastic gradiant descent
            "alpha"             : 0.1,          #see: https://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_alpha.html
            "learning_rate"     : "adaptive",   #trying to improve performance
            "shuffle"           : False,        #keep order because of time series
            "early_stopping"    : True,         #trying to reduce processing time
            "max_iter"          : 200,          #change this if the training is too slow
        }

        #windowing parameters
        self.x_window       = window #number of shifting window input features

        self.__setup()

        return

    def __setup(self):

        #order was chosen to minimize data loss, at the cost of more needed processing power

        #data preparation
        self.__windowing()
        self.__split_data()
        self.__standardize_data()

        #setup of metrics and results
        self.__set_assesment()

        return

    @staticmethod
    def __drop_forbidden_cols(df, y_col):

        forbidden_cols = ['date','t2m_t1', 't2m_t2', 't2m_t1_mean', 't2m_t2_mean', 't2m_t1_cat', 't2m_t2_cat']

        #prevent y_cols from being dropped from the data frame
        for y in y_col:
            if y in forbidden_cols:
                forbidden_cols.remove(y)

        #drop forbidden cols, to prevent adding future information to the time series
        print(f"Removed forbidden cols:\n{forbidden_cols}")
        df.drop(labels = forbidden_cols, axis = 1, inplace = True)

        return df

    def __windowing(self):
        """creates the windowed data frame"""

        self.x_col_windowed = self.x_col.copy() #copy, becaus lists are past by refference

        #input fetures: x
        for i in range(1, self.x_window + 1):
            for x_col in self.x_col: #inefficient but works just fine

                x_col_i             = f"{x_col}_-{i}"
                self.df[x_col_i]    = df[x_col].shift(i)

                self.x_col_windowed.append(x_col_i)

        #clean na columns, which were caused by the shifts
        self.df.dropna(inplace = True)
        print(f"\nApplying shifitng window:\nx_window: -{self.x_window}")

        return

    def __split_data(self):

        #reset index for splitting data
        self.df.reset_index(inplace = True, drop = True)
        length = self.df.shape[0]

        #setting split value fractions
        valid_frac_0      = 0.1
        test_frac_1       = 0.05
        train_frac_2      = 0.7
        valid_frac_3      = 0.1
        test_frac_4       = 0.05

        #get end indexes
        index_end_list = []
        cum_frac = 0

        for frac in [valid_frac_0, test_frac_1, train_frac_2, valid_frac_3, test_frac_4]:
            cum_frac += frac
            index_end_list.append(round(length * cum_frac))

        #get indexes (ugly code)
        df_indexes = self.df.index.tolist()
        train_i     = df_indexes[index_end_list[1] : index_end_list[2]]
        valid_i     = df_indexes[ : index_end_list[0]]                      + df_indexes[index_end_list[2] : index_end_list[3]]
        test_i      = df_indexes[index_end_list[0] : index_end_list[1]]     + df_indexes[index_end_list[3] : index_end_list[4]]

        #get df from indexes
        self.df_train_x = self.df[self.x_col_windowed].loc[self.df.index.isin(train_i)]
        self.df_train_y = self.df[self.y_col].loc[self.df.index.isin(train_i)]

        #create valid df
        self.df_valid_x = self.df[self.x_col_windowed].loc[self.df.index.isin(valid_i)]
        self.df_valid_y = self.df[self.y_col].loc[self.df.index.isin(valid_i)]

        #create valid df
        self.df_test_x = self.df[self.x_col_windowed].loc[self.df.index.isin(test_i)]
        self.df_test_y = self.df[self.y_col].loc[self.df.index.isin(test_i)]

        #check
        print("\nSplitting data:")
        for df, df_type in zip (
            [self.df_train_y,self.df_valid_y, self.df_test_y],
            "train,valid,test".split(",")
            ):

            print(f"{df_type} size:\t{round(df.shape[0] / length,2)}\t{df.shape[0]}")

        #set data for plotting in raw df
        self.df_raw["set"] = None
        for index_items, set_type in zip([train_i, valid_i, test_i],["train", "valid", "test"]):
            self.df_raw.loc[self.df_raw.index.isin(index_items), "set"] = set_type

        return

    def plot_set_distribution(self, plotter, style, plt_style):

        if (style == "histogram") and (self.model_metric == "c"): #only plotlable with classificaiton model

            fig = px.histogram(
                data_frame = self.df_raw,
                x = "set",
                color = "t2m_t2_cat",
                histfunc = "count",

                barmode = "group",
                title = "Categorical distribution of sets",
                color_discrete_sequence = plt_style,
            )

        elif (style == "scatter"):

            fig = px.scatter(
                data_frame = self.df_raw,
                x = "date",
                y = "t2m",
                color = "set",

                title = "Trend distribution of sets",
                color_discrete_sequence = plt_style,
            )

        plotter(fig)

    def __split_data_deprecated(self):

        #df length
        length = self.df.shape[0]

        #setting split values
        valid_frac     = 0.2
        test_frac      = 0.1

        #get indexes
        train_end       = round(length * (1 - (valid_frac + test_frac)))
        valid_end       = round(length * (1 - (test_frac)))
        test_end        = round(length * (1))

        #create train df
        self.df_train_x = self.df[self.x_col_windowed].iloc[:train_end]
        self.df_train_y = self.df[self.y_col].iloc[:train_end]

        #create valid df
        self.df_valid_x = self.df[self.x_col_windowed].iloc[train_end:valid_end]
        self.df_valid_y = self.df[self.y_col].iloc[train_end:valid_end]

        #create valid df
        self.df_test_x = self.df[self.x_col_windowed].iloc[valid_end:test_end]
        self.df_test_y = self.df[self.y_col].iloc[valid_end:test_end]

        #check
        print("\nSplitting data:")
        for df, df_type in zip (
            [self.df_train_y,self.df_valid_y, self.df_test_y],
            "train,valid,test".split(",")
            ):

            print(f"{df_type} size:\t{round(df.shape[0] / length,2)}\t{df.shape[0]}")

        return

    def __standardize_data(self):

        label_cat = [0,1]; label_cat.sort()
        self.standardizing_values = {
            "x" : {},
            "y" : {},
            #    col1 : {"mean" : value, "std"  : value},
            #    col2 : {"mean" : value, "std"  : value},
            #}
            #"y" : ...
        }

        print("\nStandardizing values:")
        for col in self.df.columns:

            distinct_values = list(self.df[col].unique())
            distinct_values.sort()

            if label_cat == distinct_values: #skip categorical values
                continue

            #get mean and std for all columns across both data both data frames
            if col in self.x_col_windowed:

                self.standardizing_values["x"][col]             = {}
                self.standardizing_values["x"][col]["mean"]     = self.df_train_x[col].mean()
                self.standardizing_values["x"][col]["std"]      = self.df_train_x[col].std()

            elif col in self.y_col:

                self.standardizing_values["y"][col]             = {}
                self.standardizing_values["y"][col]["mean"]     = self.df_train_y[col].mean()
                self.standardizing_values["y"][col]["std"]      = self.df_train_y[col].std()

        #apply values
        for df, col_type in zip([self.df_train_x, self.df_valid_x, self.df_test_x, self.df_train_y, self.df_valid_y, self.df_test_y], ["x","x","x","y","y","y"]):
            for col in self.standardizing_values[col_type].keys():

                mean    = self.standardizing_values[col_type][col]["mean"]
                std     = self.standardizing_values[col_type][col]["std"]
                df[col] = (df[col] - mean) / std #standardization

        #check sum
        print(f"Checksum train x: {self.df_train_x[list(self.standardizing_values['x'].keys())].mean().round(2).sum()}")
        print(f"Checksum train y: {self.df_train_y[list(self.standardizing_values['y'].keys())].mean().round(2).sum()}")

        return

    def unstandardize_data(self):

        #apply values
        for df, col_type in zip([self.df_train_x, self.df_valid_x, self.df_test_x, self.df_train_y, self.df_valid_y, self.df_test_y], ["x","x","x","y","y","y"]):
            for col in self.standardizing_values[col_type].keys():

                mean    = self.standardizing_values[col_type][col]["mean"]
                std     = self.standardizing_values[col_type][col]["std"]
                df[col] = df[col] * std + mean #reversed standardization

        return

    def __unstanardize_y(self, y_t1, y_t2):

        mean_t1        = self.standardizing_values["y"]["t2m_t1"]["mean"]
        mean_t2        = self.standardizing_values["y"]["t2m_t2"]["mean"]
    
        std_t1        = self.standardizing_values["y"]["t2m_t1"]["std"]
        std_t2        = self.standardizing_values["y"]["t2m_t2"]["std"]

        y_t1_unst  = y_t1 * std_t1 + mean_t1
        y_t2_unst  = y_t2 * std_t1 + mean_t2

        return y_t1_unst, y_t2_unst

    def __set_assesment(self):

        if self.model_metric == "c":
            self.get_model_score = self.__get_model_score_c

        elif self.model_metric == "r":
            self.get_model_score = self.__get_model_score_r

    def __get_model_score_c(self, model = None, get_test_score = False): #used, when model_metric == "c"

        #default
        get_conf_mat = False
        mat_labels = [0,1]

        #if a model is passed, the function is calles from run_optim, otherwise,
        if model is None:
            model = self.model
            get_conf_mat = True #only get confuciton matrix when a single model is created
        if model is None:
            print("No model has been set. Create a model first or pass one as a param")
            return

        score = {}

        #create predictions
        y_train_pred    = model.predict(self.df_train_x)
        y_valid_pred    = model.predict(self.df_valid_x)
        y_test_pred     = model.predict(self.df_test_x)

        #seperate t1 and t2 for individual scoring
        for raw_key, y_pred, y in zip(
            ["train",           "valid",            "test"],
            [y_train_pred,      y_valid_pred,       y_test_pred],
            [self.df_train_y,   self.df_valid_y,    self.df_test_y],
        ):

            #not fetting test accurarcy if not set
            if (get_test_score == False) and raw_key == "test":
                continue

            #split
            y_pred_t1 = y_pred[:,0]
            y_pred_t2 = y_pred[:,1]

            y_t1 = y[self.y_col[0]]
            y_t2 = y[self.y_col[1]]

            #get acc
            score[f"{raw_key}_accuracy_t1"]     = round(accuracy_score(y_true = y_t1, y_pred = y_pred_t1),3)
            score[f"{raw_key}_accuracy_t2"]     = round(accuracy_score(y_true = y_t2, y_pred = y_pred_t2),3)
            score[f"{raw_key}_accuracy"]        = round(accuracy_score(y_true = y, y_pred = y_pred),3)

            #get conf mat
            if get_conf_mat is True:
                score[f"{raw_key}_mat_t1"]     = confusion_matrix(y_true = y_t1, y_pred = y_pred_t1, labels = mat_labels)
                score[f"{raw_key}_mat_t2"]     = confusion_matrix(y_true = y_t2, y_pred = y_pred_t2, labels = mat_labels)
                #score[f"{raw_key}_mat"]        = confusion_matrix(y_true = y, y_pred = y_pred) #multi labels are not supported

        #return metrics
        if get_conf_mat is True:
            self.score = score
            [print(f"{key} :\t\t{score[key]}") for key in score.keys() if isinstance(score[key],float)]
            return

        return score

    def __get_model_score_r(self, model = None, get_test_score = False, unstandardize_score = False): #used, when model_metric == "c"

        #if a model is passed, the function is calles from run_optim, otherwise,
        set_score = False

        if model is None: #model is not none when automation is run
            model = self.model
            set_score = True
        if model is None:
            print("No model has been set. Create a model first or pass one as a param")
            return

        score = {}

        #create predictions
        y_train_pred    = model.predict(self.df_train_x)
        y_valid_pred    = model.predict(self.df_valid_x)
        y_test_pred     = model.predict(self.df_test_x)

        #seperate t1 and t2 for individual scoring
        for raw_key, y_pred, y in zip(
            ["train",           "valid",            "test"],
            [y_train_pred,      y_valid_pred,       y_test_pred],
            [self.df_train_y,   self.df_valid_y,    self.df_test_y],
        ):

            #not fetting test accurarcy if not set
            if (get_test_score == False) and raw_key == "test":
                continue

            #split
            y_pred_t1 = y_pred[:,0]
            y_pred_t2 = y_pred[:,1]

            y_t1 = y[self.y_col[0]]
            y_t2 = y[self.y_col[1]]

            #unstandardize data (ugly code go brrrr)
            if unstandardize_score:
                y_t1, y_t2              = self.__unstanardize_y(y_t1 = y_t1, y_t2 = y_t2)
                y_pred_t1, y_pred_t2    = self.__unstanardize_y(y_t1 = y_pred_t1, y_t2 = y_pred_t2)

                y_pred[:,0], y_pred[:,1]                = y_pred_t1, y_pred_t2
                y[self.y_col[0]], y[self.y_col[1]]      = y_t1, y_t2

            #get r^2
            score[f"{raw_key}_r^2_t1"]      = round(r2_score(y_true = y_t1, y_pred = y_pred_t1),3)
            score[f"{raw_key}_r^2_t2"]      = round(r2_score(y_true = y_t2, y_pred = y_pred_t2),3)
            score[f"{raw_key}_r^2"]         = round(r2_score(y_true = y, y_pred = y_pred),3)

            #get rmse
            score[f"{raw_key}_rmse_t1"]      = round(np.sqrt(mean_squared_error(y_true = y_t1, y_pred = y_pred_t1)),3)
            score[f"{raw_key}_rmse_t2"]      = round(np.sqrt(mean_squared_error(y_true = y_t2, y_pred = y_pred_t2)),3)
            score[f"{raw_key}_rmse"]         = round(np.sqrt(mean_squared_error(y_true = y, y_pred = y_pred)),3)

        #return metrics
        if set_score:
            self.score = score
            [print(f"{key} :\t\t{score[key]}") for key in score.keys() if isinstance(score[key],float)]
            return

        return score

    def plot_confusion_mat(self, set = "valid"):
        """set = 'train', 'valid', 'test'"""

        mat_keys = [key for key in self.score.keys() if ("mat" in key) and (set in key)]

        for mat_key in mat_keys:

            mat = self.score[mat_key]
            title = str(mat_key).replace("_mat_", " ")

            fig  = px.imshow(
                mat,
                color_continuous_scale = px.colors.sequential.haline_r,
                text_auto = True,
            )

            #labels and layout
            fig.update_layout(

                title = f"Confusion matrix: {title}",

                width=500,
                height=500,
                
                xaxis_title="Predicted label",
                yaxis_title="True label",

                xaxis = dict(
                    tickmode = 'array',
                    tickvals = [0,1],
                    ticktext = ["above", "below"]
                ),

                yaxis = dict(
                    tickmode = 'array',
                    tickvals = [0,1],
                    ticktext = ["above", "below"],
                ),
            )

            #set font
            fig.update_layout(
                font = dict(size=16),
                title_font = dict(size=20),
                xaxis_title_font = dict(size=18),
                yaxis_title_font = dict(size=18),
            )

            fig.show()

    def save_result(self, param, score):

        #merge and create a dataframe
        param.update(score); data = param
        df_result = pd.DataFrame([data])

        #create results file and set header length as param to negate reading file
        if os.path.isfile(self.results_file) is True:
            df_saved_result = pd.read_csv(self.results_file)
            df_result = df_saved_result.append(df_result)

        df_result.to_csv(self.results_file, index = False)

        return

    def get_results(self):

        df = pd.read_csv(self.results_file)
        return df

In [None]:
#scratch cell: sum of the integers 0..9
a = [*range(10)]
print(sum(a))

In [None]:
class ML(Base): #child

    def run_optim(self, nu_max = 11, degrees = 9, use_small_arch_list = False):

        self.model = None #clear any models if there should be one

        if use_small_arch_list is False:
            arch_list = self.__generate_arch_list_all(
                n_layers            = 4,
                n_node_steps_div    = 3,
                min_nodes           = 10
            )
        elif use_small_arch_list is True:
            arch_list = self.__generate_arch_list_limited()

        for arch in arch_list:
            print( f"Progress of optim:\t{round((arch_list.index(arch) / len(arch_list)) * 100,1)}",end = "\r")

            #refromat to save all comparison data
            param = {
                "hidden_layer_sizes"        : arch,
                "n_layers"                  : len(arch),
                "n_neurons"                 : sum(arch),
                "mean_neurosn_per_layer"    : sum(arch) / len(arch),
            }

            #create model scoring to evalute models
            score = self.create_model(arch = arch, single_model = False)
            self.save_result(param = param, score = score)

        print("Optim successfull. Read results with self.get_results()")
        return

    def __generate_arch_list_limited(self, arch_log = 2, lin_arch_scaling = 2, n_layers = 5):

        arch_list : list = []
        n_features = len(self.df_train_x.columns.tolist())

        #nodes_pow_2 = [3 ** (cone_arch_base_power + p) for p in range(1, n_layers + 1)][::-1]
        nodes_log_2 = [int(n_features * (1 / arch_log ** i)) for i in range(1,n_layers + 1)]

        for n_layer in range(1, n_layers + 1):

            #linear
            for size in  [n_features / (i * lin_arch_scaling) for i in range(1,4)]:
                arch_lin = [int(size)] * n_layer
                arch_list.append(arch_lin)

            #cone
            arch_cone = nodes_log_2[:n_layer]
            arch_list.append(arch_cone)

            #cone r
            arch_cone_r = arch_cone[::-1]
            arch_list.append(arch_cone_r)

        return arch_list

    def __generate_arch_list_all(self, n_layers = 4, n_node_steps_div = 3, min_nodes = 10):

        n_input_nodes : int = len(self.df_train_x.columns)
        arch_list = []
        n_node_list : list = [n_input_nodes]

        #set fixed params
        min_node_division = 2 #minumum number of nodes on a layer

        #divisonal
        counter = 1

        while True:

            n_nodes = int(n_input_nodes / (n_node_steps_div ** counter))

            if n_nodes < min_nodes:
                break

            n_node_list.append(n_nodes)
            counter += 1

        #clean up
        n_node_list = list(set(n_node_list)); n_node_list.append(0); n_node_list.sort()

        #create archs
        for i in range(1, (len(n_node_list)**n_layers) + 1):

            arch = []

            for j in list(range(n_layers))[::-1]:

                v =  int((i % (len(n_node_list) ** (j + 1)) / (len(n_node_list) ** j)))
                arch.append(n_node_list[v])

            arch = [k for k in arch if k != 0] #remove zero value

            if arch in arch_list:
                continue
            arch_list.append(arch)

        return arch_list

    def create_model(self, arch, single_model = True, param = None):
        """if single_model == False:
            the scores get retuned
            self.mode is not set
        elif single_model == True:
            scores do not get returned
            seld.model is set"""

        #if no parameters are given, the following default params are used
        if param is None:
            param = self.default_param

        #create model
        if self.model_metric == "r":
            ml_model = mlr
            #multi_output = MultiOutputRegressor

        elif self.model_metric == "c":
            ml_model = mlc
            #multi_output = MultiOutputClassifier

        model = ml_model(
            random_state            = self.random_state,
            hidden_layer_sizes      = arch,
            **param,
        )

        #model = multi_output(
        #    n_jobs = self.n_jobs,
        #    estimator = ml_model(
        #        random_state    = self.random_state,
        #        **param, #unpack the dict and dumps its values
        #    )
        #)

        #fit model
        model.fit(X = self.df_train_x, y = self.df_train_y)

        #set according metrics
        if single_model is True:
            self.model = model
            print(self.model)
            return

        elif single_model is False:
            score = self.get_model_score(model)
            return score

In [None]:
#classification setup: constructing the object runs windowing, splitting and standardizing
mlc_obj = ML(
    df              = df,
    y_col           = ["t2m_t1_cat", "t2m_t2_cat"], #the two categorical targets, the scoring code reads exactly two target columns

    n_jobs          = 5, #NOTE(review): only stored on the object, the visible model creation does not pass it on

    data_folder     = data_folder,
    results_file    = "optim_reults_mlc.csv",

    model_metric    =  "c", # r = regression, c = classification
    window          = 30,
)

In [None]:
if run_optim is True:
    mlc_obj.run_optim()

## 5.2 Model evaluation

In [None]:
df_results = mlc_obj.get_results()
df_results.sort_values(by = "valid_accuracy_t1", ascending = False, inplace = True)

df_results.head(10)


In [None]:
df_results.sort_values(by = "valid_accuracy_t2", ascending = False, inplace = True)
df_results.head(10)

In [None]:
df_results.sort_values(by = "valid_accuracy", ascending = False, inplace = True)
df_results.head(10)

In [None]:
#one fitting graph per validation metric; every option except the y column is shared
fitting_graph_options = dict(
    data_frame = df_results,
    x = "n_neurons",
    color = "mean_neurosn_per_layer",
    color_continuous_scale = plt_style_c,

    title = "Fitting graph: Multi-layer perceptron classifier",
    opacity = 1,
    trendline = "lowess",

    labels = {"mean_neurosn_per_layer": "mean neurons\nper layer"},
)

for col in ("valid_accuracy", "valid_accuracy_t1", "valid_accuracy_t2"):

    fig = px.scatter(y = col, **fitting_graph_options)

    scale_show(fig)

In [None]:
class Networkplotter():
    """Draws a schematic of a fully connected network: one dot per neuron, one line per connection.

    All functions are static, use e.g. Networkplotter.plot_ann(arch = [10, 3, 2, 1]).
    """

    @staticmethod
    def plot_ann(arch = (10,3,2,1), title = "ANN architecture"):
        """Plot the network described by arch (number of neurons per layer, input layer first)."""

        structure = Networkplotter.create_neurons(arch) #df
        connections = Networkplotter.create_connections(structure, arch) #dict
        Networkplotter.draw_network(structure, connections, arch, title)

    @staticmethod
    def create_neurons(arch):
        """Return a data frame with one row per neuron and the columns "layer_pos" and "neuron_pos".

        The neurons of every layer are centred vertically around half of the largest layer size.
        """

        mid_pos = max(arch) / 2

        layer_pos   = []
        neuron_pos  = []

        for layer, n_neurons in enumerate(arch):

            first_pos = mid_pos - (n_neurons / 2)

            for k in range(n_neurons):
                layer_pos.append(layer)
                neuron_pos.append(first_pos + k)

        return pd.DataFrame({
            "layer_pos"  : layer_pos,
            "neuron_pos" : neuron_pos,
        })

    @staticmethod
    def create_connections(structure, arch):
        """Return the line segments connecting every neuron with every neuron of the next layer.

        Result: {"x": [(x1, x2), ...], "y": [(y1, y2), ...]} where x is the layer position
        and y the neuron position of the two end points.
        """

        connections = {
            "x" :   [], #(x1,x2), (x1,x2), layer_pos
            "y" :   [], #(y1,y2), (y1,y2), neuron_pos
        }

        #neuron positions per layer, looked up once instead of once per connection
        positions = {
            layer : group["neuron_pos"].tolist()
            for layer, group in structure.groupby("layer_pos", sort = True)
        }

        #the last layer has no outgoing connections
        for x1 in range(len(arch) - 1):
            x2 = x1 + 1

            for y1 in positions.get(x1, []):
                for y2 in positions.get(x2, []):

                    connections["x"].append((x1,x2))
                    connections["y"].append((y1,y2))

        return connections

    @staticmethod
    def draw_network(structure, connections, arch, title):
        """Draw the neurons as a scatter plot, add the connections as lines below them and show the figure."""

        width   = len(arch) * 150
        height  = 700

        #constant marker size, added on a copy so the caller's frame is not changed
        neurons = structure.assign(size = 1)

        fig_base = px.scatter(
            data_frame = neurons,
            x = "layer_pos",
            y = "neuron_pos",
            size_max = 10,
            size = "size",

            title = title,

            width = width,
            height = height,
            labels = {"layer_pos" : "layer", "neuron_pos" : "",}
        )

        for (x0, x1), (y0, y1) in zip(connections["x"], connections["y"]):

            fig_base.add_shape(
                type='line',
                x0 = x0, y0 = y0,
                x1 = x1, y1 = y1,
                line=dict(color="lightgrey", width=2),
                layer = "below",
            )

        #layers are numbered, only the first and the last one are named
        tick_text = list(range(len(arch)))
        tick_text[0] = "Input layer"
        tick_text[-1] = "Output layer"

        fig_base.update_layout(
            xaxis = dict(
                tickmode = 'array',
                tickvals = list(range(len(arch))),
                ticktext = tick_text,
            )
        )

        #plain white canvas without grid and without y tick labels
        fig_base.update_yaxes(showticklabels=False)
        fig_base.update_layout(
            xaxis=dict(showgrid=False),
            yaxis=dict(showgrid=False)
        )
        fig_base.update_layout({
            "plot_bgcolor": "rgba(255, 255, 255, 255)",
            "paper_bgcolor": "rgba(255, 255, 255, 255)",
            })

        fig_base.show()


In [None]:
ann_arch = [len(mlc_obj.df_train_x.columns.tolist()), 12, 12, 12, 330, 4]
ann_arch = [int(item / 4) for item in ann_arch]

print(ann_arch)


#Networkplotter.plot_ann(
#    arch = ann_arch,
#    title = "ANN architecture: MLP classifier",
#)

In [None]:
mlc_obj.create_model(arch = [12, 12, 12, 330])

In [None]:
optimal_param = pd.DataFrame(mlc_obj.default_param, index = ["optimal paraeters"])
optimal_param["hidden layer size"] = str([12, 12, 12, 330])
optimal_param.T

In [None]:
scores = mlc_obj.get_model_score()

In [None]:
mlc_obj.plot_confusion_mat(set = "valid")

In [None]:
mlc_obj.get_model_score(get_test_score = True)

In [None]:
mlc_obj.plot_confusion_mat(set = "test")

In [None]:
#further improvements: compare different values of alpha for the chosen architecture

for alpha in [1,0.5,0.25, 0.1, 0.01, 0.001]:
    print(f"\nAlpha: {alpha}")

    #the parameters are passed per call instead of overwriting mlc_obj.default_param,
    #so the object's defaults are still the original ones after the sweep
    sweep_param = {
        "activation"        : "relu",
        "solver"            : "adam",
        "alpha"             : alpha,         #default:   0.1
        "learning_rate"     : "adaptive",
        "shuffle"           : False,
        "early_stopping"    : True,
        "max_iter"          : 200,
    }

    mlc_obj.create_model(arch = [12, 12, 12, 330], param = sweep_param)
    mlc_obj.get_model_score()

## 5.3 Overfitting prevention

## 5.4 Regression model

In [None]:
#regression setup: same pipeline as the classifier, but with the continuous targets
mlr_obj = ML(
    df              = df,
    y_col           = ["t2m_t1", "t2m_t2"], #the two continuous targets, the scoring code reads exactly two target columns

    n_jobs          = 5, #NOTE(review): only stored on the object, the visible model creation does not pass it on

    data_folder     = data_folder,
    results_file    = "optim_reults_mlr.csv",

    model_metric    =  "r", # r = regression, c = classification
    window          = 30,
)

In [None]:
if run_optim is True:
    mlr_obj.run_optim(use_small_arch_list = True)

In [None]:
df_results = mlr_obj.get_results()
df_results.sort_values(by = "valid_rmse", ascending = True).head(5)

In [None]:
df_results.sort_values(by = "valid_r^2", ascending = False).head(5)

In [None]:
for arch in df_results["hidden_layer_sizes"].tolist():
    print(arch, end = ", ")

In [None]:
mlr_obj.create_model(arch = [496, 248, 124, 62])

In [None]:
mlr_obj.get_model_score(get_test_score = True, unstandardize_score = False)

In [None]:
mlr_obj.get_model_score(get_test_score = True, unstandardize_score = True)

In [None]:
#switch for drawing the (scaled down) regressor architecture
draw_nn = False

if draw_nn is True:
    #every layer is shrunk by a factor of 25 for plotting, but keeps at least one neuron
    ann_arch = [max(1, int(item / 25)) for item in [496 * 2, 496, 248, 124, 62, 2]]

    print(ann_arch)


    Networkplotter.plot_ann(
        arch = ann_arch,
        title = "ANN architecture: MLP regressor",
    )