This has some content that may be useful for the plots

In [1]:
import pandas as pd
import numpy as np
from torch import from_numpy
import seaborn as sns
import matplotlib.pyplot as plt
from torch.nn import MSELoss

In [2]:
def lossBySimulations(df_simulations, df_predictions, target_cols, initial_parameters_columns_name = "initial_parameters"):
    """
        lossBySimulations:
            Groups both dataframes by the initial-parameters column and computes
            one MSE loss per group (i.e. per simulation run) between the
            simulated and the predicted target columns.

            The groups of the two dataframes are paired positionally, in the
            order pandas' groupby yields them, so both dataframes are expected
            to contain the same groups with the same number of rows.
        Args:
            (<class 'pandas.core.frame.DataFrame'>) df_simulations: To specify the simulations dataframe
            (<class 'pandas.core.frame.DataFrame'>) df_predictions: To specify the predictions dataframe
            (<class 'list'>) target_cols : Positional (iloc) indices of the target columns to compare
            (str) initial_parameters_columns_name : Optional name of the column holding the
                                                    initial parameters; it is used both as the
                                                    groupby key and as the output column name
        Returns:
            (<class 'pandas.core.frame.DataFrame'>) one row per group with the group label and its loss
            (<class 'dict'>) the same data as a dictionary of lists

    """

    dictionary = {initial_parameters_columns_name : [], "Loss": []}

    # MSELoss holds no state, so a single instance serves every group.
    loss_func = MSELoss()

    # groupby yields (group key, group dataframe) pairs.
    simulation_groups = df_simulations.groupby(initial_parameters_columns_name)
    prediction_groups = df_predictions.groupby(initial_parameters_columns_name)

    for (group_key, simulations_data), (_, predictions_data) in zip(simulation_groups, prediction_groups):
        # Only the target columns take part in the loss.
        simulations_tensor = from_numpy(simulations_data.iloc[:, target_cols].to_numpy(dtype='float32'))
        predictions_tensor = from_numpy(predictions_data.iloc[:, target_cols].to_numpy(dtype='float32'))

        loss = loss_func(simulations_tensor, predictions_tensor)

        # Build a printable label for the group. Iterable keys (e.g. tuples)
        # are joined with spaces; scalar keys are used as they are. Strings
        # are treated as scalars so they are not split into characters, and
        # non-iterable keys (ints, floats) no longer raise a TypeError.
        if isinstance(group_key, str):
            columns_name = group_key
        else:
            try:
                key_parts = iter(group_key)
            except TypeError:
                columns_name = str(group_key)
            else:
                columns_name = " ".join(str(col) for col in key_parts)

        dictionary[initial_parameters_columns_name].append(columns_name)
        dictionary["Loss"].append(loss.item())

    return pd.DataFrame(data = dictionary), dictionary

In [3]:
def lossByTime(df_simulations, df_predictions, target_cols, initial_parameters_columns_name = "initial_parameters", time_colname = "time"):
    """
        lossByTime:
            Computes one MSE loss per row (time step) of every simulation.
            Both dataframes are grouped by the initial-parameters column, the
            groups are paired positionally, and inside each pair the rows are
            compared one by one by their position in the group.
        Args:
            (<class 'pandas.core.frame.DataFrame'>) df_simulations: To specify the simulations dataframe
            (<class 'pandas.core.frame.DataFrame'>) df_predictions: To specify the predictions dataframe
            (<class 'list'>) target_cols : Positional (iloc) indices of the target columns to compare
            (str) initial_parameters_columns_name : optional name of the initial parameters column
            (str) time_colname : optional name of the time column (read from the simulations dataframe)
        Returns:
            (<class 'pandas.core.frame.DataFrame'>) columns "Time", the initial parameters column and "Loss"
            (<class 'dict'>) the same data as a dictionary of lists

    """
    dictionary = {"Time" : [], initial_parameters_columns_name : [], "Loss": []}
    criterion = MSELoss()

    # groupby yields (group key, group dataframe) pairs for each dataframe.
    grouped_simulations = df_simulations.groupby(initial_parameters_columns_name)
    grouped_predictions = df_predictions.groupby(initial_parameters_columns_name)

    for (group_key, sim_frame), (_, pred_frame) in zip(grouped_simulations, grouped_predictions):
        # The number of rows in the simulation group drives the iteration.
        for position in range(len(sim_frame)):
            # Target values of the same row position in both groups.
            sim_values = sim_frame.iloc[position, target_cols].to_numpy(dtype = 'float32')
            pred_values = pred_frame.iloc[position, target_cols].to_numpy(dtype = 'float32')

            row_loss = criterion(from_numpy(sim_values), from_numpy(pred_values))

            # The time stamp comes from the simulation row; the group key is stored unchanged.
            dictionary["Time"].append(sim_frame[time_colname].iloc[position])
            dictionary["Loss"].append(row_loss.item())
            dictionary[initial_parameters_columns_name].append(group_key)

    return pd.DataFrame(data = dictionary), dictionary

In [4]:
def timePlots(x, y, df):
    """
        timePlots:
            This is used to plot the time against the loss of a simulation
            as a line plot on a newly created figure.
        Args:
            (str) x : This is the x column name (also used as the x-axis label)
            (str) y : This is the y column name (also used as the y-axis label)
            (<class 'pandas.core.frame.DataFrame'>) df: used to plot the time (must be of one simulation)

    """
    # sns.color_palette only *returns* a palette; calling it as a bare
    # statement has no effect. Used as a context manager it temporarily
    # installs the palette as the colour cycle, so the axes must be created
    # inside the block to pick it up.
    with sns.color_palette("Set3", 10):
        _, ax = plt.subplots()
        ax.set(ylabel= y,
               xlabel= x)
        sns.lineplot(x = x, y = y, data = df, ax = ax, label="loss")
        sns.despine(ax = ax, left=True, bottom=True)
        ax.legend(ncol=2, loc="lower right", frameon=True)

In [6]:
def simulaPlots(x, y, df, figsize = (10,150)):
    """
        simulaPlots:
            This is used to plot the simulation against the loss as a
            horizontal bar plot on a newly created figure: the `x` column is
            drawn along the vertical axis and the `y` column along the
            horizontal axis, matching the axis labels.

            With x="initial_parameters" and y="Loss" the plot is the same as
            the one previously produced from the hard-coded column names.
        Args:
            (str) x : Column shown on the vertical axis (e.g. the simulation / initial parameters column)
            (str) y : Column shown on the horizontal axis (e.g. the loss column)
            (<class 'pandas.core.frame.DataFrame'>) df: used to plot the simulation
            (tuple) figsize : optional (width, height) of the figure
    """
    # sns.color_palette only *returns* a palette; calling it as a bare
    # statement has no effect. Used as a context manager it temporarily
    # installs the palette as the colour cycle, so the axes must be created
    # inside the block to pick it up.
    with sns.color_palette("Set3", 10):
        _, ax = plt.subplots(figsize=figsize)
        ax.set(ylabel = x,
               xlabel= y)
        # Plot the columns named by the caller instead of hard-coded names,
        # keeping the orientation implied by the labels set above.
        sns.barplot(x = y, y = x, data= df, ax=ax, label="loss")
        sns.despine(ax = ax, left=True, bottom=True)
        ax.legend(ncol=2, loc="lower right", frameon=True)