In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.rcParams['figure.figsize'] = [18,20]
# plt.gcf().set_size_inches(18,20, forward=True) 

### Common Objects and Variables

In [2]:
# Region catalogue used by the helpers in this notebook.
# Each region code maps to a dict with:
#   'name'      -> human-readable region name (used in plot titles)
#   'countries' -> list of the country names that make up the region
# Country names are written in Spanish; presumably they must match the
# spelling used in the country column of the datasets -- TODO confirm.
regions = {
    'LAM': {
        'name': 'Latin America',
        'countries': [
            'Argentina',
            'Bolivia',
            'Brasil',
            'Chile',
            'Colombia',
            'Costa Rica',
            'Cuba',
            'Ecuador',
            'El Salvador',
            'Guatemala',
            'Haiti',
            'Honduras',
            'Jamaica',
            'Mexico',
            'Nicaragua',
            'Panama',
            'Paraguay',
            'Peru',
            'Republica Dominicana',
            'Trinidad y Tobago',
            'Uruguay',
            'Venezuela',
        ],
    },
    'DEVELOPED': {
        'name': 'Developed Countries',
        'countries': [
            'Alemania',
            'Australia',
            'Austria',
            'Bulgaria',
            'Belgica',
            'Canada',
            'Republica Checa',
            'Eslovaquia',
            'Dinamarca',
            'España',
            'EEUU',
            'Finlandia',
            'Francia',
            'Grecia',
            'Hungria',
            'Irlanda',
            'Israel',
            'Italia',
            'Japon',
            'Libano',
            'Noruega',
            'Nueva Zelanda',
            'Paises Bajos',
            'Polonia',
            'Portugal',
            'Puerto Rico',
            'Reino Unido',
            'Rumania',
            'Suecia',
            'Suiza',
            'Armenia',
            'Azerbaiyan',
            'Bielorrusia',
            'Estonia',
            'Georgia',
            'Kazajistan',
            'Kirguistan',
            'Letonia',
            'Lituania',
            'Moldavia',
            'Rusia',
            'Tayikistan',
            'Turkmenistan',
            'Ucrania',
            'Uzbekistan',
            'Macedonia',
            'Bosnia y Herzegovina',
            'Croacia',
            'Serbia',
            'Montenegro',
            'Eslovenia',
        ],
    },
}



### Dataframe Utilities
Helpers for filtering, loading and saving dataframes.

In [3]:
def replace_value_in_column(df, seek_column,old_value, new_value):
    """Replace, in place, every occurrence of a value inside one column.

    Args:
        df: DataFrame to modify; it is mutated, nothing is returned.
        seek_column: label of the column to search and update.
        old_value: cells of ``seek_column`` equal to this value are replaced.
        new_value: value written into the matching cells.
    """
    # Boolean mask of the rows whose cell currently holds old_value.
    rows_to_update = df[seek_column] == old_value
    df.loc[rows_to_update, seek_column] = new_value

In [4]:
def get_countries_for_region(df, country_column, region):
    """Return the rows of ``df`` whose country belongs to the given region.

    Args:
        df: DataFrame holding a column with country names.
        country_column: label of that column.
        region: key of the module-level ``regions`` dict (e.g. 'LAM').

    Returns:
        The subset of ``df`` (original index preserved) whose
        ``country_column`` value is listed in ``regions[region]['countries']``.
    """
    region_countries = regions[region]['countries']
    in_region = df[country_column].isin(region_countries)
    return df.loc[in_region]
def get_missing_countries_for_region(df, country_column, region):
    """Return the region's countries that do not appear in ``df``.

    Args:
        df: DataFrame holding a column with country names.
        country_column: label of that column.
        region: key of the module-level ``regions`` dict (e.g. 'LAM').

    Returns:
        A ``set`` with every name in ``regions[region]['countries']`` that is
        absent from ``df[country_column]``.
    """
    expected = set(regions[region]['countries'])
    # Country values of the rows that belong to the region.
    present = get_countries_for_region(df, country_column, region)[country_column]
    return expected.difference(present)

In [5]:
def get_mml_data(region, run=141):
    """Load the MML experiment CSV for a region and run into a DataFrame.

    TODO: in the future it should be possible to identify specific runs.
    For now, ``run`` refers to the run id on the MML website.

    Args:
        region: name of the sub-directory of ``data/`` holding the file.
        run: run id used in the file name (defaults to 141).

    Returns:
        The DataFrame read from ``data/{region}/mml_experiment_{run}.csv``.
    """
    csv_path = f"data/{region}/mml_experiment_{run}.csv"
    return pd.read_csv(csv_path)

In [6]:
from pathlib import Path
def save_porcentual_diff_to_excel_for_variable(df, variable_analyzed, region):
    """Store the 'diff_%' column of ``df`` in the per-region Excel summary.

    The workbook ``data/output/diff_porcentual_{region}.xlsx`` keeps a 'Year'
    column plus one column per analysed variable. If the workbook does not
    exist yet it is created from the 'Year' and 'diff_%' columns of ``df``.
    Otherwise it is read back and a column named ``variable_analyzed`` is
    added (or overwritten) before the workbook is rewritten.

    Args:
        df: DataFrame with a 'Year' column and a 'diff_%' column.
        variable_analyzed: name of the column that will hold the 'diff_%'
            values in the workbook.
        region: region code used to build the workbook file name.

    Returns:
        None. The workbook on disk is created or overwritten.
    """
    path=f"data/output/diff_porcentual_{region}.xlsx"
    output_file = Path(path)
    if not output_file.is_file():
        # First variable for this region: make sure the output directory
        # exists so that to_excel does not fail on a fresh checkout.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        df_diff_porcentual = df[['Year', 'diff_%']].rename(columns={'diff_%': variable_analyzed})
    else:
        df_diff_porcentual = pd.read_excel(path)
        # Align the new values on 'Year' rather than on the row index: the
        # workbook is read back with a fresh 0..n-1 index, so an index-based
        # assignment silently shifts or blanks values whenever ``df`` was
        # filtered, re-sorted or covers a different range of years.
        # If ``df`` repeats a year, the last occurrence is the one stored.
        diff_by_year = df.drop_duplicates(subset='Year', keep='last').set_index('Year')['diff_%']
        df_diff_porcentual[variable_analyzed] = df_diff_porcentual['Year'].map(diff_by_year)
    df_diff_porcentual.to_excel(path, index=False)

### Plots

In [7]:
def compare_mml_vs_truth_line_plot(df, x_col_name, y_col_mml, y_col_truth, variable_analyzed, region, y_ticks=None):
    """Plot the MML simulation and the measured data as two lines and show it.

    Args:
        df: DataFrame holding the x column and both y columns.
        x_col_name: label of the column used for the x axis (also the x label
            and the x tick positions).
        y_col_mml: label of the column with the MML simulation values.
        y_col_truth: label of the column with the measured values.
        variable_analyzed: variable name used as y label and in the title.
        region: key of the module-level ``regions`` dict; its 'name' goes
            into the title.
        y_ticks: optional y tick positions; matplotlib's default ticks are
            kept when ``None``.
    """
    plt.figure(figsize=(18,10))
    x_values = df[x_col_name]

    # Same plotting order as the legend order: simulation first, data second.
    for y_column, series_label in ((y_col_mml, "MML Simulation"), (y_col_truth, "Data")):
        plt.plot(x_values, df[y_column], label=series_label, marker='o')

    # One tick per x value, labels rotated to avoid overlap.
    plt.xticks(x_values, rotation=90)
    if y_ticks is not None:
        plt.yticks(y_ticks, rotation=90)

    plt.xlabel(x_col_name)
    plt.ylabel(variable_analyzed)
    plt.grid()
    plt.legend()

    title = f"Year by Year plot of {variable_analyzed} values between measured data and MML standard simulation outputs for {regions[region]['name']}"
    plt.title(title)
    plt.show()

#### Error visualization

For each year we have a truth value and an MML value.

Of course, we assume that the truth data is correct.

So the idea is to calculate, year by year, the difference between them as a percentage of the truth value, e.g. a 5% difference for 1970, a -3.6% difference for 1974, etc.

The sign indicates whether the MML value is below (-) or above (+) the truth value.

In [8]:
def compute_porcentual_diff_between_truth_and_mml(df,mml_value_column, truth_value_column):
    """Add a 'diff_%' column with the signed percentage gap between MML and truth.

    The value is ``(mml - truth) / truth * 100``: negative when the MML value
    is below the truth value, positive when it is above.

    Args:
        df: DataFrame to modify; the 'diff_%' column is written in place and
            nothing is returned.
        mml_value_column: label of the column with the MML values.
        truth_value_column: label of the column with the truth values.
    """
    simulated = df[mml_value_column]
    truth = df[truth_value_column]
    relative_gap = (simulated - truth) / truth
    df['diff_%'] = relative_gap * 100
    
def porcentual_diff_plot(df, variable_analyzed, region, x_ticks=None, y_ticks=None):
    """Show a bar chart of the 'diff_%' column against 'Year'.

    Args:
        df: DataFrame with a 'Year' column and a 'diff_%' column.
        variable_analyzed: variable name used in the title.
        region: key of the module-level ``regions`` dict; its 'name' goes
            into the title.
        x_ticks: optional x tick positions; default ticks when ``None``.
        y_ticks: optional y tick positions; default ticks when ``None``.
    """
    plt.figure(figsize=(18,8))
    plt.bar(df['Year'], df['diff_%'])
    plt.grid(axis='y')

    # Custom ticks are applied only for the axes where they were supplied.
    for apply_ticks, tick_values in ((plt.yticks, y_ticks), (plt.xticks, x_ticks)):
        if tick_values is not None:
            apply_ticks(tick_values)

    plt.xlabel("Year")
    plt.ylabel("% difference")

    title = f"Porcentual difference between truth dataset and standard MML run for {variable_analyzed} for {regions[region]['name']}"
    plt.title(title)
    plt.show()