In [1]:
import os
# Move the working directory one level up, presumably so that the relative
# 'data/...' paths and the project packages imported below resolve from there.
# NOTE(review): not idempotent — every re-run of this cell climbs one more
# directory, so execute it exactly once per kernel session.
path_parent = os.path.dirname(os.getcwd())
os.chdir(path_parent)

In [2]:
from ggmodel.graphmodel import GraphModel, concatenate_graph_specs
from ggmodel.validation import score_model_dictionnary, score_model, plot_diagnostic, make_baseline_computation_df, agg_score_by

from data_utils.database import select_dataset
from data_utils.utils import get_X_y_from_data, data_dict_from_df_tables
from models.landuse.GE3 import model_dictionnary

import pandas as pd
import numpy as np

  from pkg_resources import parse_version  # type: ignore


# Model development notebook

The goal of this notebook is to develop and improve the emission model.

Remarks:

- comment 1
- comment 2
- comment 3

## Data

In [3]:
def df_dict_to_csv(df_dict, path='data/GE3'):
    """Write every DataFrame of ``df_dict`` to ``<path>/<table>.csv``.

    Parameters
    ----------
    df_dict : dict
        Maps a table name to the DataFrame to save.
    path : str, optional
        Destination directory. It is created (with its parents) when it does
        not exist yet.

    Notes
    -----
    The DataFrame index is not written (``index=False``).
    """
    # Make sure the destination exists so the very first export does not fail.
    if path:
        os.makedirs(path, exist_ok=True)

    for table, df in df_dict.items():
        df.to_csv(f'{path}/{table}.csv', index=False)


def prepare_GE3_model_data(from_cache=True):
    """Build the data dictionary used by the GE3 models.

    Parameters
    ----------
    from_cache : bool, optional
        When true, the 'population' and 'emissions' tables are read from
        ``data/GE3/<table>.csv``. Otherwise they are fetched with
        ``select_dataset`` and saved through ``df_dict_to_csv`` so that the
        next call can use the cache.

    Returns
    -------
    dict
        The output of ``data_dict_from_df_tables`` in which 'OEi', 'IN_F',
        'EF_F' and 'FE_CO2eq' have had their missing values dropped and
        their 'Item' index level removed.
    """
    table_names = ['population', 'emissions']

    # Load the raw tables, either from the CSV cache or from the database
    if not from_cache:
        tables = {name: select_dataset(name) for name in table_names}
        df_dict_to_csv(tables)  # refresh the cache with the fetched tables
    else:
        tables = {}
        for name in table_names:
            tables[name] = pd.read_csv(f'data/GE3/{name}.csv', dtype={'Unit': str})

    # Discard the 'All Animals' rows of the emissions table
    emissions = tables['emissions']
    is_all_animals = emissions.Item.isin(['All Animals'])
    tables['emissions'] = emissions[~is_all_animals]

    data_dict = data_dict_from_df_tables(list(tables.values()))

    # These variables are used without their 'Item' index level
    for variable in ('OEi', 'IN_F', 'EF_F', 'FE_CO2eq'):
        data_dict[variable] = data_dict[variable].dropna().droplevel(level='Item')

    return data_dict

# Build the model inputs from the CSV files cached under data/GE3
data_dict = prepare_GE3_model_data(from_cache=True)

## Model

In [4]:
# Score every model of the GE3 model dictionary against the prepared data.
# NOTE(review): in the saved output TAi_model ('FPi') and GE3_partial_model
# ('No objects to concatenate') report errors — worth investigating.
scores = score_model_dictionnary(model_dictionnary, data_dict)

TMi_model: Done
TAi_model: Error: 'FPi'
M_xi_model: Done
TMP_CO2eq_model: Done
TMT_CO2eq_model: Done
TMA_CO2eq_model: Done
TEE_CO2eq_model: Done
FE_CO2eq_model: Done
GE3_partial_model: Error: No objects to concatenate
GE3_model: Done


In [5]:
# Print the per-variable score table of each scored model, one blank line apart
for model_name, model_score in scores.items():
    by_variable = model_score['score_by_Variable']
    print(model_name, by_variable, '', sep='\n')

TMi_model
           r2  correlation          rmse
Variable                                
TMi       1.0          1.0  4.014128e-16

M_xi_model
           r2  correlation          rmse
Variable                                
M_ASi     1.0          1.0  2.046328e-16
M_LPi     1.0          1.0  2.230453e-16
M_Ti      1.0          1.0  1.757402e-16

TMP_CO2eq_model
                 r2  correlation       rmse
Variable                                   
TMP_CO2eq  0.999997     0.999999  17.092812

TMT_CO2eq_model
                 r2  correlation          rmse
Variable                                      
E_TCH4i    1.000000     1.000000  8.633311e-09
E_Ti       0.999991     0.999997  2.239051e-05
TMT_CO2eq  0.999998     0.999999  2.151868e+00

TMA_CO2eq_model
                 r2  correlation      rmse
Variable                                  
TMA_CO2eq  0.999993          1.0  1.136032

TEE_CO2eq_model
            r2  correlation          rmse
Variable                                 
TE

## Scenario

In [None]:
# Presumably lists the input variables expected by the full GE3 model
# (they should match the keys handled in the projection below) — TODO confirm
model_dictionnary['GE3_model'].inputs_()

In [None]:
def run_GE3_projection(data_dict):
    '''Return a shallow copy of ``data_dict`` with every projected input transformed.

    GE3 is large, so the projection still has to be rethought: for now each
    input variable is mapped through the identity, i.e. values are carried
    over unchanged. Keys that are not projected are kept as they are.

    Raises ``KeyError`` when one of the projected variables is missing.
    '''
    def unchanged(values):
        # Placeholder transformation: the variable is not projected yet
        return values

    projected_variables = (
        'Pop',
        'OEi',
        'EF_EEi',
        'TAi',
        'IN_F',
        'MYi',
        'EF_ASi',
        'EF_Ti',
        'EF_CH4Ti',
        'EF_Li',
        'EF_F',
    )
    projection_dict = dict.fromkeys(projected_variables, unchanged)

    # Work on a copy so the caller's dictionary is left untouched
    projected = data_dict.copy()
    for variable, transform in projection_dict.items():
        projected[variable] = transform(projected[variable])

    return projected

def run_GE3_scenario(data_dict, MM_Ti=1/3, MM_ASi=1/3, MM_LPi=1/3):
    """Run the full GE3 model for one scenario.

    Parameters
    ----------
    data_dict : dict
        Model inputs. It is not modified.
    MM_Ti, MM_ASi, MM_LPi : float, optional
        Scenario parameters passed to the model under the keys 'MM_Ti',
        'MM_ASi' and 'MM_LPi' (presumably shares that sum to 1 — TODO confirm).

    Returns
    -------
    The value returned by ``model_dictionnary['GE3_model'].run``.
    """
    # Shallow copy, as run_GE3_projection does, so the scenario parameters do
    # not leak into the caller's dictionary between runs.
    scenario_inputs = data_dict.copy()
    scenario_inputs['MM_Ti'] = MM_Ti
    scenario_inputs['MM_ASi'] = MM_ASi
    scenario_inputs['MM_LPi'] = MM_LPi

    return model_dictionnary['GE3_model'].run(scenario_inputs)

In [None]:
# Restrict every variable to its 'FRA' / 2018 slice.
# NOTE(review): assumes each entry is indexed by three levels
# (presumably country, year, item) — confirm against the data loader.
filtered_data_dict = {k: v.loc['FRA', 2018, :] for k, v in data_dict.items()}

In [None]:
# Run the scenario on the filtered inputs with 1/3 for each of the three MM_* parameters
result =  run_GE3_scenario(filtered_data_dict, 1/3, 1/3, 1/3)

In [None]:
# Presumably renders the computation graph of the full GE3 model — TODO confirm
model_dictionnary['GE3_model'].draw()

In [None]:
from sklearn.preprocessing import LabelEncoder
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Keep only the four *_CO2eq emission outputs of the scenario run
emission_data_dict = {k: v for k, v in result.items() if k in ['TEE_CO2eq', 'TMA_CO2eq', 'TMT_CO2eq', 'TMP_CO2eq']}

def emission_data_dict_to_df(data_dict):
    """Stack emission series into one long table and append per-variable totals.

    Parameters
    ----------
    data_dict : dict
        Maps a variable name to a pandas Series of emission values
        (presumably indexed by 'Item' — TODO confirm).

    Returns
    -------
    pandas.DataFrame
        One row per non-missing series entry, with the series index as
        column(s), the value in 'Value' and the variable name in 'Variable'.
        It is followed by one total row per variable, where 'Item' holds the
        variable name, 'Value' the summed value and 'Variable' is
        'Non-CO2 agricultural emissions'.
    """
    frames = [
        series.to_frame(name='Value').assign(Variable=name)
        for name, series in data_dict.items()
    ]
    long_df = pd.concat(frames, axis=0).reset_index().dropna()

    # Aggregate the 'Value' column only: summing the whole frame would also
    # aggregate the text columns (recent pandas versions no longer drop them
    # silently), which would create a duplicate 'Item' column after the rename.
    totals = (
        long_df
        .groupby('Variable', as_index=False)['Value']
        .sum()
        .rename(columns={'Variable': 'Item'})
        .assign(Variable='Non-CO2 agricultural emissions')
    )

    return pd.concat([long_df, totals])
    
# Long-format emission table (per-item rows plus per-variable totals) used for plotting
em_df = emission_data_dict_to_df(emission_data_dict)

def encode_source_target(df, source='Item', target='Variable'):
    """Encode the ``source`` and ``target`` columns of ``df`` with one shared set of integer labels.

    Returns
    -------
    tuple
        ``(codes, classes)``. ``codes`` has one row per row of ``df`` and two
        columns (source code, target code). ``classes`` is the fitted
        encoder's ``classes_``.
    """
    pairs = df[[source, target]].values

    encoder = LabelEncoder()
    # Flatten first so that both columns are encoded in the same label space,
    # then restore the two-column layout.
    codes = encoder.fit_transform(pairs.flatten())

    return codes.reshape(-1, 2), encoder.classes_


def sankeyplot(df, source, target, valueformat='.00f', valuesuffix=' Gg(CO2eq)'):
    """Build a plotly Sankey figure from a table of flows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per flow. Must contain the ``source`` and ``target`` columns
        and a 'Value' column holding the size of each flow. It is not modified.
    source, target : str
        Names of the columns holding the origin and destination of each flow.
    valueformat, valuesuffix : str, optional
        Passed as is to ``go.Sankey`` to format the displayed values.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    flows = df.copy()

    # Nodes are referenced by integer position in the label list
    codes, labels = encode_source_target(flows, source, target)
    flows[['Source', 'Target']] = codes

    node_style = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
    )
    link_data = dict(
        target=flows['Target'],
        source=flows['Source'],
        value=flows['Value'],
    )

    sankey = go.Sankey(
        valueformat=valueformat,
        valuesuffix=valuesuffix,
        node=node_style,
        link=link_data,
    )

    return go.Figure(data=[sankey])

In [None]:
# Sankey diagram of the emission flows, from each item to its emission variable
fig = sankeyplot(em_df, source='Item', target='Variable')
fig.update_layout(title_text="Agricultural Animal Emissions", font_size=10)