In [1]:
import os
# Move the working directory one level up from wherever the kernel started.
# NOTE(review): presumably so the project-local packages imported below resolve — confirm.
# This cell is not idempotent: every re-run climbs one more directory level.
path_parent, _ = os.path.split(os.getcwd())
os.chdir(path_parent)

In [2]:
from ggmodel.graphmodel import GraphModel, concatenate_graph_specs
from ggmodel.validation import score_model, plot_diagnostic

from data_utils.database import select_dataset
from data_utils.utils import get_X_y_from_data, data_dict_from_df_tables
from models.landuse.BE2 import model_dictionnary

import pandas as pd
import numpy as np

  from pkg_resources import parse_version  # type: ignore


# Model development notebook

The goal of this notebook is to develop and improve the landuse model.

Remarks:

- comment 1
- comment 2
- comment 3

## Data

In [3]:
def prepare_landuse_model_data():
    """Load the land-use input tables and assemble the model data dictionary.

    Steps, in order:
      1. Fetch the four source tables with ``select_dataset``.
      2. In the food balance sheet, discard the rows whose ``Variable`` is
         'Domestic supply quantity' and drop the ``group`` column.
      3. Convert the tables to a data dictionary with ``data_dict_from_df_tables``.
      4. Add the ``*_baseline`` entries (the same objects as 'CL', 'IL', 'FL')
         and two constant series indexed like 'CL_baseline':
         'R_rate' (0) and 'CL_corr_coef' (1.4).
      5. Replace missing values with 0 in every entry.

    Returns:
        dict: variable name -> value with NaNs filled with 0.
    """
    table_names = ['foodbalancesheet_new', 'population', 'crops', 'landuse']

    tables = {}
    for table_name in table_names:
        tables[table_name] = select_dataset(table_name)

    food_balance = tables['foodbalancesheet_new']
    keep_rows = ~food_balance.Variable.isin(['Domestic supply quantity'])
    # for now only crop is considered, hence the 'group' column is dropped
    tables['foodbalancesheet_new'] = food_balance[keep_rows].drop(columns=['group'])

    data_dict = data_dict_from_df_tables(list(tables.values()))

    baseline_sources = (
        ('CL_baseline', 'CL'),
        ('IL_baseline', 'IL'),
        ('FL_baseline', 'FL'),
    )
    for baseline_key, source_key in baseline_sources:
        data_dict[baseline_key] = data_dict[source_key]

    # Constant series share the index of the cropland baseline.
    data_dict['R_rate'] = pd.Series(data=0, index=data_dict['CL_baseline'].index)
    data_dict['CL_corr_coef'] = pd.Series(data=1.4, index=data_dict['CL_baseline'].index)

    filled = {}
    for name, values in data_dict.items():
        filled[name] = values.fillna(0)

    return filled

# Build the model input dictionary once; later cells slice it by country and year.
data_dict = prepare_landuse_model_data()

In [4]:
# Restrict every variable to a fixed set of countries and years for model scoring.
# The trailing ':' keeps everything on the remaining index level.
VALIDATION_ISOS = ['FRA', 'HUN', 'DEU', 'NLD', 'BEL', 'CHE', 'BRA', 'SEN']
VALIDATION_YEARS = [2016, 2017, 2018]

ISO_data_dict = {
    name: values.loc[VALIDATION_ISOS, VALIDATION_YEARS, :]
    for name, values in data_dict.items()
}


## Model

In [5]:
# Score every model in `model_dictionnary` against the country/year subset.
# One failing model must not abort the sweep, so errors are reported and the
# loop moves on to the next model.
for model_name, model in model_dictionnary.items():
    print(model)
    try:
        X, y_true = get_X_y_from_data(model, ISO_data_dict)
        print(score_model(model, X, y_true))
    except Exception as e:
        # str(e) alone can be empty or ambiguous (a KeyError prints only the
        # missing key), so also report which model failed and the error type.
        print('{}: scoring failed with {}: {}'.format(model_name, type(e).__name__, e))



FPi
TCLDi
         r2  correlation          rmse Variable
0  0.043805     0.211274  4.472898e+08      FPi
1  0.932502     0.965834  2.458636e+11    TCLDi

FPi
TCLDi
         r2  correlation          rmse Variable
0  0.043805     0.211274  4.472898e+08      FPi
1  0.932502     0.965834  2.458636e+11    TCLDi

FPi
         r2  correlation          rmse Variable
0  0.043805     0.211274  4.472898e+08      FPi

IL
FL
    r2  correlation  rmse Variable
0  1.0          1.0   0.0       IL
1  1.0          1.0   0.0       FL

Empty DataFrame
Columns: []
Index: []

FL
IL
CL
TCLDi
FPi
         r2  correlation          rmse Variable
0  0.996737     0.999989  1.205721e+08       FL
1 -5.342468     0.837874  4.789474e+05       IL
2  0.758389     0.996279  1.213212e+08       CL
3  0.932502     0.965834  2.458636e+11    TCLDi
4  0.043805     0.211274  4.472898e+08      FPi


## Scenario

In [14]:
from ggmodel.projection import *


def run_BE2_projection(data_dict):
    """Apply the forward-fill projections to the BE2 input variables.

    The itemized variables go through ``apply_itemized_ffill_projection`` and
    the remaining ones through ``apply_ffill_projection``. Entries not listed
    here are passed through unchanged.

    Args:
        data_dict: mapping of variable name -> data. It must contain every
            variable listed below, otherwise a ``KeyError`` is raised.

    Returns:
        A copy of ``data_dict`` (made with ``.copy()``) whose listed entries
        are replaced by their projected versions. The caller's mapping is not
        reassigned.
    """
    data_dict = data_dict.copy()

    # Each variable appears exactly once; the original dict literal listed
    # 'FDi' twice with the same projection, which was redundant.
    itemized_ffill_variables = (
        'FDi', 'SSRi', 'SVi', 'RDi', 'NFDi', 'FEi', 'FIi', 'PDi', 'SDi',
    )
    ffill_variables = (
        'TLA', 'CL_baseline', 'IL_baseline', 'FL_baseline', 'Pop', 'CL_corr_coef',
    )

    for variable in itemized_ffill_variables:
        data_dict[variable] = apply_itemized_ffill_projection(data_dict[variable])

    for variable in ffill_variables:
        data_dict[variable] = apply_ffill_projection(data_dict[variable])

    return data_dict


def run_BE2_scenario(data_dict, FDKGi_target=1, FLOi_target=1, CYi_target=1, R_rate=1):
    """Apply the scenario levers to the inputs and run the BE2 model.

    Args:
        data_dict: mapping of variable name -> data; must contain 'CYi',
            'FDKGi', 'FLOi' and 'R_rate'.
        FDKGi_target: target passed to the itemized percent projection of 'FDKGi'.
        FLOi_target: target passed to the itemized percent projection of 'FLOi'.
        CYi_target: target passed to the itemized percent projection of 'CYi'.
        R_rate: value passed to the constant projection of 'R_rate'.

    Returns:
        Whatever ``model_dictionnary['BE2_model'].run`` returns for the
        adjusted inputs.
    """
    scenario_inputs = data_dict.copy()

    # Levers that share the itemized percent-target projection, in application order.
    percent_targets = (
        ('CYi', CYi_target),
        ('FDKGi', FDKGi_target),
        ('FLOi', FLOi_target),
    )
    for variable, target in percent_targets:
        scenario_inputs[variable] = apply_itemized_percent_target_projection(
            scenario_inputs[variable], target
        )

    scenario_inputs['R_rate'] = apply_constant_projection(scenario_inputs['R_rate'], R_rate)

    be2_model = model_dictionnary['BE2_model']
    return be2_model.run(scenario_inputs)

In [15]:
%%time

ISO = 'FRA'
data_dict_filtered = {k: v.loc[ISO, 2000:] for k, v in data_dict.items()}

proj_data_dict = run_BE2_projection(data_dict_filtered)

res = run_BE2_scenario(proj_data_dict)

CPU times: user 1.47 s, sys: 940 µs, total: 1.47 s
Wall time: 1.47 s
