In [1]:
import mlflow
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error as mape_score

from corn_utils import (
    load_all_csvs,
    load_all_jsons,
    join_csvs_and_filter_by_year,
    unnorm_a_column,
    get_scores
)

  """Entry point for launching an IPython kernel.


In [2]:
# Build per-state feature/target matrices and stack them into global
# train/test arrays.
METEO_COLS = ['NLST', 'Nndvi', 'Npr', 'Ntemp', 'Nvpd']
N_FEATURES = len(METEO_COLS)  # the meteo columns come first in the joined frame

X_train_lst, y_train_lst = [], []
X_test_lst, y_test_lst = [], []

# South_Dakota and North_Dakota are currently left out of the experiment.
for state in ('Minnesota', 'Nebraska', 'Iowa'):
    df_meteo = load_all_jsons(state=state)
    df_target = join_csvs_and_filter_by_year(load_all_csvs(state=state.upper()))

    # Aggregate on the first index level: meteo features and 'Nyield' are
    # averaged, 'Nprod' is summed ('Nharvest' is deliberately not used).
    meteo_mean = df_meteo[METEO_COLS].groupby(level=0).mean()
    prod_sum = df_target[['Nprod']].groupby(level=0).sum()
    yield_mean = df_target[['Nyield']].groupby(level=0).mean()
    df = pd.concat([meteo_mean, prod_sum, yield_mean], axis=1)

    XY = df.to_numpy()
    features, targets = XY[:, :N_FEATURES], XY[:, N_FEATURES:]

    # Train on everything except the last four rows; test on the last three
    # rows (2020, 2021, 2022 according to the original author's note).
    # NOTE(review): the fourth-from-last row ends up in neither split --
    # confirm that this gap is intentional.
    X_train_lst.append(features[:-4])
    y_train_lst.append(targets[:-4])
    X_test_lst.append(features[-3:])
    y_test_lst.append(targets[-3:])

X_train, y_train = np.concatenate(X_train_lst), np.concatenate(y_train_lst)
X_test, y_test = np.concatenate(X_test_lst), np.concatenate(y_test_lst)

In [3]:
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# Seeds for the repeated fits. Kept in one place so that the value logged to
# MLflow can never drift away from the seeds that are actually used.
_DEFAULT_RANDOM_STATES = (
    4691, 6231, 7298, 2554,
    7856, 5509, 9803, 7778,
    4314, 9261, 4436, 4883,
)


def _mean_scores(score_dicts):
    """Average a list of per-seed score dicts element-wise.

    Every element is indexed as ``d[metric][i]`` with ``metric`` in
    ``('r2', 'mape')`` and ``i`` in ``(0, 1)``.
    """
    return {
        metric: [
            np.mean([scores[metric][idx] for scores in score_dicts])
            for idx in (0, 1)
        ]
        for metric in ('r2', 'mape')
    }


@ignore_warnings(category=ConvergenceWarning)
def full_training_eval_loop(
    hidden_layer_sizes=(5),
    random_states=_DEFAULT_RANDOM_STATES,
):
    """Train one MLPRegressor per seed, score it, and log the averages to MLflow.

    The function reads the module-level arrays ``X_train``, ``y_train``,
    ``X_test``, ``y_test`` and the module-level ``df_target`` frame.

    NOTE(review): ``df_target`` is whatever frame the data-loading loop left
    behind (the last state processed), while the arrays pool several states --
    confirm that this is what ``get_scores`` expects.

    Parameters
    ----------
    hidden_layer_sizes : int or tuple of int
        Forwarded to ``MLPRegressor``. The default ``(5)`` is the plain int
        5, not a 1-tuple. Its ``str()`` is used as run name, tag and param.
    random_states : iterable of int
        One model is fitted per seed. Logged as the comma-separated
        ``randomstatelst`` param.

    Returns
    -------
    (avg_d_test, avg_d_train) : tuple of dict
        Each dict has keys ``'r2'`` and ``'mape'`` mapping to a two-element
        list of seed-averaged scores (logged as "Prod" and "Yield").

    Raises
    ------
    ValueError
        If ``random_states`` is empty.
    """
    seeds = list(random_states)
    if not seeds:
        raise ValueError("random_states must contain at least one seed")

    # Test rows with a NaN target cannot be scored. The mask does not depend
    # on the fitted model, so compute it once instead of twice per seed.
    scorable_rows = ~np.isnan(y_test).any(axis=1)
    y_test_nonull = y_test[scorable_rows, :]

    with mlflow.start_run(
        run_name=str(hidden_layer_sizes),
        tags={"hidden_layer_sizes": str(hidden_layer_sizes)},
    ):
        mlflow.log_param("hidden_layer_sizes", str(hidden_layer_sizes))
        mlflow.log_param("randomstatelst", ','.join(str(seed) for seed in seeds))

        dd, ddtrain, losscurve = [], [], []

        for random_state in seeds:
            regr = MLPRegressor(
                random_state=random_state,
                learning_rate_init=0.01,
                max_iter=10000,
                hidden_layer_sizes=hidden_layer_sizes,
                # Per the scikit-learn docs this is only used when
                # early_stopping=True, which is left at its default here.
                validation_fraction=0.1,
                # Same value as max_iter -- presumably to keep every fit
                # running for the full max_iter epochs, which also gives all
                # loss curves the same length for the averaging below.
                n_iter_no_change=10000,
            ).fit(X_train, y_train)

            losscurve.append(regr.loss_curve_)

            y_pred_nonull = regr.predict(X_test)[scorable_rows, :]
            dd.append(get_scores(y_test_nonull, y_pred_nonull, df_target))
            ddtrain.append(get_scores(y_train, regr.predict(X_train), df_target))

        avg_d_test = _mean_scores(dd)
        avg_d_train = _mean_scores(ddtrain)

        # Element-wise mean (not median) of the per-seed loss curves.
        mean_loss_curve = np.asarray(losscurve).mean(axis=0)
        for step, val in enumerate(mean_loss_curve):
            mlflow.log_metric(key='loss', value=val, step=step)

        # Metric names: r2trainProd, r2trainYield, r2testProd, r2testYield,
        # then the same four with the MAPE prefix.
        for metric, prefix in (('r2', 'r2'), ('mape', 'MAPE')):
            for split, averaged in (('train', avg_d_train), ('test', avg_d_test)):
                for idx, target in enumerate(('Prod', 'Yield')):
                    mlflow.log_metric(prefix + split + target, averaged[metric][idx])

        return avg_d_test, avg_d_train

In [4]:
# Single hidden layer of 5 units, passed as a plain int (`(5)` is not a tuple).
full_training_eval_loop(hidden_layer_sizes=5)

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



({'r2': [0.9187248448149578, 0.8032725538881978],
  'mape': [3.688003650101692, 6.035560390158171]},
 {'r2': [0.9638804155515123, 0.9657376872982001],
  'mape': [2.261633346052282, 2.9633168199345463]})

In [5]:
# Single hidden layer of 10 units, passed as a plain int (`(10)` is not a tuple).
full_training_eval_loop(hidden_layer_sizes=10)

({'r2': [0.8867261656935318, 0.7833908935448309],
  'mape': [4.3792956593209, 6.04984021504762]},
 {'r2': [0.9841313807687465, 0.980728844356478],
  'mape': [1.5216037141078285, 2.4836567258030224]})

In [6]:
# Two hidden layers: 5 and 5 units.
full_training_eval_loop(hidden_layer_sizes=(5, 5))

({'r2': [0.9084100737640414, 0.7858935830376033],
  'mape': [4.341956442113097, 6.638426079509535]},
 {'r2': [0.9768902828429042, 0.968197584214134],
  'mape': [1.6403949568362932, 2.3696076760873677]})

In [7]:
# Two hidden layers: 5 and 3 units.
full_training_eval_loop(hidden_layer_sizes=(5, 3))

({'r2': [0.8929061257702265, 0.8201836097434877],
  'mape': [4.693483821355433, 5.562554719693572]},
 {'r2': [0.958449175515267, 0.9638004020487957],
  'mape': [2.2816156756107957, 3.1342818269191657]})

In [8]:
# Three hidden layers of 5 units each.
full_training_eval_loop(hidden_layer_sizes=(5, 5, 5))

({'r2': [0.8831590762952626, 0.6522943992344461],
  'mape': [4.916295215886383, 8.01487337834953]},
 {'r2': [0.9958424223528003, 0.9802106720653906],
  'mape': [0.6055672473114667, 1.9924026510876356]})

In [9]:
# Four hidden layers of 5 units each.
full_training_eval_loop(hidden_layer_sizes=(5, 5, 5, 5))

({'r2': [0.9275366830996891, 0.8790743156811235],
  'mape': [3.4488343685961538, 4.498669933157642]},
 {'r2': [0.9843236705054877, 0.9805189408705138],
  'mape': [1.0048698397445814, 1.9613099010753634]})