In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

import torch

import constant
import tools
import myplot
import features
from models import MLP_Regression, train_model, timeseries_kfold_validation_training, model_evaluation, model_improvement

SyntaxError: invalid syntax (models.py, line 177)

# 1. Load Files

In [2]:
# Settlement records, read from the path configured in `constant`.
settlement = pd.read_parquet(constant.settlement_2019_path)

# Force the identifier-like windmill columns to be parsed as strings.
dtype = dict.fromkeys(
    ["GSRN", "Parent_GSRN", "Turbine_type", "BBR_municipal", "Placement", "grid"],
    str,
)

# Keep a reproducible 0.5% random sample of the windmill rows (random_state=1).
windmill = (
    pd.read_csv(constant.windmill_SC_path, dtype=dtype)
    .sample(frac=0.005, random_state=1)
)

# Weather inputs. NOTE(review): names suggest u/v wind components and
# temperature at two heights — confirm against the data source.
wu10, wv10, wu100, wv100 = (
    pd.read_parquet(path)
    for path in (
        constant.wu10_path,
        constant.wv10_path,
        constant.wu100_path,
        constant.wv100_path,
    )
)
tmp2, tmp100 = (
    pd.read_parquet(path)
    for path in (constant.tmp2_path, constant.tmp100_path)
)

### Aggregate windmill and settlement

In [3]:
# Join turbine metadata with settlement rows on the shared GSRN key.
# DataFrame.merge uses the same default (how="inner") as pd.merge.
df = windmill.merge(settlement, on="GSRN")

KeyboardInterrupt: 

# 2. Feature Extraction

In [None]:
# Hand the six weather frames loaded above to the `features` module.
# NOTE(review): presumably this registers them for the extraction step that
# follows — confirm in features.py. Must run before the extraction cell.
features.fun_register(wu10, wv10, wu100, wv100, tmp2, tmp100)

In [None]:
# Replace `df` with the output of features.parallelize_extract(df).
# NOTE(review): presumably adds the derived feature columns used by the model
# cells below — confirm in features.py. Because `df` is overwritten in place,
# re-running this cell feeds the already-processed frame back in.
df = features.parallelize_extract(df)

In [None]:
# Each call returns three values: the updated frame (rebinds `df`), an object
# kept as ply_models_* and a third value that is discarded into `_`.
# NOTE(review): presumably this adds the "<column>_q0.x" quantile columns listed
# in the feature sets further down — confirm in features.py.
df,ply_models_rn,_ = features.extract_quantiles(df, "hws_uv_rn")
df,ply_models_wsr,_ = features.extract_quantiles(df, "hws_uv_wsr")

In [None]:
# Columns that are left out of the min-max scaling applied in the next cell.
cols_filter = [
    # grouping / calendar keys
    "grid", "month", "hour",
    # turbine identifiers and categorical metadata
    "GSRN", "Turbine_type", "Placement", "Parent_GSRN", "BBR_municipal",
    # time information
    "TIME_CET", "predicted_ahead",
    # coordinates
    "UTM_x", "UTM_y",
    # target and its per-row maximum (scaled separately)
    "VAERDI", "max_VAERDI",
]

In [None]:
# Scale every column that is not listed in `cols_filter`.
cols = list(filter(lambda name: name not in cols_filter, df.columns))
df[cols] = tools.normalize_maxmin(df[cols])

# Express the target as a fraction of max_VAERDI.
# NOTE: this overwrites VAERDI, so re-running the cell divides it again.
df['VAERDI'] = df['VAERDI'].div(df['max_VAERDI'])

In [None]:
# Parse TIME_CET into pandas datetimes so it can be compared against the
# Timestamp cut-offs used for the train/evaluation split in the next cell.
df["TIME_CET"] = pd.to_datetime(df["TIME_CET"])

In [None]:
# Time-based split: everything before `date_s` is used for training and the
# half-open window [date_s, date_e) is held out for evaluation.
date_s = pd.to_datetime("2019-12-05")
date_e = pd.to_datetime("2019-12-06")

# Training rows are ordered per turbine, then chronologically.
df_train = (
    df[df["TIME_CET"] < date_s]
    .sort_values(["GSRN", "TIME_CET"])
    .reset_index(drop=True)
)

# Build ONE mask over the full frame. Chaining df[mask_a][mask_b] applies a
# mask indexed like `df` to an already-filtered frame, which makes pandas
# reindex the mask (UserWarning, and an error if the index is not unique).
df_evl = (
    df[(df["TIME_CET"] >= date_s) & (df["TIME_CET"] < date_e)]
    .sort_values(["TIME_CET"])
    .reset_index(drop=True)
)

# 3. NN Prediction and Evaluation

In [None]:
# Training configuration shared by every model trained below.
target = ["VAERDI"]   # column(s) passed as `target` to the training helper
n_groups = 5          # passed as `n_groups` to timeseries_kfold_validation_training
lr = 0.001            # learning rate
num_epochs = 20
hidden_size = 1000    # passed as `hidden_size` to MLP_Regression
output_size = 1

# Seed the global RNGs so Restart & Run All reproduces the same training run.
# NOTE(review): this only covers torch/numpy global randomness; any other
# source of randomness inside models.py would need its own seed.
seed = 1
np.random.seed(seed)
torch.manual_seed(seed);

## Original Features


In [None]:
# Baseline feature set: turbine attributes and raw weather columns, followed by
# the m1..m12 and h0..h23 columns (presumably month/hour indicators).
cols = [
    'Capacity_kw', 'Navhub_height', 'Rotor_diameter',
    'wu10', 'wv10', 'wu100', 'wv100', 'tmp2', 'tmp100',
    *(f"m{k}" for k in range(1, 13)),
    *(f"h{k}" for k in range(24)),
]

In [None]:
# Build the baseline network. The input width is taken from the feature list
# itself so the two cannot drift apart when `cols` is edited.
model_original = MLP_Regression(
    input_size=len(cols),
    hidden_size=hidden_size,
)

# Train on the pre-cut-off data; returns the trained model together with the
# train and test loss histories that are plotted in the next cell.
model_original, train_hist_original, test_hist_original = timeseries_kfold_validation_training(
    df=df_train,
    features=cols,
    target=target,
    n_groups=n_groups,
    model=model_original,
    lr=lr,
    num_epochs=num_epochs,
    save_name="SL_original_model.pth",
)

In [None]:
# Plot train vs. test loss history for the baseline model.
myplot.model_loss(
    {
        "Train loss": train_hist_original,
        "Test loss": test_hist_original,
    },
    save_name="SL_original_loss.png",
)

### Evaluation by test
BIAS, MAE, MSE, RMSE (plotted below as NBIAS, NMAE, NMSE, NRMSE)

In [None]:
# Score the baseline model on the held-out window.
# `cols` must still hold the baseline feature list here — run cells in order.
original_err = model_evaluation(
    df_evl,
    cols,
    model_original,
    save_name="SL_original_error.csv",
)

In [None]:
# Plot the four error series of the baseline model against time.
myplot.timelines(
    original_err["TIME_CET"],
    {metric: original_err[metric] for metric in ("NBIAS", "NMAE", "NMSE", "NRMSE")},
    save_name="SL_original_error.png",
)

# Prediction NN - Windshear

In [None]:
# Windshear feature set: turbine attributes, the hws_*_wsr columns with their
# powers and quantiles, htmp_exp, then the m1..m12 and h0..h23 columns
# (presumably month/hour indicators).
cols = [
    'Capacity_kw', 'Rotor_diameter',
    'hws_u_wsr', 'hws_v_wsr', 'hws_uv_wsr', 'hws_uv_wsr^2', 'hws_uv_wsr^3',
    'htmp_exp',
    *(f"hws_uv_wsr_q{q}" for q in ("0.1", "0.3", "0.5", "0.7", "0.9")),
    *(f"m{k}" for k in range(1, 13)),
    *(f"h{k}" for k in range(24)),
]

In [None]:
# Build the windshear network. The input width is taken from the feature list
# itself so the two cannot drift apart when `cols` is edited.
model_wsr = MLP_Regression(
    input_size=len(cols),
    hidden_size=hidden_size,
)

# Train on the pre-cut-off data; returns the trained model together with the
# train and test loss histories that are plotted in the next cell.
model_wsr, train_hist_wsr, test_hist_wsr = timeseries_kfold_validation_training(
    df=df_train,
    features=cols,
    target=target,
    n_groups=n_groups,
    model=model_wsr,
    lr=lr,
    num_epochs=num_epochs,
    save_name="SL_wsr_model.pth",
)

In [None]:
# Plot train vs. test loss history for the windshear model.
myplot.model_loss(
    {
        "Train loss": train_hist_wsr,
        "Test loss": test_hist_wsr,
    },
    save_name="SL_wsr_loss.png",
)

### Evaluation by test
BIAS, MAE, MSE, RMSE (plotted below as NBIAS, NMAE, NMSE, NRMSE)

In [None]:
# Score the windshear model on the held-out window.
# `cols` must still hold the windshear feature list here — run cells in order.
wsr_err = model_evaluation(
    df_evl,
    cols,
    model_wsr,
    save_name="SL_wsr_error.csv",
)

In [None]:
# Plot the four error series of the windshear model against time.
myplot.timelines(
    wsr_err["TIME_CET"],
    {metric: wsr_err[metric] for metric in ("NBIAS", "NMAE", "NMSE", "NRMSE")},
    save_name="SL_wsr_error.png",
)

# Prediction NN - Geo

In [None]:
# Geo feature set: turbine attributes, the hws_*_rn columns with their powers
# and quantiles, htmp_exp, Slope and Aspect, then the m1..m12 and h0..h23
# columns (presumably month/hour indicators).
cols = [
    'Capacity_kw', 'Rotor_diameter',
    'hws_u_rn', 'hws_v_rn', 'hws_uv_rn', 'hws_uv_rn^2', 'hws_uv_rn^3',
    'htmp_exp', 'Slope', 'Aspect',
    *(f"hws_uv_rn_q{q}" for q in ("0.1", "0.3", "0.5", "0.7", "0.9")),
    *(f"m{k}" for k in range(1, 13)),
    *(f"h{k}" for k in range(24)),
]

In [None]:
# Build the geo network. The input width is taken from the feature list and the
# hidden width from the shared `hidden_size` setting, matching the other models
# instead of repeating literal values.
model_geo = MLP_Regression(
    input_size=len(cols),
    hidden_size=hidden_size,
)

# Train on the pre-cut-off data; returns the trained model together with the
# train and test loss histories that are plotted in the next cell.
model_geo, train_hist_geo, test_hist_geo = timeseries_kfold_validation_training(
    df=df_train,
    features=cols,
    target=target,
    n_groups=n_groups,
    model=model_geo,
    lr=lr,
    num_epochs=num_epochs,
    save_name="SL_geo_model.pth",
)

In [None]:
# Plot train vs. test loss history for the geo model.
myplot.model_loss(
    {
        "Train loss": train_hist_geo,
        "Geo Test loss": test_hist_geo,
    },
    save_name="SL_geo_loss.png",
)

### Evaluation

In [None]:
# Score the geo model on the held-out window.
# `cols` must still hold the geo feature list here — run cells in order.
geo_err = model_evaluation(
    df_evl,
    cols,
    model_geo,
    save_name="SL_geo_error.csv",
)

In [None]:
# Plot the four error series of the geo model against time.
myplot.timelines(
    geo_err["TIME_CET"],
    {metric: geo_err[metric] for metric in ("NBIAS", "NMAE", "NMSE", "NRMSE")},
    save_name="SL_geo_error.png",
)

# Prediction NN - Semi Geo

In [None]:
# Semi-geo feature set: the windshear (hws_*_wsr) columns with their powers and
# quantiles, htmp_exp, plus Slope and Aspect, then the m1..m12 and h0..h23
# columns (presumably month/hour indicators).
cols = [
    'Capacity_kw', 'Rotor_diameter',
    'hws_u_wsr', 'hws_v_wsr', 'hws_uv_wsr', 'hws_uv_wsr^2', 'hws_uv_wsr^3',
    'htmp_exp', 'Slope', 'Aspect',
    *(f"hws_uv_wsr_q{q}" for q in ("0.1", "0.3", "0.5", "0.7", "0.9")),
    *(f"m{k}" for k in range(1, 13)),
    *(f"h{k}" for k in range(24)),
]

In [None]:
# Build the semi-geo network. The input width is taken from the feature list and
# the hidden width from the shared `hidden_size` setting, matching the other
# models instead of repeating literal values.
model_semigeo = MLP_Regression(
    input_size=len(cols),
    hidden_size=hidden_size,
)

# Train on the pre-cut-off data; returns the trained model together with the
# train and test loss histories that are plotted in the next cell.
model_semigeo, train_hist_semigeo, test_hist_semigeo = timeseries_kfold_validation_training(
    df=df_train,
    features=cols,
    target=target,
    n_groups=n_groups,
    model=model_semigeo,
    lr=lr,
    num_epochs=num_epochs,
    save_name="SL_semigeo_model.pth",
)

In [None]:
# Plot train vs. test loss history for the semi-geo model.
# The test curve is labelled "Test loss", like the Original and Windshear
# plots; a "Geo" prefix here would name the wrong model.
myplot.model_loss(
    {
        "Train loss": train_hist_semigeo,
        "Test loss": test_hist_semigeo,
    },
    save_name="SL_semigeo_loss.png",
)

### Evaluation

In [None]:
# Score the semi-geo model on the held-out window.
# `cols` must still hold the semi-geo feature list here — run cells in order.
semigeo_err = model_evaluation(
    df_evl,
    cols,
    model_semigeo,
    save_name="SL_semigeo_error.csv",
)

In [None]:
# Plot the four error series of the semi-geo model against time.
myplot.timelines(
    semigeo_err["TIME_CET"],
    {metric: semigeo_err[metric] for metric in ("NBIAS", "NMAE", "NMSE", "NRMSE")},
    save_name="SL_semigeo_error.png",
)

## Model Comparison

In [None]:
# Overlay the "accuracy" series of all four models on one timeline, using the
# baseline model's TIME_CET column as the shared x-axis.
myplot.timelines(
    original_err["TIME_CET"],
    {
        "Original": original_err["accuracy"],
        "WindShear": wsr_err["accuracy"],
        "Geo": geo_err["accuracy"],
        "SemiGeo": semigeo_err["accuracy"],
    },
    save_name="Model_compare.png",
)

In [None]:
# Compare the geo model's errors against the baseline's and print both
# returned values, one per line.
imp, r2 = model_improvement(geo_err, original_err)
print(imp, r2, sep="\n")