In [23]:
import time
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import r2_score

import constant
from constant import org_cols, wsr_cols, geo_pow_cols, geo_log_cols, semigeo_cols
import tools
import myplot
import features
from models import MLP_Regression, train_model, train_test_validation, model_evaluation, model_improvement, get_days_error, NRMSE_all
# Sub-directory for this experiment; appended to the shared model and plot roots from constant.
path = "SL_SC_1/"
# Prefix used further down when writing the grid-search result CSVs.
model_path = constant.model_path + path
# Plot output prefix, built the same way (not referenced in the cells visible here).
plot_path = constant.plot_path + path

# Load Files

In [2]:
# Settlement records that are later joined to the windmill table on GSRN.
settlement = pd.read_parquet(constant.settlement_train_path)

# Identifier-like columns are parsed as strings rather than inferred numerics.
dtype = dict.fromkeys(
    ["GSRN", "Parent_GSRN", "Turbine_type", "BBR_municipal", "Placement", "grid"],
    str,
)
# Work on a reproducible 5% sample of the windmills (fixed random_state).
windmill = pd.read_csv(constant.windmill_SL_SC_path, dtype=dtype).sample(
    frac=0.05, random_state=1
)

# Six weather tables that are handed to features.fun_register in a later cell
# (presumably wind components and temperatures at two levels - TODO confirm).
wu10, wv10, wu100, wv100 = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        constant.wu10_path,
        constant.wv10_path,
        constant.wu100_path,
        constant.wv100_path,
    )
)
tmp2, tmp100 = (
    pd.read_parquet(parquet_path)
    for parquet_path in (constant.tmp2_path, constant.tmp100_path)
)

In [3]:
# Number of windmills left after the 5% sampling (48 in the recorded run).
len(windmill)

48

In [4]:
# Attach windmill metadata to the settlement rows via GSRN (pd.merge defaults to an inner join).
df = pd.merge(windmill, settlement, on="GSRN")

# Feature Extraction

In [5]:
# Hand the six weather tables to the features module; how they are used is defined there, not here.
features.fun_register(wu10, wv10, wu100, wv100, tmp2, tmp100)

In [6]:
# Slow step: the recorded run printed "parallelize_extract time:  2849.28..." (presumably seconds).
# NOTE(review): df is overwritten with its own transform, so re-running this cell alone feeds
# already-extracted data back in; re-run the merge cell first.
df = features.parallelize_extract(df)

parallelize_extract time:  2849.281149864197


In [2]:
# Cache the extracted features on disk so the slow extraction does not have to be repeated.
# NOTE(review): the recorded output is a NameError - this cell was last run (execution count 2)
# in a kernel where df did not exist; run the cells above first.
df.to_parquet("./traintestdata/SL_SC.parquet", index=False)

NameError: name 'df' is not defined

In [24]:
# Restart point: reload the cached feature table from the same path the previous cell writes to.
df = pd.read_parquet("./traintestdata/SL_SC.parquet")

In [25]:
# Columns excluded from normalisation: every column NOT listed here is passed to
# tools.normalize_zcenter in a later cell.
cols_filter = ["grid", "month", "hour", "GSRN", "Turbine_type", "Placement", "Parent_GSRN", "BBR_municipal", "TIME_CET", "predicted_ahead", 'UTM_x', 'UTM_y', 'VAERDI', 'max_VAERDI', 'In_service']

In [26]:
# Rescale VAERDI by the max_VAERDI column of the same row.
# NOTE(review): not idempotent - running this cell twice without reloading df divides twice.
df["VAERDI"] = df["VAERDI"].div(df["max_VAERDI"])
# Make sure the timestamp column has datetime dtype before it is compared with the split date.
df["TIME_CET"] = pd.to_datetime(df["TIME_CET"])

In [27]:
# Normalise every column that is not listed in cols_filter.
# NOTE(review): this runs on the full frame before the train/evaluation split, so evaluation
# rows may influence the normalisation statistics - confirm against tools.normalize_zcenter.
cols = [name for name in df.columns if name not in cols_filter]
df[cols] = tools.normalize_zcenter(df[cols])

# Time-based split: rows before 2019-03-01 train the model, rows from that date on evaluate it.
date_s = pd.to_datetime("2019-03-01")
df_train = (
    df.loc[df["TIME_CET"] < date_s]
    .sort_values(["TIME_CET", "GSRN"])
    .reset_index(drop=True)
)
df_evl = (
    df.loc[df["TIME_CET"] >= date_s]
    .sort_values(["TIME_CET", "GSRN"])
    .reset_index(drop=True)
)

In [28]:
# Persist both splits; the training section below reloads them from these same paths.
df_train.to_parquet(constant.SL_SC_TRAIN_path, index=False)
df_evl.to_parquet(constant.SL_SC_EVL_path, index=False)

In [7]:
# Drop the reference to the full frame (presumably to free memory; the splits are on disk).
# NOTE(review): recorded execution count (7) is out of sequence with the surrounding cells.
df = None

# Neural Network Train

In [29]:
# Reload the training split written by the preprocessing section.
df_train = pd.read_parquet(constant.SL_SC_TRAIN_path)
# Row count of the training split (445587 in the recorded run).
len(df_train)

445587

In [30]:
# Reload the evaluation split written by the preprocessing section.
df_evl = pd.read_parquet(constant.SL_SC_EVL_path)
# Re-assert datetime dtype on TIME_CET; note the training frame is loaded without this step.
df_evl["TIME_CET"] = pd.to_datetime(df_evl["TIME_CET"])
# Row count of the evaluation split (37944 in the recorded run).
len(df_evl)

37944

## Grid_Search

In [74]:
# Hyper-parameter grid: every combination of the four lists below is tried (2 * 3 * 2 * 2 = 24).
hidden_size = [500, 600]
# Activation callables taken from torch.nn.functional.
f_active = [F.tanh, F.leaky_relu, F.relu]
lr = [0.01, 0.001]
num_epochs = [30, 40]
# Target column as a one-element list, so df_train[target].values is a 2-D (n, 1) array.
target = ["VAERDI"]

In [75]:
# Cartesian product of the grid, in the order hidden_size -> activation -> lr -> epochs.
# The position of an entry in this list is the row index of the grid-search result tables.
paras = [
    [width, activation, rate, epochs]
    for width in hidden_size
    for activation in f_active
    for rate in lr
    for epochs in num_epochs
]
len(paras)

24

In [76]:
# Keep a record of the grid: row i of the file corresponds to paras[i].
# The activation entries are function objects, so the CSV only holds their text form.
pd.DataFrame(paras).to_csv("paras.csv")

In [77]:
def Grid_Search(x_train_tensor, y_train_tensor, df_evl, df_train, paras, input_size, cols, seed=None):
    """Train one model per hyper-parameter combination and tabulate its errors.

    Parameters
    ----------
    x_train_tensor, y_train_tensor
        Training inputs and targets, passed unchanged to ``train_model``.
    df_evl, df_train
        Evaluation and training frames; each is scored with ``model_evaluation``.
    paras
        Sequence of ``[hidden_size, f_active, lr, num_epochs]`` entries.
    input_size
        Forwarded to ``train_model`` as ``input_size``.
    cols
        Feature column names handed to ``model_evaluation``.
    seed
        Optional integer. When given, ``torch.manual_seed(seed)`` is called
        before every training run so each combination starts from the same
        torch RNG state (other RNGs are not touched). ``None`` (the default)
        leaves the RNG alone, which is the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``paras``, in the same order (row ``i`` belongs to
        ``paras[i]``), with the columns NRMSE_train_all, NRMSE_evl_all,
        NRMSE_train_var, NRMSE_evl_var, R2_train, R2_evl and time.
    """
    result_columns = ["NRMSE_train_all", "NRMSE_evl_all", "NRMSE_train_var",
                      "NRMSE_evl_var", "R2_train", "R2_evl", "time"]
    # Rows are collected in a plain list and turned into a frame once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    records = []
    for i, para in enumerate(paras):
        if seed is not None:
            torch.manual_seed(seed)

        # Only the training call is timed; evaluation below is excluded.
        start = time.time()
        model, _, _ = train_model(input_size=input_size,
                                  hidden_size=para[0],
                                  f_active=para[1],
                                  lr=para[2],
                                  num_epochs=para[3],
                                  X_train=x_train_tensor,
                                  y_train=y_train_tensor,
                                  loss_record=False)
        end = time.time()

        model_train_errs = model_evaluation(df_train, cols, model, days=True)
        model_evl_errs = model_evaluation(df_evl, cols, model, days=True)

        r2_train = round(r2_score(model_train_errs["VAERDI"], model_train_errs["pred"]), 2)
        r2_evl = round(r2_score(model_evl_errs["VAERDI"], model_evl_errs["pred"]), 2)

        records.append({"NRMSE_train_all": NRMSE_all(model_train_errs),
                        "NRMSE_evl_all": NRMSE_all(model_evl_errs),
                        # Despite the "_var" suffix these two hold the standard
                        # deviation of the NRMSE column, not its variance.
                        "NRMSE_train_var": model_train_errs["NRMSE"].std(),
                        "NRMSE_evl_var": model_evl_errs["NRMSE"].std(),
                        "R2_train": r2_train,
                        "R2_evl": r2_evl,
                        "time": end - start})
        print(i, " Done")
    # Passing columns keeps the header (and its order) even when paras is empty.
    return pd.DataFrame(records, columns=result_columns)

In [78]:
def filtet_sort(x):
    """Keep rows whose training NRMSE exceeds the evaluation NRMSE, lowest evaluation NRMSE first.

    ``x`` is a grid-search result table with the columns ``NRMSE_train_all``
    and ``NRMSE_evl_all``; the original row index is preserved, so it still
    points into ``paras``.
    """
    train_above_evl = x["NRMSE_train_all"] > x["NRMSE_evl_all"]
    return x[train_above_evl].sort_values("NRMSE_evl_all")

## Original Features

In [79]:
# Inputs: the org_cols feature subset; target: the VAERDI column as an (n, 1) array.
x_train = df_train[org_cols].values
y_train = df_train[target].values
# torch.float32 is the same dtype as torch.float.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [80]:
# The input width is derived from the feature list instead of a hard-coded 45, so it cannot
# drift out of sync with org_cols (the training tensor is built from the same list).
original_grids = Grid_Search(x_train_tensor, y_train_tensor, df_evl, df_train, paras, len(org_cols), org_cols)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done
16  Done
17  Done
18  Done
19  Done
20  Done
21  Done
22  Done
23  Done


In [81]:
# Store the full result table for the original features.
# NOTE(review): this is the only grid CSV written with index=False; the wsr, geo_power and
# semigeo saves keep the index. Rows are still in paras order here, so row i maps to paras[i].
original_grids.to_csv(model_path + "original_grids.csv", index=False)

In [82]:
# Runs that pass the filter, lowest NRMSE_evl_all first; the row label is the index into paras.
filtet_sort(original_grids)

Unnamed: 0,NRMSE_train_all,NRMSE_evl_all,NRMSE_train_var,NRMSE_evl_var,R2_train,R2_evl,time
21,49.242375,1.125177,0.306169,2.032903,0.62,0.87,140.497334
2,13.666005,2.453326,0.831862,3.892718,0.18,0.21,123.939918
16,63.743537,4.263394,0.765645,4.098302,0.35,0.46,176.452258
14,9.564849,4.284871,0.790655,2.945447,0.22,0.17,152.438668
0,10.586077,8.651131,0.691107,2.295389,0.14,0.08,158.087064
9,25.55382,10.021348,0.524216,1.853533,0.73,0.51,145.477594
22,12.953725,12.90921,0.439055,1.776191,0.55,0.12,105.448136
8,65.776124,32.443847,0.68068,0.746543,0.14,-1.21,115.938078
20,79.062527,36.010206,0.810084,0.897613,-0.13,-1.75,105.5686


In [98]:
# Hyper-parameters behind row 21, the first row of the filtered table in the recorded run.
paras[21]

[600, <function torch.nn.functional.relu(input, inplace=False)>, 0.01, 40]

## WindShear Features

In [87]:
# Inputs: the wsr_cols feature subset; target: the VAERDI column as an (n, 1) array.
x_train = df_train[wsr_cols].values
y_train = df_train[target].values
# torch.float32 is the same dtype as torch.float.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [88]:
# The input width is derived from the feature list instead of a hard-coded 44, so it cannot
# drift out of sync with wsr_cols (the training tensor is built from the same list).
wsr_grids = Grid_Search(x_train_tensor, y_train_tensor, df_evl, df_train, paras, len(wsr_cols), wsr_cols)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done
16  Done
17  Done
18  Done
19  Done
20  Done
21  Done
22  Done
23  Done


In [89]:
# Store the full wind-shear result table; the index (position in paras) is written as the first column.
wsr_grids.to_csv(model_path+"wsr_grids.csv")

In [90]:
# Runs that pass the filter, lowest NRMSE_evl_all first; the row label is the index into paras.
filtet_sort(wsr_grids)

Unnamed: 0,NRMSE_train_all,NRMSE_evl_all,NRMSE_train_var,NRMSE_evl_var,R2_train,R2_evl,time
3,9.996591,0.368964,0.144374,0.191645,0.95,0.96,181.08539
12,1.766767,1.123484,0.223079,0.49022,0.85,0.92,170.902654
5,4.539607,3.015303,0.140445,0.515707,0.95,0.93,194.019592
9,3.792496,3.14445,0.155199,0.198039,0.93,0.94,128.815995
17,8.730232,3.291914,0.091491,0.257086,0.92,0.92,203.543437
23,22.174051,5.024781,0.179704,0.205849,0.89,0.87,138.247076
2,13.253502,7.34708,0.241306,0.38766,0.93,0.87,135.492389
18,37.178641,9.048733,0.384206,0.243918,0.81,0.8,136.374773
19,15.781747,9.529234,0.171534,0.376285,0.92,0.74,162.425164
22,14.552281,13.203876,0.232679,0.48669,0.91,0.62,103.311739


In [96]:
# Hyper-parameters behind row 3, the first row of the filtered table in the recorded run.
paras[3]

[500, <function torch.nn.functional.tanh(input)>, 0.001, 40]

## Geo_power Features

In [83]:
# Inputs: the geo_pow_cols feature subset; target: the VAERDI column as an (n, 1) array.
x_train = df_train[geo_pow_cols].values
y_train = df_train[target].values
# torch.float32 is the same dtype as torch.float.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [84]:
# The input width is derived from the feature list instead of a hard-coded 46, so it cannot
# drift out of sync with geo_pow_cols (the training tensor is built from the same list).
geo_power_grids = Grid_Search(x_train_tensor, y_train_tensor, df_evl, df_train, paras, len(geo_pow_cols), geo_pow_cols)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done
16  Done
17  Done
18  Done
19  Done
20  Done
21  Done
22  Done
23  Done


In [None]:
# Store the full geo_power result table; the index (position in paras) is written as the first column.
# NOTE(review): this cell has no execution count in the saved notebook, so the CSV may never
# have been written in the recorded session.
geo_power_grids.to_csv(model_path + "geo_power_grids.csv")

In [100]:
# Runs that pass the filter, lowest NRMSE_evl_all first; the row label is the index into paras.
filtet_sort(geo_power_grids)

Unnamed: 0,NRMSE_train_all,NRMSE_evl_all,NRMSE_train_var,NRMSE_evl_var,R2_train,R2_evl,time
0,2.371915,0.283493,0.115722,0.164086,0.95,0.96,106.348312
3,1.296326,1.055801,0.150239,0.269532,0.96,0.97,204.460937
21,5.424458,2.569548,0.138247,0.160998,0.94,0.95,162.818763
15,7.900909,3.228936,0.22231,0.422791,0.95,0.94,200.097982
18,23.809251,5.879459,0.20844,0.380745,0.86,0.83,147.429451
2,9.447065,6.07594,0.338269,0.829556,0.93,0.89,130.840174
9,38.20295,6.270224,0.134969,0.365773,0.83,0.9,148.235072
22,8.182518,6.477658,0.188568,0.278712,0.92,0.84,126.54964
10,16.770026,6.566142,0.169668,0.21635,0.89,0.79,98.600505
5,11.45955,9.184893,0.16214,0.572221,0.91,0.81,191.32314


In [101]:
# Hyper-parameters behind row 0, the first row of the filtered table in the recorded run.
paras[0]

[500, <function torch.nn.functional.tanh(input)>, 0.01, 30]

## Semigeo Features

In [91]:
# Inputs: the semigeo_cols feature subset; target: the VAERDI column as an (n, 1) array.
x_train = df_train[semigeo_cols].values
y_train = df_train[target].values
# torch.float32 is the same dtype as torch.float.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [92]:
# The input width is derived from the feature list instead of a hard-coded 47, so it cannot
# drift out of sync with semigeo_cols (the training tensor is built from the same list).
semigeo_grids = Grid_Search(x_train_tensor, y_train_tensor, df_evl, df_train, paras, len(semigeo_cols), semigeo_cols)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done
16  Done
17  Done
18  Done
19  Done
20  Done
21  Done
22  Done
23  Done


In [93]:
# Store the full semigeo result table; the index (position in paras) is written as the first column.
semigeo_grids.to_csv(model_path+"semigeo_grids.csv")

In [94]:
# Runs that pass the filter, lowest NRMSE_evl_all first; the row label is the index into paras.
filtet_sort(semigeo_grids)

Unnamed: 0,NRMSE_train_all,NRMSE_evl_all,NRMSE_train_var,NRMSE_evl_var,R2_train,R2_evl,time
7,16.679849,0.975444,0.147859,0.149801,0.92,0.96,156.741829
13,6.277858,1.957414,0.184565,0.160267,0.95,0.94,196.505705
12,3.972796,2.247181,0.270251,0.9708,0.86,0.86,166.864298
17,8.554809,2.559902,0.059527,0.081036,0.94,0.96,197.460035
10,8.539603,3.665428,0.085644,0.278185,0.94,0.92,97.129944
14,12.957412,7.786517,0.192642,0.222623,0.94,0.86,145.190564
22,31.982325,8.37157,0.206957,0.726034,0.86,0.84,121.622754
2,12.477438,8.676419,0.291857,0.301593,0.91,0.84,124.100292
18,11.589963,9.963492,0.154327,0.329464,0.91,0.75,157.133357
6,29.24237,10.981435,0.230084,0.439293,0.84,0.69,130.235646


In [95]:
# Hyper-parameters behind row 7, the first row of the filtered table in the recorded run.
paras[7]

[500,
 <function torch.nn.functional.leaky_relu(input, negative_slope=0.01, inplace=False)>,
 0.001,
 40]