In [28]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import r2_score

import constant
from constant import org_cols, wsr_cols, geo_pow_cols, geo_log_cols, semigeo_cols
import tools
import myplot
import features
from models import MLP_Regression, train_model, train_test_validation, model_evaluation, model_improvement, get_days_error, NRMSE_all
# Experiment sub-directory appended to the shared output roots from `constant`.
path = "SL_SC_1/"
# Prefix used by the cells below when writing the grid-search result CSVs.
model_path = constant.model_path + path
# Prefix for plot output; not referenced by the cells shown in this notebook.
plot_path = constant.plot_path + path

# Neural Network Training

In [29]:
class MLP_Regression(nn.Module):
    """Feed-forward regressor: one hidden layer followed by a scalar output.

    Args:
        input_size: Width of each input row.
        hidden_size: Number of units in the hidden layer.
        f_active: Callable applied to the output of ``fc1``.
        bias: Constant written into every element of ``fc1``'s bias vector.

    Note:
        ``fc2`` and the dropout module ``d`` are registered on the module
        (so they appear in ``state_dict``) but ``forward`` does not apply them.
    """

    def __init__(self, input_size, hidden_size, f_active, bias=0):
        super().__init__()
        # Construction order fixes the order of random initialisation draws.
        self.fc1 = nn.Linear(input_size, hidden_size)
        nn.init.constant_(self.fc1.bias, bias)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)
        self.d = nn.Dropout(p=0.5)
        self.f_active = f_active

    def forward(self, x):
        """Map ``x`` through ``fc1`` -> activation -> ``fc3``."""
        hidden = self.f_active(self.fc1(x))
        return self.fc3(hidden)

In [30]:
def train_model(
    input_size,
    hidden_size,
    f_active,
    lr,
    num_epochs,
    X_train,
    y_train
    ):
    """Fit an ``MLP_Regression`` with full-batch Adam and return it in eval mode.

    Each epoch is a single forward/backward pass over all of ``X_train``
    (no mini-batching), minimising the Smooth L1 loss.

    Args:
        input_size: Number of feature columns in ``X_train``.
        hidden_size: Width of the hidden layer.
        f_active: Activation callable forwarded to ``MLP_Regression``.
        lr: Learning rate for Adam.
        num_epochs: Number of full-batch optimisation steps.
        X_train: Feature tensor, one row per sample.
        y_train: Target tensor with one value per sample. A flat ``(N,)``
            tensor is reshaped to match the ``(N, 1)`` model output.

    Returns:
        The trained model after ``model.eval()``.
    """
    model = MLP_Regression(
        input_size=input_size,
        hidden_size=hidden_size,
        f_active=f_active
        )

    optimiser = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.SmoothL1Loss()

    target = y_train
    for _ in range(num_epochs):
        y_pred = model(X_train).float()

        # An (N,) target against an (N, 1) prediction would broadcast to an
        # (N, N) loss matrix; PyTorch only warns about it and this notebook
        # silences warnings. Align the shapes when the element counts agree.
        if target.shape != y_pred.shape and target.numel() == y_pred.numel():
            target = target.reshape(y_pred.shape)

        loss = loss_fn(y_pred, target)

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    return model.eval()

In [31]:
# Load the training set from the parquet path configured in `constant`.
df_train = pd.read_parquet(constant.SL_SC_TRAIN_path)

In [32]:
# Row count of the training frame (sanity check after loading).
len(df_train)

445587

In [33]:
# Load the evaluation set and parse its TIME_CET column as datetimes in a
# single expression, so the cell never exposes a half-converted frame.
df_evl = (
    pd.read_parquet(constant.SL_SC_EVL_path)
    .assign(TIME_CET=lambda frame: pd.to_datetime(frame["TIME_CET"]))
)

In [34]:
# Row count of the evaluation frame (sanity check after loading).
len(df_evl)

37944

In [41]:
# Hyper-parameter grid: every combination of the four lists below is trained.
hidden_size = [700, 1000]          # hidden-layer widths
f_active = [F.sigmoid, F.relu]     # activation callables
lr = [0.01, 0.001]                 # Adam learning rates
num_epochs = [30, 40]              # full-batch optimisation steps per model
# Kept as a list so that df[target] yields a 2-D (N, 1) array.
target = ["VAERDI"]

In [42]:
# Cartesian product of the grid. Each entry is
# [hidden_size, activation, learning_rate, num_epochs]; num_epochs varies
# fastest and hidden_size slowest, so list indices map back to combinations.
paras = [
    [hs, ac, rate, ep]
    for hs in hidden_size
    for ac in f_active
    for rate in lr
    for ep in num_epochs
]

In [43]:
# Number of hyper-parameter combinations in the grid.
len(paras)

16

In [44]:
def Grid_Search(x_train_tensor, y_train_tensor, df_evl, paras, input_size, cols):
    """Train one model per hyper-parameter combination and tabulate its scores.

    Args:
        x_train_tensor: Training features forwarded to ``train_model``.
        y_train_tensor: Training targets forwarded to ``train_model``.
        df_evl: Evaluation frame handed to ``get_days_error``.
        paras: Sequence of ``[hidden_size, f_active, lr, num_epochs]`` entries.
        input_size: Number of input features of the network.
        cols: Feature column names handed to ``get_days_error``.

    Returns:
        DataFrame with one row per entry of ``paras`` (in the same order) and
        the columns ``NRMSE_all``, ``NRMSE_var`` and ``R2``. ``NRMSE_var``
        holds the ``.std()`` of the ``NRMSE`` column returned by
        ``get_days_error``; ``R2`` is rounded to two decimals.
    """
    columns = ["NRMSE_all", "NRMSE_var", "R2"]
    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []
    for i, para in enumerate(paras):
        model = train_model(
            input_size=input_size,
            hidden_size=para[0],
            f_active=para[1],
            lr=para[2],
            num_epochs=para[3],
            X_train=x_train_tensor,
            y_train=y_train_tensor,
        )
        model_errs = get_days_error(df_evl, model, cols)

        model_NRMSE_all = NRMSE_all(model_errs)
        r2 = round(r2_score(model_errs["VAERDI"], model_errs["pred"]), 2)

        records.append({
            "NRMSE_all": model_NRMSE_all,
            "NRMSE_var": model_errs["NRMSE"].std(),
            "R2": r2,
        })
        print(i, " Done")
    return pd.DataFrame(records, columns=columns)

## Original Features

In [45]:
# Original feature set. `target` is a list of column names, so the target
# array is 2-D and lines up with the model's (N, 1) output.
x_train = df_train[org_cols].values
y_train = df_train[target].values
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [46]:
# Take the network input width from the feature tensor itself so it cannot
# drift from the number of columns selected by `org_cols`.
original_grids = Grid_Search(
    x_train_tensor,
    y_train_tensor,
    df_evl,
    paras,
    x_train_tensor.shape[1],
    org_cols,
)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done


In [47]:
# Persist the grid-search table under `model_path`, then display it.
original_grids.to_csv(model_path+"original_grids.csv")
original_grids

Unnamed: 0,NRMSE_all,NRMSE_var,R2
0,2.494173,5.945952,0.16
1,25.828534,9.009301,-0.92
2,17.964613,2.983801,-0.39
3,3.254622,5.034674,0.17
4,48.018929,1.524675,-3.71
5,26.468518,1.432544,-0.49
6,16.636898,2.427307,-0.09
7,21.732142,1.476118,-0.18
8,55.89937,1.560212,-5.35
9,60.026715,1.376971,-6.09


In [49]:
# Hyper-parameters of grid row 13, ordered [hidden_size, activation, lr, num_epochs].
paras[13]

[1000, <function torch.nn.functional.relu(input, inplace=False)>, 0.01, 40]

## Geo_power Features

In [14]:
# Geo-power feature set: rebuild the training tensors from `geo_pow_cols`.
# The target stays 2-D because `target` is a list of column names.
x_train = df_train[geo_pow_cols].values
y_train = df_train[target].values
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [15]:
# Take the network input width from the feature tensor itself so it cannot
# drift from the number of columns selected by `geo_pow_cols`.
geo_power_grids = Grid_Search(
    x_train_tensor,
    y_train_tensor,
    df_evl,
    paras,
    x_train_tensor.shape[1],
    geo_pow_cols,
)

0  Done
1  Done
2  Done
3  Done
4  Done
5  Done
6  Done
7  Done
8  Done
9  Done
10  Done
11  Done
12  Done
13  Done
14  Done
15  Done


In [26]:
# Persist the grid-search table under `model_path`, then display it.
geo_power_grids.to_csv(model_path+"geo_power_grids.csv")
geo_power_grids

Unnamed: 0,NRMSE_all,NRMSE_var,R2
0,1437.392331,162.287504,-886220.04
1,1089.466996,106.423302,-510726.36
2,9.972991,0.979482,-57.54
3,25.823956,2.865919,-285.74
4,532.377211,53.331457,-122900.16
5,495.70265,80.688738,-107756.56
6,131.678412,13.410671,-7669.12
7,9.292895,4.770703,-125.75
8,1677.475506,162.127441,-1211613.66
9,249.722919,12.758064,-28027.33


In [23]:
# Hyper-parameters of grid row 6, ordered [hidden_size, activation, lr, num_epochs].
# NOTE(review): the saved output lists 20 epochs, a value the current
# `num_epochs` grid does not contain; re-run after the grid definition.
paras[6]

[700, <function torch.nn.functional.relu(input, inplace=False)>, 0.001, 20]

## Geo_Log Features

In [None]:
# Geo-log feature set: rebuild the training tensors from `geo_log_cols`.
# The target stays 2-D because `target` is a list of column names.
x_train = df_train[geo_log_cols].values
y_train = df_train[target].values
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [None]:
# Take the network input width from the feature tensor itself so it cannot
# drift from the number of columns selected by `geo_log_cols`.
geo_log_grids = Grid_Search(
    x_train_tensor,
    y_train_tensor,
    df_evl,
    paras,
    x_train_tensor.shape[1],
    geo_log_cols,
)

In [None]:
# Persist the grid-search table under `model_path`, then display it.
geo_log_grids.to_csv(model_path+"geo_log_grids.csv")
geo_log_grids

## WindShear Features

In [17]:
# Wind-shear feature set: rebuild the training tensors from `wsr_cols`.
# NOTE(review): the saved output under this cell is a NameError for
# `df_train_B`, a name this cell does not use; re-run to refresh it.
x_train = df_train[wsr_cols].values
y_train = df_train[target].values
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

NameError: name 'df_train_B' is not defined

In [None]:
# Take the network input width from the feature tensor itself so it cannot
# drift from the number of columns selected by `wsr_cols`.
wsr_grids = Grid_Search(
    x_train_tensor,
    y_train_tensor,
    df_evl,
    paras,
    x_train_tensor.shape[1],
    wsr_cols,
)

In [None]:
# Persist the grid-search table under `model_path`, then display it.
wsr_grids.to_csv(model_path+"wsr_grids.csv")
wsr_grids

In [42]:
# Hyper-parameters of grid row 6, ordered [hidden_size, activation, lr, num_epochs].
# NOTE(review): the saved output lists 20 epochs, a value the current
# `num_epochs` grid does not contain; re-run after the grid definition.
paras[6]

[700, <function torch.nn.functional.relu(input, inplace=False)>, 0.001, 20]

## Semigeo Features

In [None]:
# Semi-geo feature set: rebuild the training tensors from `semigeo_cols`.
# The target stays 2-D because `target` is a list of column names.
x_train = df_train[semigeo_cols].values
y_train = df_train[target].values
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [None]:
# Take the network input width from the feature tensor itself so it cannot
# drift from the number of columns selected by `semigeo_cols`.
semigeo_grids = Grid_Search(
    x_train_tensor,
    y_train_tensor,
    df_evl,
    paras,
    x_train_tensor.shape[1],
    semigeo_cols,
)

In [None]:
# Persist the grid-search table under `model_path`.
semigeo_grids.to_csv(model_path+"semigeo_grids.csv")

In [None]:
# Display the semi-geo grid-search table.
semigeo_grids

In [43]:
# Hyper-parameters of grid row 3, ordered [hidden_size, activation, lr, num_epochs].
# NOTE(review): with the grid as defined in this notebook, row 3 uses
# num_epochs[1]; the saved output shows 30 epochs, so it appears to come
# from an earlier grid. Re-run to refresh.
paras[3]

[700, <function torch.nn.functional.sigmoid(input)>, 0.001, 30]