# Addestramento modelli paper

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import numpy as np
import pandas as pd
import os

In [5]:
import matplotlib.pyplot as plt

In [7]:
!pip install pydot graphviz

Collecting pydot
  Downloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
Installing collected packages: pydot, graphviz
Successfully installed graphviz-0.20.1 pydot-1.4.2


In [9]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dropout, Concatenate, Input, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.layers.merge import concatenate
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

### Definizione funzioni d'utilità

In [10]:
# Chronological split of two aligned sequences, given the training percentage (perc_train)
def train_val_split(x, y, perc_train):
    """Split `x` and `y` at the same position, preserving their order.

    The cut index is ``int(len(x) * perc_train / 100)``: everything before it
    is the training part, everything from it onwards is the test part.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    cut = int(len(x) * perc_train / 100)
    head, tail = slice(None, cut), slice(cut, None)
    return x[head], y[head], x[tail], y[tail]

In [11]:
# Build the (input window, target window) training pairs from a time series.
# window_size_past   = input length (e.g. window_size_past=48 uses 48 hours, 2 days, for the prediction)
# window_size_future = output length (e.g. window_size_future=24 forecasts the load of the next 24 hours)
def windowed_dataset(series, window_size_past, window_size_future, step=24):
    """Slice `series` into aligned past/future windows.

    Args:
        series: Sliceable sequence (list or array) ordered in time.
        window_size_past: Number of samples in each input window.
        window_size_future: Number of samples in each target window.
        step: Offset between the starts of consecutive windows.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays; ``x[k]`` is an input window and
        ``y[k]`` the ``window_size_future`` samples that immediately follow it.
        Both are empty when `series` is too short for a single complete pair.
    """
    # Last start index for which BOTH windows fit entirely inside the series.
    # Stopping here avoids truncated trailing targets, which would make `y`
    # ragged and break the conversion to a regular array.
    last_start = len(series) - window_size_past - window_size_future
    x = []
    y = []
    for start in range(0, last_start + 1, step):
        split = start + window_size_past
        x.append(series[start:split])
        y.append(series[split:split + window_size_future])
    return np.array(x), np.array(y)

In [12]:
def plot_predict(values_true, values_predict, c1, c2, title, xlabel, ylabel, dim):
    """Draw the true and the predicted series on a new figure and show it.

    Args:
        values_true: Observed values; its length defines the x positions.
        values_predict: Predicted values, plotted at the same x positions.
        c1: Colour of the true curve (the predicted curve is always red).
        c2: Colour of the horizontal grid lines.
        title, xlabel, ylabel: Figure title and axis labels.
        dim: Figure size, passed to ``plt.figure(figsize=...)``.
    """
    plt.figure(figsize=dim)
    plt.grid(color=c2, linestyle="-", linewidth="0.2", axis="y")

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Both curves share the same integer x positions 0..len(values_true)-1.
    positions = list(range(len(values_true)))
    for curve, colour in ((values_true, c1), (values_predict, "red")):
        plt.plot(positions, curve, color=colour, linewidth="2")

    # Saving the figure to disk is currently disabled; the plot is only displayed.
    plt.show()

### Import dei dati e reshaping

In [13]:
# Read the training CSV from a path relative to the current working directory.
dataset_path = os.path.join("Dataset", "dataset_addestramento.csv")
df = pd.read_csv(dataset_path)
# NOTE(review): the rendered output shows an "Unnamed: 0" column, which looks like a
# row index saved into the CSV — consider reading with index_col=0.
# Bare expression: displays the frame as the cell output.
df

Unnamed: 0,Year,Month,Day,Hour,PowerL1,PowerL2,PowerL3,DayOfWeek
0,2020,5,1,0,782.501983,896.650392,773.000000,4
1,2020,5,1,1,937.390867,888.718967,804.916667,4
2,2020,5,1,2,785.457063,889.193792,767.041667,4
3,2020,5,1,3,1035.463429,896.947979,770.708333,4
4,2020,5,1,4,791.389254,897.279196,770.958333,4
...,...,...,...,...,...,...,...,...
18259,2022,5,31,19,1758.702608,2371.511483,1774.948292,1
18260,2022,5,31,20,1634.401029,1976.749562,1464.862742,1
18261,2022,5,31,21,1742.510279,1890.756617,1394.751854,1
18262,2022,5,31,22,1631.491646,1839.695404,1365.817237,1


In [14]:
# One-hot encode the calendar columns. Each column is first turned into an (n_rows, 1) array.
hour_column = np.array(df["Hour"]).reshape(-1, 1)
dayofweek_column = np.array(df["DayOfWeek"]).reshape(-1, 1)

# The same encoder object is fitted twice, so after this cell it holds the DayOfWeek fit.
onehot_encoder = OneHotEncoder()
onehot_hour = onehot_encoder.fit_transform(hour_column).toarray()
onehot_dayofweek = onehot_encoder.fit_transform(dayofweek_column).toarray()

In [15]:
# Per-phase power readings as plain NumPy arrays, one array per column.
powerL1, powerL2, powerL3 = (
    df[column].values for column in ('PowerL1', 'PowerL2', 'PowerL3')
)

In [16]:
# Scale every phase to [0, 1] with its OWN MinMaxScaler.
# Re-fitting a single scaler for each phase would leave it holding only the last
# phase's min/max, so inverse-transforming predictions of another phase would
# return values in the wrong range.
scaler_L1 = MinMaxScaler()
scaler_L2 = MinMaxScaler()
scaler_L3 = MinMaxScaler()

# reshape(-1, 1): the scaler expects a 2-D (n_samples, n_features) array.
data_powerL1 = scaler_L1.fit_transform(powerL1.reshape(-1, 1))
data_powerL2 = scaler_L2.fit_transform(powerL2.reshape(-1, 1))
data_powerL3 = scaler_L3.fit_transform(powerL3.reshape(-1, 1))

# Later cells call `scaler.inverse_transform` on the model output, and the model
# target is the scaled PowerL1 series, so `scaler` must be the PowerL1 scaler.
scaler = scaler_L1
# NOTE(review): the scalers are fitted on the full series, before any train/test
# split — confirm this is acceptable for the evaluation.

In [17]:
# WINDOWS
# window_size_past   = hours of history used for each prediction
# window_size_future = hours to forecast after each history window
# step               = offset between consecutive history windows; also the number
#                      of target hours generated per window (one sample per hour)
window_size_past = 7 * 24
window_size_future = 24  # read by the plotting / test-model cells further down
step = 24

x_load = []
x_other_feature = []
y = []

# Only PowerL1 is used as model input and target here.
# The upper bound guarantees that all `step` target hours following a window
# exist, so the indexing below never runs past the end of the data.
for i in range(0, len(df) - window_size_past - step + 1, step):

    x_L1 = data_powerL1[i:i + window_size_past]

    # A separate index `j` walks the target hours. Reusing `i` here would make
    # every window share the same targets and calendar features.
    for j in range(step):
        target_idx = i + window_size_past + j

        y_L1 = data_powerL1[target_idx]

        # Calendar features describe the hour being predicted, not the history.
        x_dayofweek = onehot_dayofweek[target_idx]
        x_hour = onehot_hour[target_idx]
        day_hour = np.concatenate((x_dayofweek, x_hour))

        # One training sample per target hour: (history window, calendar features) -> load.
        x_load.append(x_L1)
        x_other_feature.append(day_hour)
        y.append(y_L1)

x_load = tf.stack(x_load)
x_other_feature = tf.stack(x_other_feature)
y = tf.stack(y)

In [18]:
#x_train, y_train, x_test, y_test = train_val_split(x, y, 80)

### PAPER13

In [19]:
# LOAD PIPELINE
# The load history window enters as a (window_size_past, 1) sequence and is read
# by three parallel Conv1D + MaxPooling1D branches with kernel sizes 3, 5 and 7.
input_load = Input(shape=(window_size_past, 1), name="input_load")

# Branch 1 (kernel size 3)
branch1_1 = Conv1D(filters=64, kernel_size=3, activation='relu', name="branch1_1")(input_load)
branch1_2 = MaxPooling1D(pool_size=2, strides=2, name="branch1_2")(branch1_1)

# Branch 2 (kernel size 5)
branch2_1 = Conv1D(filters=64, kernel_size=5, activation='relu', name="branch2_1")(input_load)
branch2_2 = MaxPooling1D(pool_size=2, strides=2, name="branch2_2")(branch2_1)

# Branch 3 (kernel size 7)
branch3_1 = Conv1D(filters=64, kernel_size=7, activation='relu', name="branch3_1")(input_load)
branch3_2 = MaxPooling1D(pool_size=2, strides=2, name="branch3_2")(branch3_1)

# Join the branches along the time axis (axis=1): their sequence lengths differ
# because of the different kernel sizes, while all of them have 64 channels.
# The tf.keras `Concatenate` layer is used so the whole graph is built from
# tensorflow.keras objects only.
concatenated_layer1 = Concatenate(axis=1, name="concatenated_layer1")([branch1_2, branch2_2, branch3_2])

lstm_layer = LSTM(64, return_sequences=True, name="lstm_layer")(concatenated_layer1)

# Average the LSTM outputs over the time axis to get one vector per sample.
global_average_pooling_layer = GlobalAveragePooling1D(name="global_average_pooling_layer")(lstm_layer)

In [20]:
# OTHER-FEATURE PIPELINE
# 31 inputs per sample — presumably 7 day-of-week + 24 hour one-hot columns
# (the vectors stored in x_other_feature); confirm against the data.
input_other_feature = Input(shape=(31, ), name="input_other_feature")
dense1 = Dense(64, activation="relu", name="dense1")(input_other_feature)
# NOTE(review): dense1 already yields one 64-value vector per sample, so this
# Flatten appears to leave the shape unchanged.
flatten1 = Flatten(name="flatten1")(dense1)

In [21]:
# MERGE PIPELINES
# Join the pooled load representation with the calendar-feature representation.
concatenated_layer2 = Concatenate(axis=1, name="concatenated_layer2")([global_average_pooling_layer, flatten1])

dense2 = Dense(64, activation="relu", name="dense2")(concatenated_layer2)
dense3 = Dense(64, activation="relu", name="dense3")(dense2)
# Regression head: a single linear unit. Softmax over one unit always outputs
# exactly 1.0, which would make the prediction constant and untrainable.
output_layer = Dense(1, activation="linear", name="output_layer")(dense3)

# Two-input model: [load window, calendar features] -> one scaled load value.
model = Model(inputs=[input_load, input_other_feature], outputs=[output_layer])

In [None]:
# from tensorflow import keras
# keras.utils.plot_model(model, "architecture.png", show_shapes=True)

In [22]:
# Print the layer-by-layer structure (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_load (InputLayer)        [(None, 168, 1)]     0           []                               
                                                                                                  
 branch1_1 (Conv1D)             (None, 166, 64)      256         ['input_load[0][0]']             
                                                                                                  
 branch2_1 (Conv1D)             (None, 164, 64)      384         ['input_load[0][0]']             
                                                                                                  
 branch3_1 (Conv1D)             (None, 162, 64)      512         ['input_load[0][0]']             
                                                                                              

In [None]:
# Adam optimiser with MSE loss; MAPE is tracked as an additional metric.
# NOTE(review): learning_rate=0.1 is far above Adam's usual default (0.001) — confirm it is intentional.
# NOTE(review): the target is min-max scaled, so it presumably contains values at or near 0,
# where a percentage error becomes very large — verify that MAPE is meaningful here.
optimizer = Adam(learning_rate=0.1)
model.compile(optimizer=optimizer, loss="mean_squared_error", metrics=["mean_absolute_percentage_error"])

In [None]:
# Train on all windowed samples: [load window, calendar features] -> scaled PowerL1 value.
# No validation data is passed to fit().
model.fit([x_load, x_other_feature], y, epochs=5, batch_size=32)

In [None]:
# The model has two inputs (load window + calendar features), so both must be supplied.
# Predict the last `step` samples, i.e. one value per target hour.
# NOTE(review): no held-out split exists for this pipeline (the split cell is commented
# out), so these samples were also part of the training data.
prediction = model.predict([x_load[-step:], x_other_feature[-step:]])

In [None]:
# Map the scaled predictions back to the original units via `scaler`, as a single column.
forecast = scaler.inverse_transform(np.array(prediction).reshape(-1, 1))

In [None]:
# Ground truth for the last `step` samples, taken from the windowed targets `y`
# (no `y_test` is defined for this pipeline), mapped back to the original units.
true = scaler.inverse_transform(np.asarray(y[-step:]).reshape(-1, 1))

In [None]:
# Plot the true curve against the forecast.
title = "Curva di carico giornaliera"
xlabel = "TotalPower"
ylabel = "W"
dim = (15,5)
c1 = "#2271b3"  # colour of the true curve
c2 = "#000000"  # colour of the grid lines
# reshape(-1) flattens both column vectors without needing the horizon length.
plot_predict(true.reshape(-1), forecast.reshape(-1), c1, c2, title, xlabel, ylabel, dim)

### TEST

In [None]:
# Plain CNN baseline: three Conv1D stages followed by a dense layer with one
# output per forecast hour. Note that this rebinds `model`.
model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(window_size_past, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(window_size_future),
])

In [None]:
# Print the layer-by-layer structure of the model currently bound to `model`.
model.summary()

In [None]:
# Adam optimiser (selected by name) with MSE loss; no extra metrics.
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# NOTE(review): x_train / y_train are not defined in the visible cells — presumably they
# come from windowed_dataset + train_val_split; confirm before running.
model.fit(x_train, y_train, epochs=10, batch_size=16)

In [None]:
# Loss on the test part. NOTE(review): x_test / y_test are not defined in the visible cells.
loss = model.evaluate(x_test, y_test)

In [None]:
# Predict from the last test window, reshaped to a batch of one (1, window_size_past, 1).
prediction = model.predict(x_test[-1].reshape(1, window_size_past, 1))

In [None]:
# Map the scaled predictions back to the original units via `scaler`, as a single column.
forecast = scaler.inverse_transform(np.array(prediction).reshape(-1, 1))

In [None]:
# Ground truth of the last test window, mapped back to the original units via `scaler`.
true = scaler.inverse_transform(y_test[-1])

In [None]:
# NOTE(review): this repeats the plot_predict definition from the utility-functions
# section at the top of the notebook; keep a single definition if possible.
def plot_predict(values_true, values_predict, c1, c2, title, xlabel, ylabel, dim):
    """Draw the true and the predicted series on a new figure and show it.

    Args:
        values_true: Observed values; its length defines the x positions.
        values_predict: Predicted values, plotted at the same x positions.
        c1: Colour of the true curve (the predicted curve is always red).
        c2: Colour of the horizontal grid lines.
        title, xlabel, ylabel: Figure title and axis labels.
        dim: Figure size, passed to ``plt.figure(figsize=...)``.
    """
    plt.figure(figsize=dim)
    plt.grid(color=c2, linestyle="-", linewidth="0.2", axis="y")

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Both curves share the same integer x positions 0..len(values_true)-1.
    positions = list(range(len(values_true)))
    for curve, colour in ((values_true, c1), (values_predict, "red")):
        plt.plot(positions, curve, color=colour, linewidth="2")

    # Saving the figure to disk is currently disabled; the plot is only displayed.
    plt.show()

In [None]:
# Plot the true curve against the forecast.
title = "Curva di carico giornaliera"
xlabel = "TotalPower"
ylabel = "W"
dim = (15,5)
c1 = "#2271b3"  # colour of the true curve
c2 = "#000000"  # colour of the grid lines
# reshape(-1) flattens both arrays without needing the horizon length.
plot_predict(true.reshape(-1), forecast.reshape(-1), c1, c2, title, xlabel, ylabel, dim)