In [1]:
import pandas as pd
import os
import numpy as np
from functools import reduce

In [None]:
os.listdir()

In [82]:
dap = pd.read_csv('dap_2018-2023.csv', sep=';', parse_dates=['Unnamed: 0']).rename(columns={"Unnamed: 0":'Timestamp', '0':'DAP'})
forecast_load = pd.read_csv('forecast_load_2018-2023.csv', sep=';', parse_dates=['Unnamed: 0']).rename(columns={'Unnamed: 0':'Timestamp'})
forecast_renewable = pd.read_csv('Forecast_renewable_2018-2023.csv', sep=';', parse_dates=['Unnamed: 0']).rename(columns={'Unnamed: 0':'Timestamp'})

In [83]:
dap.loc[dap['DAP']>= 2000, 'DAP'] = np.nan
dap = dap.interpolate()

  dap = dap.interpolate()


In [84]:
dap.isna().sum().sum(), forecast_load.isna().sum().sum(), forecast_renewable.isna().sum().sum()

(0, 0, 41052)

In [85]:
def timestamp_parser(s):
    return pd.to_datetime(s['Timestamp'], utc=True).dt.tz_convert('Europe/Paris').dt.tz_localize(None)

In [86]:
dap['Timestamp'] = timestamp_parser(dap)
forecast_load['Timestamp'] = timestamp_parser(forecast_load)
forecast_renewable['Timestamp'] = timestamp_parser(forecast_renewable)

In [87]:
dap.drop_duplicates(subset='Timestamp', keep='last', inplace=True)
forecast_load.drop_duplicates(subset='Timestamp', keep='last', inplace=True)
forecast_renewable.drop_duplicates(subset='Timestamp', keep='last', inplace=True)

In [88]:
dap.isna().sum().sum(), forecast_load.isna().sum().sum(), forecast_renewable.isna().sum().sum()

(0, 0, 41049)

In [89]:
forecast_renewable.fillna(value=0.0, inplace=True)

In [90]:
dap.isna().sum().sum(), forecast_load.isna().sum().sum(), forecast_renewable.isna().sum().sum()

(0, 0, 0)

In [91]:
forecast_renewable['Wind'] = forecast_renewable['Wind Offshore'] + forecast_renewable['Wind Onshore']
forecast_renewable.drop(columns=['Wind Offshore', 'Wind Onshore'], inplace=True)


In [92]:
def fillna_moving_average(df):
    if df.isna().any().sum()>0:
        return df.fillna((df.ffill() + df.bfill())/2, inplace=False)
    else:
        return df

In [94]:
forecast_load = fillna_moving_average(forecast_load.set_index('Timestamp').resample('h').mean()).reset_index()
forecast_renewable=fillna_moving_average(forecast_renewable.set_index('Timestamp').resample('h').mean()).reset_index()
dap = fillna_moving_average(dap.set_index('Timestamp').resample('h').mean()).reset_index()

In [95]:
dap.isna().sum().sum(), forecast_load.isna().sum().sum(), forecast_renewable.isna().sum().sum()

(0, 0, 0)

In [96]:
df = reduce(lambda df1,df2:pd.merge(df1,df2, on='Timestamp'), [dap, forecast_load, forecast_renewable])

In [97]:
df.isna().sum()

Timestamp          0
DAP                0
Forecasted Load    0
Solar              0
Wind               0
dtype: int64

In [98]:
df['Residual_Demand'] = df['Forecasted Load'] - df['Solar'] - df['Wind']

In [99]:
df.drop(columns=['Solar','Wind','Forecasted Load'], inplace=True)

In [100]:
df['Date'] = df['Timestamp'].dt.date

In [101]:
df2 = df.drop(columns=['Timestamp']).groupby(['Date'], group_keys=False).apply(lambda x:x.drop(columns=['Date']).values.reshape((1,48))).reset_index()

  df2 = df.drop(columns=['Timestamp']).groupby(['Date'], group_keys=False).apply(lambda x:x.drop(columns=['Date']).values.reshape((1,48))).reset_index()


In [None]:
df2[0].iloc[0].shape

In [102]:
df2['A'] = df2[0].apply(lambda x:x.tolist()[0])

In [None]:
df2

In [103]:
df2[[item for sublist in zip([f'DAP{i}' for i in range(24)], [f'FRD{i}' for i in range(24)]) for item in sublist]] = df2['A'].apply(pd.Series)

In [104]:
df2.drop(columns=[0,'A'],inplace=True)

In [None]:
df2

In [105]:
Y = df2[[f'DAP{i}' for i in range(24)]]
X = df2[[f'FRD{i}' for i in range(24)]]

# MLP

In [19]:
from sklearn.preprocessing import MinMaxScaler

In [21]:
X.to_numpy()

array([[56859., 53558., 52054., ..., 49405., 49259., 52641.],
       [49249., 45931., 44183., ..., 45708., 43952., 46982.],
       [44525., 41357., 40440., ..., 50455., 48569., 52230.],
       ...,
       [39563., 36971., 36466., ..., 43717., 42588., 44865.],
       [42237., 39997., 40029., ..., 37054., 36866., 38231.],
       [35588., 32520., 30963., ..., 33642., 34910., 37230.]])

In [22]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

In [176]:
scaler_y.fit_transform(Y).astype('float32')

array([[0.08529983, 0.09006682, 0.09589317, ..., 0.03981327, 0.07442037,
        0.07983589],
       [0.07153802, 0.06875271, 0.06923375, ..., 0.04639009, 0.06126731,
        0.07064806],
       [0.0559958 , 0.04774794, 0.05380017, ..., 0.05218396, 0.065844  ,
        0.06720926],
       ...,
       [0.00049618, 0.00371218, 0.01124136, ..., 0.01952494, 0.03505298,
        0.0253462 ],
       [0.01989113, 0.01924148, 0.02504464, ..., 0.02815702, 0.06579155,
        0.05750362],
       [0.02670636, 0.01132216, 0.01746759, ..., 0.00802529, 0.01409725,
        0.00493912]], dtype=float32)

In [58]:
X

Unnamed: 0,FRD0,FRD1,FRD2,FRD3,FRD4,FRD5,FRD6,FRD7,FRD8,FRD9,...,FRD14,FRD15,FRD16,FRD17,FRD18,FRD19,FRD20,FRD21,FRD22,FRD23
0,56859.0,53558.0,52054.0,48750.0,46996.0,46527.0,47756.0,49536.0,50979.0,51954.0,...,47533.0,46260.0,46225.0,49270.0,53075.0,53745.0,51700.0,49405.0,49259.0,52641.0
1,49249.0,45931.0,44183.0,40586.0,38239.0,37683.0,38376.0,39370.0,40552.0,42431.0,...,42439.0,40791.0,41078.0,44423.0,48812.0,50620.0,48513.0,45708.0,43952.0,46982.0
2,44525.0,41357.0,40440.0,37573.0,36607.0,38698.0,44191.0,51885.0,54279.0,54481.0,...,51789.0,50879.0,50999.0,54038.0,57924.0,59230.0,55192.0,50455.0,48569.0,52230.0
3,50071.0,47256.0,46225.0,43645.0,42713.0,45043.0,50775.0,58706.0,61990.0,62678.0,...,60567.0,59258.0,59636.0,62990.0,66785.0,67992.0,63796.0,59746.0,58094.0,61111.0
4,57274.0,53753.0,52358.0,49462.0,48265.0,50100.0,55185.0,61469.0,64081.0,64091.0,...,59158.0,57335.0,57344.0,60113.0,63325.0,64205.0,59807.0,55705.0,53553.0,56548.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1852,49337.0,46808.0,45655.0,43403.0,41402.0,41468.0,44534.0,49401.0,51658.0,51053.0,...,36750.0,38016.0,39306.0,42193.0,44089.0,44312.0,42632.0,40102.0,38973.0,41407.0
1853,37469.0,36747.0,36444.0,34598.0,33557.0,34864.0,36972.0,39739.0,42060.0,43070.0,...,36864.0,37706.0,39291.0,42616.0,45704.0,47246.0,44528.0,41561.0,40344.0,42081.0
1854,39563.0,36971.0,36466.0,34115.0,32665.0,32956.0,35848.0,39241.0,41611.0,42992.0,...,38885.0,39942.0,40880.0,43974.0,47275.0,49004.0,46389.0,43717.0,42588.0,44865.0
1855,42237.0,39997.0,40029.0,37768.0,36802.0,36653.0,38198.0,40164.0,41996.0,42670.0,...,34855.0,35899.0,36681.0,39565.0,42056.0,42329.0,39691.0,37054.0,36866.0,38231.0


In [26]:
import torch
from torch import nn
from torch.utils.data import random_split, DataLoader, TensorDataset
from torch.optim import Adam 

In [195]:
x=scaler_X.fit_transform(X).astype('float32')
y=scaler_y.fit_transform(Y).astype('float32')

In [196]:
class Data():
    def __init__(self, X,Y, batch_size):
        self.X_ = X
        self.Y_ = Y
        self.batch_size_ = batch_size

    def prepare_dataset(self):
        print('Creation of the dataset ....')
        # Convert Input and Output data to Tensors and create a TensorDataset 
        #X = torch.Tensor(scaler_X.fit_transform(self.X_.to_numpy(dtype='float32')))       # Create tensor of type torch.float32 
        X = torch.Tensor(self.X_) 
        #Y = torch.tensor(scaler_y.fit_transform(self.Y_.to_numpy(dtype='float32')))  # Create tensor type torch.float32 of size (n,1)  
        Y = torch.tensor(self.Y_)

        data = TensorDataset(X, Y)
        train_set, test_set = random_split(data, [0.8, 0.2]) 

        # Create Dataloader to read the data within batch sizes and put into memory. 
        train_loader = DataLoader(train_set, batch_size = self.batch_size_, shuffle = True) 
        test_loader = DataLoader(test_set, batch_size =1, drop_last=False)

        print("Train et test datasets created !")
        return train_loader, test_loader

In [197]:
data = Data(X=x,Y=y,batch_size=64)
    
train, test = data.prepare_dataset()

Creation of the dataset ....
Train et test datasets created !


In [201]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size, learning_rate, loss):
        super().__init__()

        self.input_size_ = input_size
        self.output_size_ = output_size
        self.learning_rate_ = learning_rate
        self.loss_ = loss 
        self.mlp_ = nn.Sequential(
            #nn.Flatten(),
            nn.BatchNorm1d(num_features=self.input_size_),
            nn.Linear(self.input_size_, 1500),
            nn.ReLU(),
            nn.Linear(1500, 3000),
            nn.ReLU(),
            nn.Linear(3000, 200),
            nn.ReLU(),
            nn.Linear(200,self.output_size_)
        )
        self.optimizer_ = Adam(self.mlp_.parameters(), lr=self.learning_rate_)

    def forward(self, x):
        value = self.mlp_(x)
        return value
    
    def train(self, num_epoch, training_set):
        #best_accuracy = 0.0
        print("Training is beginning...")

        for epoch in range(1,num_epoch+1):
            for data in training_set:
                X,y = data 
                self.optimizer_.zero_grad()   # zero the parameter gradients          
                predicted_outputs = self.forward(X)   # predict output from the model 
                #print(predicted_outputs)
                #print(y.size())
                train_loss = self.loss_(predicted_outputs, y)   # calculate loss for the predicted output
                
                train_loss.backward()   # backpropagate the loss 
                self.optimizer_.step()
            print(f"Epoch {epoch} : Loss : {train_loss}")

        print("Training is finished !")

    def predict(self, x_test):
        self.mlp_.eval()
        with torch.no_grad():
            output = self.mlp_(x_test)
            return output

In [202]:
model = NeuralNetwork(input_size=24, 
                          output_size=24, 
                          learning_rate=1e-3, 
                          loss=nn.MSELoss())
model.train(10,train)

Training is beginning...
Epoch 1 : Loss : 0.010777143761515617
Epoch 2 : Loss : 0.014680532738566399
Epoch 3 : Loss : 0.015723535791039467
Epoch 4 : Loss : 0.018414968624711037
Epoch 5 : Loss : 0.03214017301797867
Epoch 6 : Loss : 0.018546489998698235
Epoch 7 : Loss : 0.004787443671375513
Epoch 8 : Loss : 0.018249521031975746
Epoch 9 : Loss : 0.0314156748354435
Epoch 10 : Loss : 0.007348966784775257
Training is finished !


In [209]:
for x_test, y_test in test:
    print(scaler_y.inverse_transform(model.predict(x_test)))
    print(scaler_y.inverse_transform(y_test))
    loss = nn.MSELoss()
    break
    

[[ 77.04919172  64.08130503  65.67581825  67.00259039  55.70528688
   57.19839687  80.14628384  93.35970364  99.20053497 109.70594813
   92.26434873  92.54911359 100.85136976  97.93717627  91.36443547
   90.53340038  90.1444444   88.22999295 129.05645213 110.26050363
  112.35786509  85.64442428  79.39211967  66.84148255]]
[[56.18000106 55.26000157 54.67000171 48.16000175 46.56999575 48.8600007
  50.05000077 65.39999804 62.11999741 68.45999859 67.78999972 70.77999935
  70.70000196 69.50000239 68.77000047 73.59999633 65.63000008 74.75999827
  78.92999938 77.09999876 70.00000205 60.0300012  61.5599982  59.97000026]]


ValueError: Expected 2D array, got 1D array instead:
array=[0.11257416 0.10270572 0.11323006 0.16200545 0.15353319 0.124493
 0.13761112 0.14727969 0.15003297 0.15702498 0.1466203  0.13963485
 0.15129206 0.17464718 0.17966476 0.23878823 0.19171833 0.14215818
 0.15157822 0.12235868 0.12916699 0.08385882 0.10420442 0.08885309].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [185]:
scaler_y.inverse_transform(y_test)

array([[114.74999804, 111.13999573, 108.95999789, ..., 134.2499979 ,
        122.8200048 , 109.16000536],
       [ 38.84000119,  37.70000051,  36.72999766, ...,  43.5300015 ,
         43.38999988,  45.41999885],
       [ 26.70999959,  18.89000109,  16.56000112, ...,  24.63000066,
         32.44999895,  20.93999948],
       ...,
       [129.08000194, 114.63000079, 112.57000195, ..., 144.96000662,
        140.04000556, 134.54999674],
       [ 20.20000018,  19.21000067,  19.50000094, ...,  24.51000041,
         24.02000032,  21.18000002],
       [199.49999697, 192.52999497, 192.77999929, ..., 270.00000514,
        258.84998962, 247.69000954]])

In [187]:
df[df['DAP'].between(114,115)]

Unnamed: 0,Timestamp,DAP,Residual_Demand,Date
17753,2020-12-09 17:00:00,114.00,70968.0,2020-12-09
18475,2021-01-08 19:00:00,114.89,83532.0,2021-01-08
23456,2021-08-04 08:00:00,115.00,40284.0,2021-08-04
23457,2021-08-04 09:00:00,114.68,42558.0,2021-08-04
23600,2021-08-10 08:00:00,115.00,37238.0,2021-08-10
...,...,...,...,...
43861,2023-12-02 13:00:00,114.58,54356.0,2023-12-02
43887,2023-12-03 15:00:00,115.00,47413.0,2023-12-03
43916,2023-12-04 20:00:00,115.00,54395.0,2023-12-04
44084,2023-12-11 20:00:00,114.41,54408.0,2023-12-11
