In [None]:
import pandas as pd
import os
import numpy as np
import datetime as dt
import yfinance as yf
import matplotlib.pyplot as plt

In [2]:
# The ticker symbol for crude oil is 'CL=F'
oil_data = yf.download('CL=F', start='2023-01-01', end='2023-12-31')
oil_data.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'], axis=1, inplace=True)

[*********************100%***********************]  1 of 1 completed


In [3]:
data_array = oil_data.values
simple_dates = pd.to_datetime(oil_data.index).date
oil_df = pd.DataFrame({'SimpleDate': simple_dates, 'price_oil': data_array.flatten()})

In [4]:
current_dir = r'C:\Users\tamas\Documents\GitHub\deeplearning'
da_df = pd.read_csv(current_dir + '\\DA_prices.csv')
id_df = pd.read_csv(current_dir + '\\ID_prices.csv')
gas_df = pd.read_csv(current_dir + '\\gas\\MIBGAS_Data_2023.csv', sep=';', skiprows=1).drop('Unnamed: 7', axis=1)

In [5]:
# Convert the date columns to datetime
da_df['Date'] = da_df.apply(
    lambda row: (
        dt.datetime(row['Year'].astype(int), row['Month'].astype(int), row['Day'].astype(int), 23, 59, 59)
        if row['Hour'] == 24
        else dt.datetime(row['Year'].astype(int), row['Month'].astype(int), row['Day'].astype(int), row['Hour'].astype(int))
    ), 
    axis=1
)
id_df['Date'] = id_df.apply(
    lambda row: (
        dt.datetime(row['Year'].astype(int), row['Month'].astype(int), row['Day'].astype(int), 23, 59, 59)
        if row['Hour'] == 24
        else dt.datetime(row['Year'].astype(int), row['Month'].astype(int), row['Day'].astype(int), row['Hour'].astype(int))
    ), 
    axis=1
)

In [None]:
gs_df = gas_df[gas_df['Product'] == 'GDAES_D+1'].copy()
merged_df = pd.merge(id_df, da_df[['Date', 'DA ES']], on=['Date'], how='inner')
# replace date column to the beginning
merged_df = merged_df[['Date'] + [col for col in merged_df.columns if col != 'Date']]
merged_df['SimpleDate'] = merged_df['Date'].dt.date
gs_df['SimpleDate'] = gs_df['Trading day'].apply(lambda x: dt.datetime.strptime(x, '%d/%m/%Y').date())
merged_df = pd.merge(merged_df, gs_df[['SimpleDate', 'MIBGAS Daily Price [EUR/MWh]']], on=['SimpleDate'], how='left')
merged_df = pd.merge(merged_df, oil_df, on=['SimpleDate'], how='left')

Unnamed: 0,Date,Year,Month,Day,Hour,MaxES,MinES,AvgES,DA ES,SimpleDate,MIBGAS Daily Price [EUR/MWh],price_oil
0,2023-01-01 01:00:00,2023,1,1,1,0.14,-4.0,-0.72,0.0,2023-01-01,70.45,
1,2023-01-01 02:00:00,2023,1,1,2,0.13,-3.23,-0.45,0.0,2023-01-01,70.45,
2,2023-01-01 03:00:00,2023,1,1,3,0.12,-9.98,-0.95,0.0,2023-01-01,70.45,
3,2023-01-01 04:00:00,2023,1,1,4,0.0,-6.49,-3.34,0.0,2023-01-01,70.45,
4,2023-01-01 05:00:00,2023,1,1,5,0.0,-6.69,-2.87,0.0,2023-01-01,70.45,
5,2023-01-01 06:00:00,2023,1,1,6,0.0,-7.0,-3.27,0.0,2023-01-01,70.45,
6,2023-01-01 07:00:00,2023,1,1,7,0.0,-13.58,-3.9,0.0,2023-01-01,70.45,
7,2023-01-01 08:00:00,2023,1,1,8,0.0,-15.4,-3.2,0.0,2023-01-01,70.45,
8,2023-01-01 09:00:00,2023,1,1,9,0.0,-8.68,-1.18,0.0,2023-01-01,70.45,
9,2023-01-01 10:00:00,2023,1,1,10,0.0,-10.19,-2.45,0.0,2023-01-01,70.45,


In [7]:
# Rename columns in merged df to id_en_max, id_en_min, id_en_avg, da_en, da_gas
merged_df.rename(columns={
    'MaxES': 'id_en_max',
    'MinES': 'id_en_min',
    'AvgES': 'id_en_avg',
    'DA ES': 'da_en',
    'MIBGAS Daily Price [EUR/MWh]': 'da_gas'
}, inplace=True)

# Encoding the date column
merged_df['hour_sin'] = np.sin(2 * np.pi * merged_df['Hour'] / 24)
merged_df['hour_cos'] = np.cos(2 * np.pi * merged_df['Hour'] / 24)
merged_df['month_sin'] = np.sin(2 * np.pi * merged_df['Month'] / 12)
merged_df['month_cos'] = np.cos(2 * np.pi * merged_df['Month'] / 12)
merged_df['day_sin'] = np.sin(2 * np.pi * merged_df['Day'] / 31)
merged_df['day_cos'] = np.cos(2 * np.pi * merged_df['Day'] / 31)

# Drop the SimpleDate column
merged_df.drop('SimpleDate', axis=1, inplace=True)
merged_df.head(5)

Unnamed: 0,Date,Year,Month,Day,Hour,id_en_max,id_en_min,id_en_avg,da_en,da_gas,price_oil,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos
0,2023-01-01 01:00:00,2023,1,1,1,0.14,-4.0,-0.72,0.0,70.45,,0.258819,0.965926,0.5,0.866025,0.201299,0.97953
1,2023-01-01 02:00:00,2023,1,1,2,0.13,-3.23,-0.45,0.0,70.45,,0.5,0.866025,0.5,0.866025,0.201299,0.97953
2,2023-01-01 03:00:00,2023,1,1,3,0.12,-9.98,-0.95,0.0,70.45,,0.707107,0.707107,0.5,0.866025,0.201299,0.97953
3,2023-01-01 04:00:00,2023,1,1,4,0.0,-6.49,-3.34,0.0,70.45,,0.866025,0.5,0.5,0.866025,0.201299,0.97953
4,2023-01-01 05:00:00,2023,1,1,5,0.0,-6.69,-2.87,0.0,70.45,,0.965926,0.258819,0.5,0.866025,0.201299,0.97953


In [8]:
# What is the target? Do we shift anything?

# LSTM

In [26]:
#Explanatory LSTM model
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import lstm

In [None]:
lstm_data = merged_df[['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'da_gas', 'price_oil', 'da_en', 'id_en_avg']].copy()
lstm_data['target'] = lstm_data['id_en_avg'] - lstm_data['da_en']
lstm_data.drop(['id_en_avg', 'da_en'], axis=1, inplace=True)

In [15]:
lstm_data = lstm_data.fillna(0)

In [17]:
# Split the data into train and test
x_train, x_test, y_train, y_test = train_test_split(lstm_data.drop('target', axis=1), lstm_data['target'], test_size=0.2, random_state=1312)
train_data, test_data = train_test_split(lstm_data, test_size=0.2, random_state=1312)

In [24]:
# Hyperparameters
num_layers = 2
batch_size = 64
epochs = 100
hidden_units = 128
learning_rate = 0.0001

In [28]:
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [29]:
class LSTM_Model(nn.Module):
    def __init__(self, hidden_units, num_layers, input_size, output_size):
        super(LSTM_Model, self).__init__()

        # LSTM layer with correct parameters
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_units, num_layers=num_layers, batch_first=True)
        
        # Dropout layer
        self.dropout = nn.Dropout(0.2)
        
        # Fully connected (dense) layer
        self.dense = nn.Linear(hidden_units, 1)  # Assuming you want 1 output from dense layer
        
        # Output layer
        self.output_layer = nn.Linear(hidden_units, output_size)

        self.hidden_units = hidden_units
        self.num_layers = num_layers

    def forward(self, x):
        # Initialize hidden and cell states
        hidden_states = torch.zeros(self.num_layers, x.size(0), self.hidden_units).to(x.device)
        cell_states = torch.zeros(self.num_layers, x.size(0), self.hidden_units).to(x.device)

        # Pass through LSTM
        out, _ = self.lstm1(x, (hidden_states, cell_states))

        # Take the output from the last time step
        out = out[:, -1, :]  # Get the last timestep's output

        # Apply dropout (if necessary)
        out = self.dropout(out)
        
        # Pass through the dense layer
        out = self.dense(out)

        # Pass through output layer (if needed)
        output = self.output_layer(out)

        return output

In [31]:
model = LSTM_Model(hidden_units, num_layers, train_data.shape[1], output_size=test_data.shape[1])
print(model)

LSTM_Model(
  (lstm1): LSTM(9, 128, num_layers=2, batch_first=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (dense): Linear(in_features=128, out_features=1, bias=True)
  (output_layer): Linear(in_features=128, out_features=9, bias=True)
)


In [None]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train(model, train_dataloader, loss, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_dataloader):
            # Zero the gradients
            optimizer.zero_grad()
            
            # Forward pass
            output = model(x.float())
            