In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
X_train = pd.read_csv('X_train_NHkHMNU.csv')
Y_train = pd.read_csv('y_train_ZAN5mwg.csv')
X_test = pd.read_csv('X_test_final.csv')

In [4]:
X_train.head()

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,DE_FR_EXCHANGE,FR_DE_EXCHANGE,DE_NET_EXPORT,FR_NET_EXPORT,DE_NET_IMPORT,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1054,206,FR,0.210099,-0.427458,-0.606523,0.606523,,0.69286,,...,-0.444661,-0.17268,-0.556356,-0.790823,-0.28316,-1.06907,-0.063404,0.339041,0.124552,-0.002445
1,2049,501,FR,-0.022399,-1.003452,-0.022063,0.022063,-0.57352,-1.130838,0.57352,...,-1.183194,-1.2403,-0.770457,1.522331,0.828412,0.437419,1.831241,-0.659091,0.047114,-0.490365
2,1924,687,FR,1.395035,1.978665,1.021305,-1.021305,-0.622021,-1.682587,0.622021,...,1.947273,-0.4807,-0.313338,0.431134,0.487608,0.684884,0.114836,0.535974,0.743338,0.204952
3,297,720,DE,-0.983324,-0.849198,-0.839586,0.839586,-0.27087,0.56323,0.27087,...,-0.976974,-1.114838,-0.50757,-0.499409,-0.236249,0.350938,-0.417514,0.911652,-0.296168,1.073948
4,1101,818,FR,0.143807,-0.617038,-0.92499,0.92499,,0.990324,,...,-0.526267,-0.541465,-0.42455,-1.088158,-1.01156,0.614338,0.729495,0.245109,1.526606,2.614378


In [5]:
Y_train.head()
#plt.plot(Y_train['TARGET'])

Unnamed: 0,ID,TARGET
0,1054,0.028313
1,2049,-0.112516
2,1924,-0.18084
3,297,-0.260356
4,1101,-0.071733


In [6]:
X_test.head()

Unnamed: 0,ID,DAY_ID,COUNTRY,DE_CONSUMPTION,FR_CONSUMPTION,DE_FR_EXCHANGE,FR_DE_EXCHANGE,DE_NET_EXPORT,FR_NET_EXPORT,DE_NET_IMPORT,...,FR_RESIDUAL_LOAD,DE_RAIN,FR_RAIN,DE_WIND,FR_WIND,DE_TEMP,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET
0,1115,241,FR,0.340083,-0.433604,-0.423521,0.423521,0.165333,0.519419,-0.165333,...,-0.222525,-0.51318,-0.182048,-0.982546,-0.876632,0.880491,0.692242,0.569419,-0.029697,-0.929256
1,1202,1214,FR,0.803209,0.780411,0.60161,-0.60161,0.342802,0.555367,-0.342802,...,0.857739,-0.340595,-0.301094,-0.759816,-1.221443,-0.616617,-0.737496,0.251251,0.753646,0.664086
2,1194,1047,FR,0.79554,0.721954,1.179158,-1.179158,1.620928,0.666901,-1.620928,...,0.447967,0.796475,-0.367248,0.376055,-0.483363,0.865138,0.120079,-1.485642,-0.32645,-0.349747
3,1084,1139,FR,0.172555,-0.723427,-0.044539,0.044539,,-0.205276,,...,-0.561295,-0.542606,-0.013291,-0.791119,-0.894309,0.239153,0.457457,-0.746863,2.262654,0.642069
4,1135,842,FR,0.949714,0.420236,0.617391,-0.617391,0.608561,-0.240856,-0.608561,...,0.503567,-0.230291,-0.609203,-0.744986,-1.196282,0.176557,0.312557,-2.219626,-0.509272,-0.488341


In [20]:
merged_df = pd.merge(X_train[['ID', 'DAY_ID']], Y_train, on='ID')
merged_sorted = merged_df.sort_values(by="DAY_ID")
merged_sorted = merged_sorted[['DAY_ID', 'TARGET']]
merged_sorted.head(20)
#plt.plot(merged_sorted['DAY_ID'], merged_sorted["TARGET"])

Unnamed: 0,DAY_ID,TARGET
742,0,0.108953
1211,1,-0.063369
1408,2,2.575976
1468,2,0.86127
329,3,0.068905
680,3,7.138604
512,5,1.031308
991,5,0.026374
592,7,-0.118915
1356,7,-0.021227


In [21]:
# Normalize the 'TARGET' prices using MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
merged_sorted['TARGET_scaled'] = scaler.fit_transform(merged_sorted['TARGET'].values.reshape(-1, 1))

train_size = int(len(merged_sorted) * 0.8)
train_data, test_data = merged_sorted.iloc[:train_size], merged_sorted.iloc[train_size:]

In [24]:
# Step 2: Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # Get the last time step's output
        out = out.view(out.size(0), -1)  # Reshape to (batch_size, hidden_size)
        out = self.fc(out)
        return out

In [25]:
# Step 3: Train the LSTM model
input_size = 1  # Number of features (e.g., 'TARGET_scaled')
hidden_size = 50  # Number of hidden units
output_size = 1  # Number of output units

# Convert training and testing data to PyTorch tensors
X_train_tensor = torch.tensor(train_data['TARGET_scaled'].values).unsqueeze(-1).float()
Y_train_tensor = torch.tensor(train_data['TARGET_scaled'].shift(-1).dropna().values).float()
X_test_tensor = torch.tensor(test_data['TARGET_scaled'].values).unsqueeze(-1).float()
Y_test_tensor = torch.tensor(test_data['TARGET_scaled'].shift(-1).dropna().values).float()

# Initialize the LSTM model
model = LSTMModel(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs.squeeze(), Y_train_tensor)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

IndexError: too many indices for tensor of dimension 2

In [None]:
# Step 4: Evaluate the model
with torch.no_grad():
    predicted_prices = model(X_test_tensor)
    # Calculate evaluation metrics (e.g., RMSE)