In [23]:
import numpy as np
import pandas as pd
import torch.optim as optim

In [45]:
def create_sequences(df, seq_length):
    xs, ys = [], []
    # Iterate over data indices
    for i in range(len(df) - seq_length):
        # Define inputs
        x = df.iloc[i:(i+seq_length), 1]
        # Define target
        y = df.iloc[i+seq_length, 1]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [46]:
train_data = pd.read_csv('electricity_consump/electricity_train.csv')
test_data = pd.read_csv('electricity_consump/electricity_test.csv')
train_data.shape, test_data.shape

((105216, 2), (35040, 2))

In [47]:
#Check if there is a missing datapoint
import pandas as pd

# Read the CSV file into a DataFrame
df = pd.read_csv("electricity_consump/electricity_train.csv")

# Convert the timestamp column to datetime format
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Sort the DataFrame by timestamp
df = df.sort_values("timestamp")

# Calculate the expected 15-minute difference between timestamps
expected_diff = pd.Timedelta(minutes=15)

# Iterate through the DataFrame to find gaps
for i in range(len(df) - 1):
    diff = df["timestamp"].iloc[i + 1] - df["timestamp"].iloc[i]
    if diff != expected_diff:
        print("Gap found between:")
        print(df.iloc[i])
        print(df.iloc[i + 1])
        print("Expected difference:", expected_diff)
        print("Actual difference:", diff)
        print("Missing timestamp(s):")
        # Calculate missing timestamp(s) within the gap
        num_missing_timestamps = diff // expected_diff - 1
        for j in range(1, num_missing_timestamps + 1):
            missing_timestamp = df["timestamp"].iloc[i] + j * expected_diff
            print(missing_timestamp)

In [48]:
import torch
from torch.utils.data import TensorDataset

# Use create_sequences to create inputs and targets
X_train, y_train = create_sequences(train_data, 24 * 4)
print(X_train.shape, y_train.shape)

# Create TensorDataset
dataset_train = TensorDataset(
    torch.tensor(X_train).float(),
    torch.tensor(y_train).float()
)
print(len(dataset_train))

(105120, 96) (105120,)
105120


In [49]:
X_test, y_test = create_sequences(test_data, 24 * 4)
print(X_test.shape, y_test.shape)

# Create TensorDataset
dataset_test = TensorDataset(
    torch.tensor(X_test).float(),
    torch.tensor(y_test).float()
)
print(len(dataset_test))

(34944, 96) (34944,)
34944


In [28]:
from torch import nn


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Define RNN layer
        self.rnn = nn.RNN(
            input_size=1,
            hidden_size=32,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(32, 1)

    def forward(self, x):
        # Initialize first hidden state with zeros
        h0 = torch.zeros(2, x.size(0), 32)
        # Pass x and h0 through recurrent layer
        out, _ = self.rnn(x, h0)
        # Pass recurrent layer's last output through linear layer
        out = self.fc(out[:, -1, :])
        return out

In [29]:
class LSTMNet(nn.Module):
    def __init__(self):
        super().__init__()
        # Define RNN layer
        self.lstm = nn.LSTM(
            input_size=1,
            hidden_size=32,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(32, 1)

    def forward(self, x):
        h0 = torch.zeros(2, x.size(0), 32)
        # Initialize long-term memory
        c0 = torch.zeros(2, x.size(0), 32)
        # Pass all inputs to lstm layer
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [30]:
class GRUNet(nn.Module):
    def __init__(self):
        super().__init__()
        # Define RNN layer
        self.gru = nn.GRU(
            input_size=1,
            hidden_size=32,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(32, 1)

    def forward(self, x):
        # Initialize first hidden state with zeros
        h0 = torch.zeros(2, x.size(0), 32)
        # Pass x and h0 through recurrent layer
        out, _ = self.rnn(x, h0)
        # Pass recurrent layer's last output through linear layer
        out = self.fc(out[:, -1, :])
        return out

In [50]:
from torch.utils.data import DataLoader

net = LSTMNet()
dataloader_train = DataLoader(
    dataset_train,
    batch_size=16,
    shuffle=True
)
# Set up MSE loss
criterion = nn.MSELoss()
optimizer = optim.Adam(
    net.parameters(), lr=0.0001
)

In [52]:
for epoch in range(3):
    for seqs, labels in dataloader_train:
        # Reshape model inputs
        seqs = seqs.view(16, 96, 1)
        # Get model outputs
        outputs = net(seqs)
        # Compute loss
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.9754326343536377
Epoch 2, Loss: 1.5620903968811035
Epoch 3, Loss: 1.043154239654541


In [54]:
dataloader_test = DataLoader(
    dataset_test,
    batch_size=32,
    shuffle=True
)

In [55]:
import torchmetrics
# Define MSE metric
mse = torchmetrics.MeanSquaredError()

net.eval()
with torch.no_grad():
    for seqs, labels in dataloader_test:
        seqs = seqs.view(32, 96, 1)
        # Pass seqs to net and squeeze the result
        outputs = net(seqs).squeeze()
        mse(outputs, labels)

# Compute final metric value
test_mse = mse.compute()
print(f"Test MSE: {test_mse}")

Test MSE: 0.572948694229126
