In [23]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# Set random seed for reproducibility
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)


<torch._C.Generator at 0x191caaa5390>

In [24]:
# Load your dataset
data = pd.read_csv(r"C:\Users\Sasi Kanth\Desktop\SolarPower\SolcastData\Data17_Filtered.csv")

# Retain the 'period_end' column for tracking timestamps
timestamps = data['period_end'].values

# Drop unnecessary columns (you can drop 'period' but retain 'period_end')
data = data.drop(columns=['period'])

# Separate the target column 'gti' and features
target_column = 'gti'
X_data = data.drop(columns=[target_column, 'period_end'])
y_data = data[target_column]

# Feature scaling for features and target
scaler_X = MinMaxScaler()
scaled_X = scaler_X.fit_transform(X_data)

scaler_y = MinMaxScaler()
scaled_y = scaler_y.fit_transform(y_data.values.reshape(-1, 1))


In [25]:
# Create sequences and labels for LSTM
def create_sequences(X, y, sequence_length):
    sequences_X = []
    sequences_y = []
    for i in range(len(X) - sequence_length):
        sequences_X.append(X[i:i + sequence_length])
        sequences_y.append(y[i + sequence_length])
    return np.array(sequences_X), np.array(sequences_y)

sequence_length = 24  # Number of 5-min intervals (e.g., for 2 hours ahead)
X, y = create_sequences(scaled_X, scaled_y, sequence_length)

# Adjust the timestamps for the sequences (remove the first 'sequence_length' entries)
timestamps = timestamps[sequence_length:]


In [4]:
# Create sequences and labels for LSTM
def create_sequences(X, y, sequence_length):
    sequences_X = []
    sequences_y = []
    for i in range(len(X) - sequence_length):
        sequences_X.append(X[i:i + sequence_length])
        sequences_y.append(y[i + sequence_length])
    return np.array(sequences_X), np.array(sequences_y)

sequence_length = 24  # Number of 5-min intervals (e.g., for 2 hours ahead)
X, y = create_sequences(scaled_X, scaled_y, sequence_length)

# Adjust the timestamps for the sequences (remove the first 'sequence_length' entries)
timestamps = timestamps[sequence_length:]


In [26]:
# Split into training and testing datasets
split = int(0.7 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [33]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_size1, batch_first=True)
        self.dropout1 = nn.Dropout(0.3)
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2, batch_first=True)
        self.dropout2 = nn.Dropout(0.3)
        self.fc = nn.Linear(hidden_size2, output_size)
    
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x = self.fc(x[:, -1, :])  # Output for the last timestep
        return x


In [34]:
input_size = X_train.shape[2]  # Number of features
hidden_size1 = 128
hidden_size2 = 64
output_size = 1  # Predicting a single value 'gti'

model = LSTMModel(input_size, hidden_size1, hidden_size2, output_size)


In [35]:
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    train_loss /= len(train_loader)
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
    
    val_loss /= len(test_loader)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")


Epoch 1/20, Train Loss: 0.0015, Validation Loss: 0.0005
Epoch 2/20, Train Loss: 0.0008, Validation Loss: 0.0006
Epoch 3/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 4/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 5/20, Train Loss: 0.0007, Validation Loss: 0.0006
Epoch 6/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 7/20, Train Loss: 0.0007, Validation Loss: 0.0006
Epoch 8/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 9/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 10/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 11/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 12/20, Train Loss: 0.0007, Validation Loss: 0.0006
Epoch 13/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 14/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 15/20, Train Loss: 0.0007, Validation Loss: 0.0004
Epoch 16/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 17/20, Train Loss: 0.0007, Validation Loss: 0.0005
Epoch 18/20, Train Loss: 0.0007, Validat

In [36]:
model.eval()
predictions = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        preds = model(X_batch)
        predictions.append(preds.cpu().numpy())

predictions = np.concatenate(predictions).reshape(-1, 1)

# Rescale predictions back to original scale
predictions_original = scaler_y.inverse_transform(predictions)
y_test_original = scaler_y.inverse_transform(y_test)


In [38]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# After making predictions
predictions = np.concatenate(predictions).reshape(-1, 1)  # Combine batches into one array

# Rescale predictions and actual values back to original scale
predictions_original = scaler_y.inverse_transform(predictions)
y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1))


# Mean Squared Error (MSE)
mse = mean_squared_error(y_test_original, predictions_original)

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test_original, predictions_original)

# Filter out zero actual values to avoid division by zero in MAPE
non_zero_indices = y_test_original != 0

# Mean Absolute Percentage Error (MAPE) for non-zero values
mape = np.mean(np.abs((y_test_original[non_zero_indices] - predictions_original[non_zero_indices]) / y_test_original[non_zero_indices])) * 100

# Calculate the absolute errors
errors = np.abs(y_test_original - predictions_original)

# Find the maximum error
max_error = np.max(errors)
max_error_index = np.argmax(errors)
max_error_value_actual = y_test_original[max_error_index]
max_error_value_predicted = predictions_original[max_error_index]
max_error_timestamp = timestamps_test[max_error_index]

# Find the minimum error
min_error = np.min(errors)
min_error_index = np.argmin(errors)
min_error_value_actual = y_test_original[min_error_index]
min_error_value_predicted = predictions_original[min_error_index]
min_error_timestamp = timestamps_test[min_error_index]

# Print the results
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

print(f"\nMaximum Error: {max_error:.4f}")
print(f"At time: {max_error_timestamp}")
print(f"Actual value: {max_error_value_actual[0]:.4f}, Predicted value: {max_error_value_predicted[0]:.4f}")

print(f"\nMinimum Error: {min_error:.4f}")
print(f"At time: {min_error_timestamp}")
print(f"Actual value: {min_error_value_actual[0]:.4f}, Predicted value: {min_error_value_predicted[0]:.4f}")


Mean Squared Error (MSE): 661.9749
Mean Absolute Error (MAE): 12.1622
Mean Absolute Percentage Error (MAPE): 13.03%

Maximum Error: 663.9030
At time: 2024-03-06 11:25:00+04:00
Actual value: 925.0000, Predicted value: 261.0970

Minimum Error: 0.0000
At time: 2020-11-29 07:40:00+04:00
Actual value: 522.0000, Predicted value: 522.0000
