# Initial Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls

drive  sample_data


In [None]:
%cd /content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/bidirectional_lstms/gujrat

/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/bidirectional_lstms/gujrat


In [None]:
!pip install --quiet pytorch-lightning==1.2.5

[K     |████████████████████████████████| 826 kB 4.4 MB/s 
[K     |████████████████████████████████| 829 kB 46.0 MB/s 
[K     |████████████████████████████████| 397 kB 35.4 MB/s 
[K     |████████████████████████████████| 133 kB 49.5 MB/s 
[K     |████████████████████████████████| 596 kB 43.2 MB/s 
[K     |████████████████████████████████| 1.1 MB 45.0 MB/s 
[K     |████████████████████████████████| 94 kB 3.6 MB/s 
[K     |████████████████████████████████| 144 kB 52.5 MB/s 
[K     |████████████████████████████████| 271 kB 50.6 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone


In [None]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE=['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
                
rcParams['figure.figsize']= 12 , 8
                
tqdm.pandas()

In [None]:
pl.seed_everything(42)

Global seed set to 42


42

# Load Data

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/datasets/gujrat_dataset.csv')

In [None]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI
0,2000,1,1,0,0
1,2000,1,1,1,0
2,2000,1,1,2,0
3,2000,1,1,3,128
4,2000,1,1,4,337


In [None]:
# Keep only rows whose Hour lies in the closed interval [7, 17].
df = df[df['Hour'].between(7, 17)]

In [None]:
# Build a timestamp from the Year/Month/Day/Hour columns. to_datetime assembles
# the value from the column names, so no `format` is passed: the old
# '%Y/%M/%D %H' string was not a valid date format (%M means minutes, %D is not
# a day directive) and only misled the reader.
# `assign` returns a new frame instead of writing a column into the filtered
# slice created by the hour filter, which avoids SettingWithCopyWarning.
df = df.assign(date=pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']]))

In [None]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI,date
7,2000,1,1,7,714,2000-01-01 07:00:00
8,2000,1,1,8,695,2000-01-01 08:00:00
9,2000,1,1,9,637,2000-01-01 09:00:00
10,2000,1,1,10,509,2000-01-01 10:00:00
11,2000,1,1,11,317,2000-01-01 11:00:00


In [None]:
df = df[['date','GHI']]

In [None]:
df.head()

Unnamed: 0,date,GHI
7,2000-01-01 07:00:00,714
8,2000-01-01 08:00:00,695
9,2000-01-01 09:00:00,637
10,2000-01-01 10:00:00,509
11,2000-01-01 11:00:00,317


In [None]:
df.shape

(60225, 2)

# Pre-processing

In [None]:
features_df = df[['GHI']]

In [None]:
# 70 / 15 / 15 chronological split. The test split is "everything after train
# and validation", so its size is computed as the remainder; int(len * 0.15)
# under-reports it whenever the truncated sizes do not add up to len(features_df).
train_size = int(len(features_df) * 0.7)
val_size = int(len(features_df)*0.15)
test_size = len(features_df) - train_size - val_size
print('Train Size = ', train_size)
print('Val Size = ', val_size)
print('Test Size = ', test_size)

Train Size =  42157
Val Size =  9033
Test Size =  9033


In [None]:
# Positional, unshuffled split: first train_size rows, next val_size rows, then the rest.
train_df = features_df.iloc[:train_size]
val_df = features_df.iloc[train_size:train_size + val_size]
test_df = features_df.iloc[train_size + val_size:]
train_df.shape, val_df.shape, test_df.shape

((42157, 1), (9033, 1), (9035, 1))

In [None]:
# Fit the [-1, 1] min-max scaler on the training split only.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_df)

In [None]:
train_df=pd.DataFrame(
    scaler.transform(train_df),
    index = train_df.index,
    columns = train_df.columns
)

In [None]:
val_df=pd.DataFrame(
    scaler.transform(val_df),
    index = val_df.index,
    columns = val_df.columns
)

In [None]:
test_df=pd.DataFrame(
    scaler.transform(test_df),
    index = test_df.index,
    columns = test_df.columns
)

In [None]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
    """Cut a frame into (window, next-value) pairs for one-step-ahead forecasting.

    For every start position ``i`` the window holds rows ``i`` up to (but not
    including) ``i + sequence_length`` with all columns, and the label is the
    value of ``target_column`` in row ``i + sequence_length``.

    Args:
        input_data: frame whose rows are consecutive time steps.
        target_column: name of the column to predict.
        sequence_length: number of rows in each input window.

    Returns:
        A list of ``(window_frame, label)`` tuples with
        ``len(input_data) - sequence_length`` entries; the list is empty when
        the frame has no more rows than ``sequence_length``.
    """
    # Extract the target column once. Looking it up through
    # ``input_data.iloc[pos][target_column]`` materialises a whole row as a
    # Series on every iteration.
    targets = input_data[target_column].to_numpy()
    n_windows = len(input_data) - sequence_length

    sequences = []
    for start in tqdm(range(n_windows)):
        end = start + sequence_length
        # .iloc makes the positional slicing explicit (the index is not 0-based).
        sequences.append((input_data.iloc[start:end], targets[end]))

    return sequences

In [None]:
# Window length in rows (time steps).
# NOTE(review): the hour filter earlier keeps hours 7-17, i.e. 11 rows per day
# if the data is hourly, so 120 rows span roughly 11 days of daytime readings
# rather than 5 calendar days - confirm the intended horizon.
SEQUENCE_LENGTH = 120

train_sequences = create_sequences(train_df, 'GHI', SEQUENCE_LENGTH)
val_sequences = create_sequences(val_df, 'GHI', SEQUENCE_LENGTH)
test_sequences = create_sequences(test_df, 'GHI', SEQUENCE_LENGTH)

  0%|          | 0/42037 [00:00<?, ?it/s]

  0%|          | 0/8913 [00:00<?, ?it/s]

  0%|          | 0/8915 [00:00<?, ?it/s]

In [None]:
len(train_sequences), len(val_sequences), len(test_sequences)

(42037, 8913, 8915)

# PyTorch Dataset

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

In [None]:
class SolarDataset(Dataset):
    """Map-style dataset over pre-built ``(window, label)`` pairs.

    Each stored pair holds a window object exposing ``to_numpy()`` (for
    example a DataFrame slice) and a scalar label. Items are returned as a
    pair of float32 tensors.
    """

    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        window, target = self.sequences[idx]
        features = torch.tensor(window.to_numpy(), dtype=torch.float32)
        label = torch.tensor(target, dtype=torch.float32)
        return features, label

In [None]:
BATCH_SIZE = 8

In [None]:
train_dataset = SolarDataset(train_sequences)
val_dataset = SolarDataset(val_sequences)
test_dataset = SolarDataset(test_sequences)

In [None]:
# Shuffle only the training windows: every sample already carries its own
# history, and unshuffled batches would contain BATCH_SIZE consecutive, almost
# fully overlapping windows. Validation and test keep chronological order.
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Model

In [None]:
class StackedLSTMs(nn.Module):
    """Stacked (optionally bidirectional) LSTM followed by a linear regressor.

    The final hidden state of the top LSTM layer is mapped to one output value
    per sample.

    Args:
        n_features: number of input features per time step.
        n_hidden: hidden size of each LSTM direction.
        n_layers: number of stacked LSTM layers.
        dropout: dropout between stacked layers; forced to 0 for a single
            layer, where nn.LSTM has no inter-layer connection to apply it to.
        bidirectional: run the LSTM in both directions.
        concat_directions: when True (and ``bidirectional``), feed the
            regressor the concatenated forward and backward final states of
            the top layer (``2 * n_hidden`` inputs). The default False keeps
            the original behaviour of using ``hidden[-1]`` only - for a
            bidirectional LSTM that is the top layer's backward-direction
            state, so the forward state is ignored - and keeps the state-dict
            layout compatible with checkpoints saved by the original class.
    """

    def __init__(self, n_features, n_hidden = 256, n_layers = 2, dropout = 0.2,
                 bidirectional = True, concat_directions = False):
        super().__init__()

        self.n_hidden = n_hidden
        self.bidirectional = bidirectional
        # Concatenation only makes sense when there are two directions.
        self.concat_directions = bool(bidirectional and concat_directions)

        self.lstm = nn.LSTM(
            input_size = n_features,
            hidden_size = n_hidden,
            batch_first = True,
            num_layers = n_layers,
            dropout = dropout if n_layers > 1 else 0.0,
            bidirectional = bidirectional
        )

        regressor_inputs = 2 * n_hidden if self.concat_directions else n_hidden
        self.regressor = nn.Linear(regressor_inputs, 1)

    def forward(self, x):
        """Return one prediction per sample, shape ``(batch, 1)``."""
        self.lstm.flatten_parameters()

        # hidden is (num_layers * num_directions, batch, n_hidden); the last
        # two entries belong to the top layer (forward, then backward).
        _, (hidden, _) = self.lstm(x)
        if self.concat_directions:
            out = torch.cat((hidden[-2], hidden[-1]), dim = 1)
        else:
            out = hidden[-1]

        return self.regressor(out)

In [None]:
# Device selection (same logic as the earlier device cell): first CUDA device
# when available, CPU otherwise.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

In [None]:
device

device(type='cuda', index=0)

In [None]:
EPOCHS = 200
LEARNING_RATE = 0.001

In [None]:
# One input feature per column of the scaled frame; the network lives on
# `device` and is trained with an MSE objective and the Adam optimiser.
model = StackedLSTMs(n_features = train_df.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = LEARNING_RATE)

In [None]:
model

StackedLSTMs(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (regressor): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
!ls -l checkpoints

total 568284
-rw------- 1 root root 8433251 Feb 22 09:40 best_model.pth
-rw------- 1 root root 8433251 Feb 21 05:02 epoch_0.pth
-rw------- 1 root root 8433251 Feb 21 06:29 epoch_10.pth
-rw------- 1 root root 8433251 Feb 21 06:38 epoch_11.pth
-rw------- 1 root root 8433251 Feb 21 06:46 epoch_12.pth
-rw------- 1 root root 8433251 Feb 21 06:55 epoch_13.pth
-rw------- 1 root root 8433251 Feb 21 07:04 epoch_14.pth
-rw------- 1 root root 8433251 Feb 21 07:12 epoch_15.pth
-rw------- 1 root root 8433251 Feb 21 07:21 epoch_16.pth
-rw------- 1 root root 8433251 Feb 21 07:30 epoch_17.pth
-rw------- 1 root root 8433251 Feb 21 07:39 epoch_18.pth
-rw------- 1 root root 8433251 Feb 21 07:47 epoch_19.pth
-rw------- 1 root root 8433251 Feb 21 05:10 epoch_1.pth
-rw------- 1 root root 8433251 Feb 21 07:56 epoch_20.pth
-rw------- 1 root root 8433251 Feb 21 08:05 epoch_21.pth
-rw------- 1 root root 8433251 Feb 21 08:14 epoch_22.pth
-rw------- 1 root root 8433251 Feb 21 08:22 epoch_23.pth
-rw------- 1 root 

In [None]:
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load('checkpoints/epoch_67.pth', map_location=device))

<All keys matched successfully>

In [None]:
for i in range(1,10):
  print(i)

1
2
3
4
5
6
7
8
9


In [None]:
# Resume training at epoch index 68 (the checkpoint loaded earlier is
# epoch_67). Checkpoint files are named with the 0-based epoch index, while
# the progress line prints the 1-based epoch number.
# NOTE(review): min_valid_loss restarts at +inf on every resume, so the first
# resumed epoch always overwrites checkpoints/best_model.pth.
min_valid_loss = np.inf

for epoch in range(68, EPOCHS):
  # ---- training pass ----
  # Set train mode every epoch: the validation pass below switches to eval mode.
  model.train()
  train_loss = 0.0
  for sequences, labels in tqdm(train_dataloader):
    sequences, labels = sequences.to(device), labels.to(device)

    optimizer.zero_grad()

    outputs = model(sequences)
    # labels are (batch,) while the model returns (batch, 1)
    loss = criterion(outputs, labels.unsqueeze(1))

    # Backward and optimize
    loss.backward()
    optimizer.step()

    train_loss += loss.item()

  # ---- validation pass ----
  # Eval mode disables dropout so the validation loss is deterministic and
  # comparable between epochs; no_grad skips building the autograd graph.
  model.eval()
  val_loss = 0.0
  with torch.no_grad():
    for sequences, labels in val_dataloader:
      # Transfer to GPU
      sequences, labels = sequences.to(device), labels.to(device)

      outputs = model(sequences)
      loss = criterion(outputs, labels.unsqueeze(1))
      val_loss += loss.item()

  # Average of the per-batch losses.
  val_loss = val_loss / len(val_dataloader)
  train_loss = train_loss / len(train_dataloader)

  print(f'Epoch {epoch+1} \t\t Training Loss: {train_loss} \t\t Validation Loss: {val_loss}')
  torch.save(model.state_dict(), 'checkpoints/epoch_' + str(epoch) + '.pth')

  if min_valid_loss > val_loss:
    print('Validation Loss Decreased from',min_valid_loss,'to ',val_loss,', Saving the model')
    min_valid_loss = val_loss

    # Saving State Dict
    torch.save(model.state_dict(), 'checkpoints/best_model.pth')



  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 69 		 Training Loss: 0.021944597711610288 		 Validation Loss: 0.030066496347893663
Validation Loss Decreased from inf to  0.030066496347893663 , Saving the model


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 70 		 Training Loss: 0.01638197357037405 		 Validation Loss: 0.0276825827649782
Validation Loss Decreased from 0.030066496347893663 to  0.0276825827649782 , Saving the model


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 71 		 Training Loss: 0.017644517718157386 		 Validation Loss: 0.02778130256972326


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 72 		 Training Loss: 0.01538396153680084 		 Validation Loss: 0.026704361156344877
Validation Loss Decreased from 0.0276825827649782 to  0.026704361156344877 , Saving the model


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 73 		 Training Loss: 0.016083976107628708 		 Validation Loss: 0.025499178225204824
Validation Loss Decreased from 0.026704361156344877 to  0.025499178225204824 , Saving the model


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 74 		 Training Loss: 0.01486140278536985 		 Validation Loss: 0.02589230496074713


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 75 		 Training Loss: 0.015200763697038194 		 Validation Loss: 0.02994148784242424


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 76 		 Training Loss: 0.01547740843648409 		 Validation Loss: 0.02659871885536036


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 77 		 Training Loss: 0.014877856482702888 		 Validation Loss: 0.03082460207638076


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 78 		 Training Loss: 0.014986310432490095 		 Validation Loss: 0.026372816915792987


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 79 		 Training Loss: 0.014547056395759162 		 Validation Loss: 0.028507089607376863


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 80 		 Training Loss: 0.025700874496666856 		 Validation Loss: 0.16630191579941375


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 81 		 Training Loss: 0.0382641349948438 		 Validation Loss: 0.08764917848989232


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 82 		 Training Loss: 0.028296622619294876 		 Validation Loss: 0.12229112783105493


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 83 		 Training Loss: 0.02133419829715716 		 Validation Loss: 0.03004471386731665


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 84 		 Training Loss: 0.015800007049805224 		 Validation Loss: 0.029238693239342216


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 85 		 Training Loss: 0.015132692328048727 		 Validation Loss: 0.028296973459819862


  0%|          | 0/5255 [00:00<?, ?it/s]

Epoch 86 		 Training Loss: 0.014310467609477778 		 Validation Loss: 0.02773401378559976


  0%|          | 0/5255 [00:00<?, ?it/s]

In [None]:
len(train_dataloader)

# Testing

In [None]:
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load('checkpoints/epoch_145.pth', map_location=device))

<All keys matched successfully>

In [None]:
# Testing only runs inference: use evaluation mode so dropout is disabled.
model.eval()

StackedLSTMs(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True, dropout=0.2)
  (regressor): Linear(in_features=64, out_features=1, bias=True)
)

In [None]:
def evaluate_model(loader):
  """Print MSE, RMSE and MAE of the global ``model`` over ``loader``.

  Predictions and labels are mapped back through the global
  ``scaler.inverse_transform`` before the errors are computed, so the metrics
  are in the scaler's original units. Uses the notebook-level ``model``,
  ``scaler`` and ``device``.

  Args:
    loader: iterable of ``(sequences, labels)`` tensor batches.

  Raises:
    ValueError: if ``loader`` yields no samples.
  """
  model.eval()
  squared_error = 0.0
  absolute_error = 0.0
  n_samples = 0

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    for sequences, labels in loader:
      # Transfer to GPU
      sequences, labels = sequences.to(device), labels.to(device)
      predictions = scaler.inverse_transform(model(sequences).cpu().numpy())
      targets = scaler.inverse_transform(labels.unsqueeze(1).cpu().numpy())

      errors = predictions - targets
      squared_error += (errors * errors).sum()
      absolute_error += np.abs(errors).sum()
      # Count real samples: the last batch may hold fewer than a full batch,
      # so batch_size * number_of_batches would overstate the divisor.
      n_samples += len(targets)

  if n_samples == 0:
    raise ValueError('evaluate_model received a loader with no samples')

  mse = squared_error / n_samples
  mae = absolute_error / n_samples

  print('MSE = ',mse)
  print('RMSE = ',mse**0.5)
  print('MAE = ',mae)

In [None]:
print('Results for Train Set:')
evaluate_model(train_dataloader)
print('Results for Validation Set:')
evaluate_model(val_dataloader)
print('Results for Test Set:')
evaluate_model(test_dataloader)

Results for Train Set
MSE =  8902.23532702579
RMSE =  94.35165778631443
MAE =  64.27958849772158
Results for Validation Set
MSE =  8651.952494406056
RMSE =  93.01587227138202
MAE =  64.81446821423594
Results for Test Set
MSE =  8530.721271734763
RMSE =  92.36190379011664
MAE =  64.71737222403576


In [None]:
print('Results for Train Set:')
evaluate_model(train_dataloader)
print('Results for Validation Set:')
evaluate_model(val_dataloader)
print('Results for Test Set:')
evaluate_model(test_dataloader)

Results for Train Set:
MSE =  6335.088167679495
RMSE =  79.59326710017308
MAE =  52.30359674032878
Results for Validation Set:
MSE =  10810.191938166403
RMSE =  103.97207287616422
MAE =  66.1886708628522
Results for Test Set:
MSE =  10509.205301090396
RMSE =  102.51441508924682
MAE =  66.75109027753597
