# Initial Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls

drive  sample_data


In [None]:
%cd /content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/gujrat/implementation2

[Errno 2] No such file or directory: '/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/gujrat/implementation2'
/content


In [None]:
!pip install --quiet pytorch-lightning==1.2.5

[K     |████████████████████████████████| 826 kB 12.8 MB/s 
[K     |████████████████████████████████| 596 kB 44.9 MB/s 
[K     |████████████████████████████████| 829 kB 47.1 MB/s 
[K     |████████████████████████████████| 398 kB 45.0 MB/s 
[K     |████████████████████████████████| 136 kB 51.1 MB/s 
[K     |████████████████████████████████| 1.1 MB 40.0 MB/s 
[K     |████████████████████████████████| 94 kB 3.1 MB/s 
[K     |████████████████████████████████| 271 kB 53.0 MB/s 
[K     |████████████████████████████████| 144 kB 52.8 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone


In [None]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE=['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
                
rcParams['figure.figsize']= 12 , 8
                
tqdm.pandas()

In [None]:
pl.seed_everything(42)

Global seed set to 42


42

# Load Data

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/datasets/gujrat_dataset.csv')

In [None]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI
0,2000,1,1,0,0
1,2000,1,1,1,0
2,2000,1,1,2,0
3,2000,1,1,3,128
4,2000,1,1,4,337


In [None]:
# Keep only the rows whose Hour lies in the closed interval [3, 12].
df = df[df['Hour'].between(3, 12)]

In [None]:
# Build the timestamp from the Year/Month/Day/Hour columns. When given a DataFrame,
# pandas assembles the datetime from the column names, so no `format` is needed
# (the former '%Y/%M/%D %H' was not a valid date format: %M means minutes).
# `assign` returns a new frame, so nothing is written into the filtered slice of `df`.
df = df.assign(date = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']]))

In [None]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI,date
3,2000,1,1,3,128,2000-01-01 03:00:00
4,2000,1,1,4,337,2000-01-01 04:00:00
5,2000,1,1,5,513,2000-01-01 05:00:00
6,2000,1,1,6,640,2000-01-01 06:00:00
7,2000,1,1,7,714,2000-01-01 07:00:00


In [None]:
df = df[['date','GHI']]

In [None]:
df.head()

Unnamed: 0,date,GHI
3,2000-01-01 03:00:00,128
4,2000-01-01 04:00:00,337
5,2000-01-01 05:00:00,513
6,2000-01-01 06:00:00,640
7,2000-01-01 07:00:00,714


In [None]:
df.shape

(54750, 2)

# Pre-processing

In [None]:
features_df = df[['GHI']]

In [None]:
# 70 / 15 / 15 split sizes. The test size is the remainder after the train and
# validation rows, so the three sizes always add up to len(features_df) and match
# a split that gives the test set every row left over.
train_size = int(len(features_df) * 0.7)
val_size = int(len(features_df)*0.15)
test_size = len(features_df) - train_size - val_size
print('Train Size = ', train_size)
print('Val Size = ', val_size)
print('Test Size = ', test_size)

Train Size =  38325
Val Size =  8212
Test Size =  8212


In [None]:
# Positional, chronological split: train rows first, then validation, then the rest.
train_df = features_df.iloc[:train_size]
val_df = features_df.iloc[train_size:train_size + val_size]
test_df = features_df.iloc[train_size + val_size:]
(train_df.shape, val_df.shape, test_df.shape)

((38325, 1), (8212, 1), (8213, 1))

In [None]:
# Fit the [-1, 1] min-max scaler on the training split only.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_df)

In [None]:
# Scale the training split with the fitted scaler, keeping its index and column names.
# NOTE: `train_df` is overwritten with its own scaled values, so running this cell
# a second time would scale data that is already scaled.
train_df=pd.DataFrame(
    scaler.transform(train_df),
    index = train_df.index,
    columns = train_df.columns
)

In [None]:
# Scale the validation split with the fitted scaler, keeping its index and column names.
# NOTE: `val_df` is overwritten with its own scaled values, so running this cell
# a second time would scale data that is already scaled.
val_df=pd.DataFrame(
    scaler.transform(val_df),
    index = val_df.index,
    columns = val_df.columns
)

In [None]:
# Scale the test split with the fitted scaler, keeping its index and column names.
# NOTE: `test_df` is overwritten with its own scaled values, so running this cell
# a second time would scale data that is already scaled.
test_df=pd.DataFrame(
    scaler.transform(test_df),
    index = test_df.index,
    columns = test_df.columns
)

In [None]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
    """Cut ``input_data`` into sliding windows, each paired with the next target value.

    Args:
        input_data: Frame to window over; its row order is the sequence order.
        target_column: Column whose value in the row right after a window is the label.
        sequence_length: Number of consecutive rows in every window.

    Returns:
        A list of ``(window, label)`` tuples, one for every start position from
        ``0`` to ``len(input_data) - sequence_length - 1``. ``window`` is the
        ``sequence_length``-row slice of ``input_data`` and ``label`` is the
        ``target_column`` value of the row that follows it. The list is empty
        when there are not more rows than ``sequence_length``.
    """
    n_windows = len(input_data) - sequence_length

    return [
        (
            input_data[start:start + sequence_length],
            input_data.iloc[start + sequence_length][target_column],
        )
        for start in tqdm(range(n_windows))
    ]

In [None]:
# Window length in rows. Only hours 3-12 are kept per day (10 rows per day), so
# 120 rows cover 12 days of data, not 5.
# NOTE(review): if a 5-day window was intended, this would need to be 50 — confirm.
SEQUENCE_LENGTH = 120 # rows per window (12 days at 10 rows per day)

# Build (window, next-value) pairs separately for each split.
train_sequences = create_sequences(train_df, 'GHI', SEQUENCE_LENGTH)
val_sequences = create_sequences(val_df, 'GHI', SEQUENCE_LENGTH)
test_sequences = create_sequences(test_df, 'GHI', SEQUENCE_LENGTH)

  0%|          | 0/38205 [00:00<?, ?it/s]

  0%|          | 0/8092 [00:00<?, ?it/s]

  0%|          | 0/8093 [00:00<?, ?it/s]

In [None]:
len(train_sequences), len(val_sequences), len(test_sequences)

(38205, 8092, 8093)

# PyTorch Dataset

In [None]:
# CUDA for PyTorch: use the first GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Switch on cuDNN benchmark mode.
# NOTE(review): presumably chosen for speed — confirm it is acceptable alongside
# the fixed seed, since it may affect run-to-run reproducibility.
torch.backends.cudnn.benchmark = True

In [None]:
class SolarDataset(Dataset):
    """Dataset over pre-built ``(sequence, label)`` pairs.

    Every item comes back as two tensors: the sequence, converted with
    ``to_numpy()`` into a ``torch.Tensor``, and the label as a float scalar tensor.
    """

    def __init__(self, sequences):
        # Indexable collection of (sequence, label) tuples.
        self.sequences = sequences

    def __len__(self):
        """Number of stored (sequence, label) pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the ``idx``-th pair as ``(sequence_tensor, label_tensor)``."""
        window, target = self.sequences[idx]

        features = torch.Tensor(window.to_numpy())
        target_tensor = torch.tensor(target).float()

        return features, target_tensor

In [None]:
BATCH_SIZE = 8

In [None]:
train_dataset = SolarDataset(train_sequences)
val_dataset = SolarDataset(val_sequences)
test_dataset = SolarDataset(test_sequences)

In [None]:
# Shuffle the training windows only. Without shuffling, every mini-batch is made of
# consecutive windows that overlap almost completely, which gives highly correlated
# gradient steps. The model does not carry state from one batch to the next, so the
# order of the windows holds no information. Validation and test keep their order.
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Model

In [None]:
class StackedLSTMs(nn.Module):
    """Stacked LSTM followed by a linear layer that outputs one value per sequence.

    Args:
        n_features: Number of features per time step (the LSTM ``input_size``).
        n_hidden: Hidden size of every LSTM layer.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between LSTM layers. It is only handed to
            the LSTM when ``n_layers > 1``; with a single layer there is nothing
            to apply it to and PyTorch would emit a warning.
    """

    def __init__(self, n_features, n_hidden = 256, n_layers = 2, dropout = 0.2):
        super().__init__()

        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(
            input_size = n_features,
            hidden_size = n_hidden,
            batch_first = True,
            num_layers = n_layers,
            dropout = dropout if n_layers > 1 else 0.0
        )

        self.regressor = nn.Linear(n_hidden, 1)

    def forward(self, x):
        """Map a batch of sequences to one prediction each.

        Args:
            x: Batch-first input, i.e. ``(batch, seq_len, n_features)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        self.lstm.flatten_parameters()

        # Only the final hidden state is needed; hidden[-1] is the top layer's.
        _, (hidden, _) = self.lstm(x)
        out = hidden[-1]

        return self.regressor(out)

In [None]:
# CUDA for PyTorch: use the first GPU when one is available, otherwise the CPU.
# NOTE(review): this repeats the device setup cell that precedes the dataset
# definition; one of the two copies can probably be removed.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [None]:
EPOCHS = 200
LEARNING_RATE = 0.001

In [None]:
model = StackedLSTMs(train_df.shape[1])
model = model.to(device)
criterion = nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
model

StackedLSTMs(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True, dropout=0.2)
  (regressor): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
!mkdir checkpoints2

In [None]:
# Train for EPOCHS epochs. After every epoch the validation loss is measured, a
# checkpoint is written, and the weights with the lowest validation loss so far
# are additionally kept as 'checkpoints2/best_model.pth'.
min_valid_loss = np.inf

total_step=len(train_dataloader)
for epoch in range(EPOCHS):
  # Training mode must be restored every epoch because the validation pass
  # below switches the model to eval mode.
  model.train()
  train_loss = 0.0
  for sequences, labels in tqdm(train_dataloader):
    sequences, labels = sequences.to(device), labels.to(device)

    optimizer.zero_grad()

    outputs = model(sequences)
    # The model outputs (batch, 1); give the labels the same shape.
    loss = criterion(outputs,labels.unsqueeze(1))

    # Backward and optimize
    loss.backward()
    optimizer.step()

    train_loss += loss.item()

  # Validation: eval mode disables dropout so the loss is deterministic and
  # comparable between epochs; no gradients are needed.
  model.eval()
  val_loss = 0.0
  with torch.no_grad():
    for sequences, labels in val_dataloader:
      # Transfer to GPU
      sequences, labels = sequences.to(device), labels.to(device)

      outputs = model(sequences)
      loss = criterion(outputs, labels.unsqueeze(1))
      val_loss += loss.item()

  # Average of the per-batch mean losses.
  val_loss = val_loss / len(val_dataloader)
  train_loss = train_loss / len(train_dataloader)

  print(f'Epoch {epoch+1} \t\t Training Loss: {train_loss} \t\t Validation Loss: {val_loss}')
  # Note: the file name uses the 0-based epoch index, the log line above is 1-based.
  torch.save(model.state_dict(), 'checkpoints2/epoch_' + str(epoch) + '.pth')


  if min_valid_loss > val_loss:
    print('Validation Loss Decreased from',min_valid_loss,'to ',val_loss,', Saving the model')
    min_valid_loss = val_loss

    # Saving State Dict
    torch.save(model.state_dict(), 'checkpoints2/best_model.pth')



  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 1 		 Training Loss: 0.023601388295134362 		 Validation Loss: 0.032376405295594726
Validation Loss Decreased from inf to  0.032376405295594726 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 2 		 Training Loss: 0.01887382314876194 		 Validation Loss: 0.03226672036006399
Validation Loss Decreased from 0.032376405295594726 to  0.03226672036006399 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 3 		 Training Loss: 0.01810417521197082 		 Validation Loss: 0.029157794934291145
Validation Loss Decreased from 0.03226672036006399 to  0.029157794934291145 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 4 		 Training Loss: 0.01774279006499981 		 Validation Loss: 0.028468572102549668
Validation Loss Decreased from 0.029157794934291145 to  0.028468572102549668 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 5 		 Training Loss: 0.017515373243151538 		 Validation Loss: 0.02904774128463305


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 6 		 Training Loss: 0.017150085838538855 		 Validation Loss: 0.02882246411782358


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 7 		 Training Loss: 0.01720908966289681 		 Validation Loss: 0.028814646275285533


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 8 		 Training Loss: 0.017035947795589448 		 Validation Loss: 0.028876522286161697


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 9 		 Training Loss: 0.01702533514835515 		 Validation Loss: 0.02944342999764177


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 10 		 Training Loss: 0.016840377748405955 		 Validation Loss: 0.0317834104434196


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 11 		 Training Loss: 0.016688993963894148 		 Validation Loss: 0.029411810413300907


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 12 		 Training Loss: 0.01725207423407766 		 Validation Loss: 0.033994876662797935


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 13 		 Training Loss: 0.016710033378987976 		 Validation Loss: 0.03337400946664908


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 14 		 Training Loss: 0.0164718613220515 		 Validation Loss: 0.034533619049261506


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 15 		 Training Loss: 0.016467497067469822 		 Validation Loss: 0.032587645615666384


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 16 		 Training Loss: 0.01633697050162293 		 Validation Loss: 0.03019878008774907


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 17 		 Training Loss: 0.01638549482106079 		 Validation Loss: 0.029060475608189463


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 18 		 Training Loss: 0.01624897244805675 		 Validation Loss: 0.03157529706339464


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 19 		 Training Loss: 0.01684074842530621 		 Validation Loss: 0.027634620063203358
Validation Loss Decreased from 0.028468572102549668 to  0.027634620063203358 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 20 		 Training Loss: 0.016084936158553612 		 Validation Loss: 0.026424918575796735
Validation Loss Decreased from 0.027634620063203358 to  0.026424918575796735 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 21 		 Training Loss: 0.01587467875662675 		 Validation Loss: 0.03037881393409141


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 22 		 Training Loss: 0.01592287663010298 		 Validation Loss: 0.026323345946833644
Validation Loss Decreased from 0.026424918575796735 to  0.026323345946833644 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 23 		 Training Loss: 0.015825814921359167 		 Validation Loss: 0.027493150570972135


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 24 		 Training Loss: 0.01575765457357438 		 Validation Loss: 0.02653779180682142


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 25 		 Training Loss: 0.015601815546712533 		 Validation Loss: 0.025722940743377658
Validation Loss Decreased from 0.026323345946833644 to  0.025722940743377658 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 26 		 Training Loss: 0.015896571648477714 		 Validation Loss: 0.02496257184000627
Validation Loss Decreased from 0.025722940743377658 to  0.02496257184000627 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 27 		 Training Loss: 0.015531758458810618 		 Validation Loss: 0.025639743516898048


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 28 		 Training Loss: 0.01551879018096156 		 Validation Loss: 0.02557961944424066


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 29 		 Training Loss: 0.015287662353268344 		 Validation Loss: 0.02460171565697151
Validation Loss Decreased from 0.02496257184000627 to  0.02460171565697151 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 30 		 Training Loss: 0.015305309341562968 		 Validation Loss: 0.024797084364583836


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 31 		 Training Loss: 0.015079201354250582 		 Validation Loss: 0.028135352319396578


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 32 		 Training Loss: 0.015072252457848342 		 Validation Loss: 0.026231223675567027


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 33 		 Training Loss: 0.014887965083697502 		 Validation Loss: 0.02725110208404169


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 34 		 Training Loss: 0.014896619989380654 		 Validation Loss: 0.023973114419442772
Validation Loss Decreased from 0.02460171565697151 to  0.023973114419442772 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 35 		 Training Loss: 0.0148116952769373 		 Validation Loss: 0.026999280277752707


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 36 		 Training Loss: 0.014740721110939365 		 Validation Loss: 0.026398166373866497


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 37 		 Training Loss: 0.014582551606197243 		 Validation Loss: 0.02429502189666755


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 38 		 Training Loss: 0.014497337870817598 		 Validation Loss: 0.029608164009231597


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 39 		 Training Loss: 0.01455502320233038 		 Validation Loss: 0.025874383653062116


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 40 		 Training Loss: 0.014375806689401955 		 Validation Loss: 0.023873222531770814
Validation Loss Decreased from 0.023973114419442772 to  0.023873222531770814 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 41 		 Training Loss: 0.014100481328581953 		 Validation Loss: 0.026530876870432974


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 42 		 Training Loss: 0.01374752634910311 		 Validation Loss: 0.025857134295781965


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 43 		 Training Loss: 0.015413970232845593 		 Validation Loss: 0.025754046590958278


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 44 		 Training Loss: 0.013753589797359038 		 Validation Loss: 0.024585989975187447


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 45 		 Training Loss: 0.013596097003678163 		 Validation Loss: 0.02409622573240312


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 46 		 Training Loss: 0.01335526381123891 		 Validation Loss: 0.024572066348863962


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 47 		 Training Loss: 0.013954828708865245 		 Validation Loss: 0.02630510421855261


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 48 		 Training Loss: 0.01290128242016534 		 Validation Loss: 0.025779954828631664


  0%|          | 0/4776 [00:00<?, ?it/s]

In [None]:
len(train_dataloader)

# Testing

In [None]:
# Restore the weights with the lowest validation loss, as written by the training
# loop into 'checkpoints2/'. map_location lets a checkpoint saved on a GPU be
# loaded on whichever device this session uses.
model.load_state_dict(torch.load('checkpoints2/best_model.pth', map_location = device))

<All keys matched successfully>

In [None]:
# Testing uses the model for inference only, so put it in eval mode (dropout off).
model.eval()

StackedLSTMs(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True, dropout=0.2)
  (regressor): Linear(in_features=64, out_features=1, bias=True)
)

In [None]:
def evaluate_model(loader):
  """Print MSE, RMSE and MAE of the global ``model`` on ``loader``.

  Predictions and labels are passed through the global ``scaler``'s
  ``inverse_transform`` before the errors are computed. The error sums are
  divided by the number of samples actually seen, so a smaller final batch
  does not distort the averages.

  Args:
    loader: Iterable yielding ``(sequences, labels)`` tensor batches.

  Raises:
    ValueError: If ``loader`` yields no samples.
  """
  model.eval()
  squared_error = 0.0
  absolute_error = 0.0
  n_samples = 0
  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    for sequences, labels in loader:
      # Transfer to GPU
      sequences, labels = sequences.to(device), labels.to(device)
      outputs = scaler.inverse_transform(model(sequences).cpu().numpy())
      targets = scaler.inverse_transform(labels.unsqueeze(1).cpu().numpy())
      errors = outputs - targets
      squared_error += (errors * errors).sum()
      absolute_error += np.abs(errors).sum()
      n_samples += len(targets)

  if n_samples == 0:
    raise ValueError('loader produced no samples to evaluate')

  mse = squared_error / n_samples
  mae = absolute_error / n_samples

  print('MSE = ',mse)
  print('RMSE = ',mse**0.5)
  print('MAE = ',mae)

In [None]:
# Print the metrics for each split in turn.
for heading, split_loader in (
    ('Results for Train Set:', train_dataloader),
    ('Results for Validation Set:', val_dataloader),
    ('Results for Test Set:', test_dataloader),
):
  print(heading)
  evaluate_model(split_loader)

Results for Train Set
MSE =  8902.23532702579
RMSE =  94.35165778631443
MAE =  64.27958849772158
Results for Validation Set
MSE =  8651.952494406056
RMSE =  93.01587227138202
MAE =  64.81446821423594
Results for Test Set
MSE =  8530.721271734763
RMSE =  92.36190379011664
MAE =  64.71737222403576


In [None]:
# Same evaluation calls as the previous cell.
# NOTE(review): the recorded numbers differ from the previous cell's, so this was
# presumably run against different model weights — confirm which run is reported.
print('Results for Train Set:')
evaluate_model(train_dataloader)
print('Results for Validation Set:')
evaluate_model(val_dataloader)
print('Results for Test Set:')
evaluate_model(test_dataloader)

Results for Train Set:
MSE =  6335.088167679495
RMSE =  79.59326710017308
MAE =  52.30359674032878
Results for Validation Set:
MSE =  10810.191938166403
RMSE =  103.97207287616422
MAE =  66.1886708628522
Results for Test Set:
MSE =  10509.205301090396
RMSE =  102.51441508924682
MAE =  66.75109027753597
