# Initial Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!ls

drive  sample_data


In [3]:
%cd /content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/implementation2

[Errno 2] No such file or directory: '/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/implementation2'
/content


In [4]:
!pip install --quiet pytorch-lightning==1.2.5

[K     |████████████████████████████████| 826 kB 5.4 MB/s 
[K     |████████████████████████████████| 596 kB 35.6 MB/s 
[K     |████████████████████████████████| 829 kB 40.2 MB/s 
[K     |████████████████████████████████| 136 kB 49.6 MB/s 
[K     |████████████████████████████████| 398 kB 48.4 MB/s 
[K     |████████████████████████████████| 1.1 MB 40.2 MB/s 
[K     |████████████████████████████████| 94 kB 2.9 MB/s 
[K     |████████████████████████████████| 144 kB 40.8 MB/s 
[K     |████████████████████████████████| 271 kB 49.7 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone


In [5]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

In [6]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE=['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
                
rcParams['figure.figsize']= 12 , 8
                
tqdm.pandas()

In [7]:
pl.seed_everything(42)

Global seed set to 42


42

# Load Data

In [9]:
df = pd.read_csv('/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/datasets/tamilnadu_dataset.csv')

In [10]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI
0,2000,1,1,0,0
1,2000,1,1,1,0
2,2000,1,1,2,0
3,2000,1,1,3,0
4,2000,1,1,4,0


In [11]:
# Selecting hours 7 to 16 (both included)
df = df.loc[(df['Hour'] >=7) & (df['Hour'] <= 16)]

In [12]:
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']], format = '%Y/%M/%D %H')

In [13]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI,date
7,2000,1,1,7,138,2000-01-01 07:00:00
8,2000,1,1,8,341,2000-01-01 08:00:00
9,2000,1,1,9,535,2000-01-01 09:00:00
10,2000,1,1,10,692,2000-01-01 10:00:00
11,2000,1,1,11,851,2000-01-01 11:00:00


In [14]:
df = df[['date','GHI']]

In [15]:
df.head()

Unnamed: 0,date,GHI
7,2000-01-01 07:00:00,138
8,2000-01-01 08:00:00,341
9,2000-01-01 09:00:00,535
10,2000-01-01 10:00:00,692
11,2000-01-01 11:00:00,851


In [16]:
df.shape

(54750, 2)

# Pre-processing

In [17]:
features_df = df[['GHI']]

In [18]:
train_size = int(len(features_df) * 0.7)
val_size = int(len(features_df)*0.15)
test_size = int(len(features_df)*0.15)
print('Train Size = ', train_size)
print('Val Size = ', val_size)
print('Test Size = ', test_size)

Train Size =  38325
Val Size =  8212
Test Size =  8212


In [19]:
train_df, val_df, test_df = features_df[:train_size], features_df[train_size:train_size+val_size], features_df[train_size+val_size:]
train_df.shape, val_df.shape, test_df.shape

((38325, 1), (8212, 1), (8213, 1))

In [20]:
scaler = MinMaxScaler(feature_range = (-1,1))
scaler = scaler.fit(train_df)

In [21]:
train_df=pd.DataFrame(
    scaler.transform(train_df),
    index = train_df.index,
    columns = train_df.columns
)

In [22]:
val_df=pd.DataFrame(
    scaler.transform(val_df),
    index = val_df.index,
    columns = val_df.columns
)

In [23]:
test_df=pd.DataFrame(
    scaler.transform(test_df),
    index = test_df.index,
    columns = test_df.columns
)

In [24]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
    
    sequences = []
    data_size = len(input_data)
    
    for i in tqdm(range(data_size - sequence_length)):
        
        sequence = input_data[i:i+sequence_length]
        
        label_position = i + sequence_length
        label = input_data.iloc[label_position][target_column]
        
        sequences.append((sequence, label))
        
    return sequences

In [25]:
SEQUENCE_LENGTH = 120 # 5 days

train_sequences = create_sequences(train_df, 'GHI', SEQUENCE_LENGTH)
val_sequences = create_sequences(val_df, 'GHI', SEQUENCE_LENGTH)
test_sequences = create_sequences(test_df, 'GHI', SEQUENCE_LENGTH)

  0%|          | 0/38205 [00:00<?, ?it/s]

  0%|          | 0/8092 [00:00<?, ?it/s]

  0%|          | 0/8093 [00:00<?, ?it/s]

In [26]:
len(train_sequences), len(val_sequences), len(test_sequences)

(38205, 8092, 8093)

# Pytorch Dataset

In [27]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [28]:
class SolarDataset(Dataset):
    
    def __init__(self, sequences):
        self.sequences = sequences
    
    def __len__(self):
        return len(self.sequences)
        
    def __getitem__(self, idx):
        sequence, label = self.sequences[idx]

        return torch.Tensor(sequence.to_numpy()), torch.tensor(label).float()

In [29]:
BATCH_SIZE = 8

In [30]:
train_dataset = SolarDataset(train_sequences)
val_dataset = SolarDataset(val_sequences)
test_dataset = SolarDataset(test_sequences)

In [31]:
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = False)
val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Model

In [32]:
class StackedLSTMs(nn.Module):
    
    def __init__(self, n_features, n_hidden = 256, n_layers = 2):
        super().__init__()
        
        self.n_hidden = n_hidden
        
        self.lstm = nn.LSTM(
            input_size = n_features,
            hidden_size = n_hidden,
            batch_first = True,
            num_layers = n_layers,
            dropout = 0.2
        )
        
        self.regressor = nn.Linear(n_hidden, 1)
        
    def forward(self, x):
        self.lstm.flatten_parameters()
        
        _, (hidden, _) = self.lstm(x)
        out = hidden[-1]
        
        return self.regressor(out)

In [33]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [34]:
EPOCHS = 200
LEARNING_RATE = 0.001

In [35]:
model = StackedLSTMs(train_df.shape[1])
model = model.to(device)
criterion = nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [36]:
model

StackedLSTMs(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True, dropout=0.2)
  (regressor): Linear(in_features=256, out_features=1, bias=True)
)

In [37]:
!mkdir checkpoints2

In [None]:
model.train()
min_valid_loss = np.inf

total_step=len(train_dataloader)
for epoch in range(EPOCHS):
  train_loss = 0.0
  for sequences, labels in tqdm(train_dataloader):
    sequences, labels = sequences.to(device), labels.to(device)
    
    optimizer.zero_grad()

    outputs = model(sequences)
    loss = criterion(outputs,labels.unsqueeze(1))

    # Backward and optimize
    loss.backward()
    optimizer.step()

    train_loss += loss.item()
  
  val_loss = 0.0
  with torch.set_grad_enabled(False):
    for sequences, labels in val_dataloader:
      # Transfer to GPU
        sequences, labels = sequences.to(device), labels.to(device)

        outputs = model(sequences)
        loss = criterion(outputs, labels.unsqueeze(1))
        val_loss += loss.item()

  val_loss = val_loss / len(val_dataloader)
  train_loss = train_loss / len(train_dataloader)

  print(f'Epoch {epoch+1} \t\t Training Loss: {train_loss} \t\t Validation Loss: {val_loss}')
  torch.save(model.state_dict(), 'checkpoints2/epoch_' + str(epoch) + '.pth')


  if min_valid_loss > val_loss:
    print('Validation Loss Decreased from',min_valid_loss,'to ',val_loss,', Saving the model')
    min_valid_loss = val_loss
         
    # Saving State Dict
    torch.save(model.state_dict(), 'checkpoints2/best_model.pth')



  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 1 		 Training Loss: 0.04626579451930534 		 Validation Loss: 0.039137006580614274
Validation Loss Decreased from inf to  0.039137006580614274 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 2 		 Training Loss: 0.04386079756477252 		 Validation Loss: 0.041315265027244794


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 3 		 Training Loss: 0.040882895673296886 		 Validation Loss: 0.03874226166289705
Validation Loss Decreased from 0.039137006580614274 to  0.03874226166289705 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 4 		 Training Loss: 0.03936745183906455 		 Validation Loss: 0.03687047504624601
Validation Loss Decreased from 0.03874226166289705 to  0.03687047504624601 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 5 		 Training Loss: 0.03854309904949838 		 Validation Loss: 0.0366083555663899
Validation Loss Decreased from 0.03687047504624601 to  0.0366083555663899 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 6 		 Training Loss: 0.03792546328465172 		 Validation Loss: 0.03594713876378886
Validation Loss Decreased from 0.0366083555663899 to  0.03594713876378886 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 7 		 Training Loss: 0.037238533043417 		 Validation Loss: 0.03554594069222364
Validation Loss Decreased from 0.03594713876378886 to  0.03554594069222364 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 8 		 Training Loss: 0.0379934283879369 		 Validation Loss: 0.04528450757136137


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 9 		 Training Loss: 0.03786915006278846 		 Validation Loss: 0.035715111473972334


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 10 		 Training Loss: 0.036500620988931774 		 Validation Loss: 0.035485122940609355
Validation Loss Decreased from 0.03554594069222364 to  0.035485122940609355 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 11 		 Training Loss: 0.036234755199823165 		 Validation Loss: 0.03545938812383992
Validation Loss Decreased from 0.035485122940609355 to  0.03545938812383992 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 12 		 Training Loss: 0.03592058067703891 		 Validation Loss: 0.035122020741873566
Validation Loss Decreased from 0.03545938812383992 to  0.035122020741873566 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 13 		 Training Loss: 0.035709949173124825 		 Validation Loss: 0.03506928069335031
Validation Loss Decreased from 0.035122020741873566 to  0.03506928069335031 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 14 		 Training Loss: 0.035359350879581954 		 Validation Loss: 0.0347532689312092
Validation Loss Decreased from 0.03506928069335031 to  0.0347532689312092 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 15 		 Training Loss: 0.035224611656344884 		 Validation Loss: 0.035110310900297474


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 16 		 Training Loss: 0.035287082603084476 		 Validation Loss: 0.03463155539301887
Validation Loss Decreased from 0.0347532689312092 to  0.03463155539301887 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 17 		 Training Loss: 0.03566337194801787 		 Validation Loss: 0.036088483837978885


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 18 		 Training Loss: 0.03508557200488307 		 Validation Loss: 0.034785666097859255


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 19 		 Training Loss: 0.03466993633318265 		 Validation Loss: 0.03459359791747335
Validation Loss Decreased from 0.03463155539301887 to  0.03459359791747335 , Saving the model


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 20 		 Training Loss: 0.034614434523632366 		 Validation Loss: 0.03472948028602943


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 21 		 Training Loss: 0.03468313689501344 		 Validation Loss: 0.034738519037691803


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 22 		 Training Loss: 0.03444749317252563 		 Validation Loss: 0.034710584827717175


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 23 		 Training Loss: 0.03491338179447214 		 Validation Loss: 0.034984503140265355


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 24 		 Training Loss: 0.03425856390715653 		 Validation Loss: 0.03487222999395994


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 25 		 Training Loss: 0.03439865185889862 		 Validation Loss: 0.03493292283705296


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 26 		 Training Loss: 0.034394307583596864 		 Validation Loss: 0.03475732708581274


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 27 		 Training Loss: 0.03418107888789489 		 Validation Loss: 0.035253812410438935


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 28 		 Training Loss: 0.03370680334623715 		 Validation Loss: 0.03484681279269892


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 29 		 Training Loss: 0.03381883192426448 		 Validation Loss: 0.0349093777714357


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 30 		 Training Loss: 0.0333680504852635 		 Validation Loss: 0.03496667965937161


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 31 		 Training Loss: 0.033254282867838846 		 Validation Loss: 0.035212150725028514


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 32 		 Training Loss: 0.032938252604329823 		 Validation Loss: 0.03514044785513423


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 33 		 Training Loss: 0.032711085959884895 		 Validation Loss: 0.03548734403256071


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 34 		 Training Loss: 0.03234946590756504 		 Validation Loss: 0.03520301784760974


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 35 		 Training Loss: 0.03258365003786526 		 Validation Loss: 0.035414791909615204


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 36 		 Training Loss: 0.032280535279153796 		 Validation Loss: 0.035326592707932876


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 37 		 Training Loss: 0.03238079219536029 		 Validation Loss: 0.03587106515052029


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 38 		 Training Loss: 0.0314041977741609 		 Validation Loss: 0.03567096140470219


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 39 		 Training Loss: 0.03212675130755843 		 Validation Loss: 0.036045216501719435


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 40 		 Training Loss: 0.031159889567744743 		 Validation Loss: 0.037311035726607544


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 41 		 Training Loss: 0.03051389445517937 		 Validation Loss: 0.03771253218620108


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 42 		 Training Loss: 0.030420404769217417 		 Validation Loss: 0.03905942033458199


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 43 		 Training Loss: 0.03004619780163819 		 Validation Loss: 0.038271449188556406


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 44 		 Training Loss: 0.029952910998418555 		 Validation Loss: 0.038434908205248496


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 45 		 Training Loss: 0.028979924797491775 		 Validation Loss: 0.03842512927950579


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 46 		 Training Loss: 0.029062345936395933 		 Validation Loss: 0.04140370394164888


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 47 		 Training Loss: 0.03089285349988573 		 Validation Loss: 0.039153753776665086


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 48 		 Training Loss: 0.030194695299106514 		 Validation Loss: 0.0388377936054923


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 49 		 Training Loss: 0.028012393852395158 		 Validation Loss: 0.040436172183891096


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 50 		 Training Loss: 0.02762303467966262 		 Validation Loss: 0.03960798193937353


  0%|          | 0/4776 [00:00<?, ?it/s]

Epoch 51 		 Training Loss: 0.027735726818472032 		 Validation Loss: 0.040446857949732636


  0%|          | 0/4776 [00:00<?, ?it/s]

In [None]:
len(train_dataloader)

# Testing

In [None]:
model.load_state_dict(torch.load('checkpoints/epoch_145.pth'))

<All keys matched successfully>

In [None]:
model.train()

StackedLSTMs(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True, dropout=0.2)
  (regressor): Linear(in_features=64, out_features=1, bias=True)
)

In [None]:
def evaluate_model(loader):
  model.eval()
  mse = 0.0
  mae = 0.0
  for sequences, labels in loader:
    # Transfer to GPU
    sequences, labels = sequences.to(device), labels.to(device)
    outputs = scaler.inverse_transform(model(sequences).cpu().detach().numpy())
    labels = scaler.inverse_transform(labels.unsqueeze(1).cpu())
    mse += ((outputs - labels)*(outputs - labels)).sum()
    mae += abs(outputs - labels).sum()

  mse /= BATCH_SIZE*len(loader)
  mae /= BATCH_SIZE*len(loader)

  print('MSE = ',mse)
  print('RMSE = ',mse**0.5)
  print('MAE = ',mae)

In [None]:
print('Results for Train Set:')
evaluate_model(train_dataloader)
print('Results for Validation Set:')
evaluate_model(val_dataloader)
print('Results for Test Set:')
evaluate_model(test_dataloader)

Results for Train Set
MSE =  8902.23532702579
RMSE =  94.35165778631443
MAE =  64.27958849772158
Results for Validation Set
MSE =  8651.952494406056
RMSE =  93.01587227138202
MAE =  64.81446821423594
Results for Test Set
MSE =  8530.721271734763
RMSE =  92.36190379011664
MAE =  64.71737222403576


In [None]:
print('Results for Train Set:')
evaluate_model(train_dataloader)
print('Results for Validation Set:')
evaluate_model(val_dataloader)
print('Results for Test Set:')
evaluate_model(test_dataloader)

Results for Train Set:
MSE =  6335.088167679495
RMSE =  79.59326710017308
MAE =  52.30359674032878
Results for Validation Set:
MSE =  10810.191938166403
RMSE =  103.97207287616422
MAE =  66.1886708628522
Results for Test Set:
MSE =  10509.205301090396
RMSE =  102.51441508924682
MAE =  66.75109027753597
