# Initial Setup

In [1]:
!ls

drive  sample_data


In [1]:
%cd /content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/encoder-decoder/gujrat/implementation1

/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/encoder-decoder/gujrat/implementation1


In [6]:
!pip install --quiet pytorch-lightning

In [4]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
# import pytorch_lightning as pl
# from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
# from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

In [5]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE=['#01BEFE', '#FFDD00', '#FF7D00', '#FF006D', '#ADFF02', '#8F00FF']

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
                
rcParams['figure.figsize']= 12 , 8
                
tqdm.pandas()

In [6]:
# pl.seed_everything(42)

# Load Data

In [7]:
df = pd.read_csv('/content/drive/MyDrive/Projects/solar_irradiance_forecasting/sem2/datasets/gujrat_dataset.csv')

In [8]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI
0,2000,1,1,0,0
1,2000,1,1,1,0
2,2000,1,1,2,0
3,2000,1,1,3,128
4,2000,1,1,4,337


In [9]:
# Selecting hours 3 to 12 (both included)
df = df.loc[(df['Hour'] >=3) & (df['Hour'] <= 12)]

In [10]:
df['date'] = pd.to_datetime(df[['Year', 'Month', 'Day', 'Hour']], format = '%Y/%M/%D %H')

In [11]:
df.head()

Unnamed: 0,Year,Month,Day,Hour,GHI,date
3,2000,1,1,3,128,2000-01-01 03:00:00
4,2000,1,1,4,337,2000-01-01 04:00:00
5,2000,1,1,5,513,2000-01-01 05:00:00
6,2000,1,1,6,640,2000-01-01 06:00:00
7,2000,1,1,7,714,2000-01-01 07:00:00


In [12]:
df = df[['date','GHI']]

In [13]:
df.head()

Unnamed: 0,date,GHI
3,2000-01-01 03:00:00,128
4,2000-01-01 04:00:00,337
5,2000-01-01 05:00:00,513
6,2000-01-01 06:00:00,640
7,2000-01-01 07:00:00,714


In [14]:
df.shape

(54750, 2)

# Pre-processing

In [15]:
features_df = df[['GHI']]

In [16]:
train_size = int(len(features_df) * 0.7)
val_size = int(len(features_df)*0.15)
test_size = int(len(features_df)*0.15)
print('Train Size = ', train_size)
print('Val Size = ', val_size)
print('Test Size = ', test_size)

Train Size =  38325
Val Size =  8212
Test Size =  8212


In [17]:
train_df, val_df, test_df = features_df[:train_size], features_df[train_size:train_size+val_size], features_df[train_size+val_size:]
train_df.shape, val_df.shape, test_df.shape

((38325, 1), (8212, 1), (8213, 1))

In [18]:
scaler = MinMaxScaler(feature_range = (-1,1))
scaler = scaler.fit(train_df)

In [19]:
train_df=pd.DataFrame(
    scaler.transform(train_df),
    index = train_df.index,
    columns = train_df.columns
)

In [20]:
val_df=pd.DataFrame(
    scaler.transform(val_df),
    index = val_df.index,
    columns = val_df.columns
)

In [21]:
test_df=pd.DataFrame(
    scaler.transform(test_df),
    index = test_df.index,
    columns = test_df.columns
)

In [22]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
    
    sequences = []
    data_size = len(input_data)
    
    for i in tqdm(range(data_size - sequence_length)):
        
        sequence = input_data[i:i+sequence_length]
        
        label_position = i + sequence_length
        label = input_data.iloc[label_position][target_column]
        
        sequences.append((sequence, label))
        
    return sequences

In [23]:
SEQUENCE_LENGTH = 120 # 5 days

train_sequences = create_sequences(train_df, 'GHI', SEQUENCE_LENGTH)
val_sequences = create_sequences(val_df, 'GHI', SEQUENCE_LENGTH)
test_sequences = create_sequences(test_df, 'GHI', SEQUENCE_LENGTH)

  0%|          | 0/38205 [00:00<?, ?it/s]

  0%|          | 0/8092 [00:00<?, ?it/s]

  0%|          | 0/8093 [00:00<?, ?it/s]

In [24]:
len(train_sequences), len(val_sequences), len(test_sequences)

(38205, 8092, 8093)

# Pytorch Dataset

In [25]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [26]:
class SolarDataset(Dataset):
    
    def __init__(self, sequences):
        self.sequences = sequences
    
    def __len__(self):
        return len(self.sequences)
        
    def __getitem__(self, idx):
        sequence, label = self.sequences[idx]

        return torch.Tensor(sequence.to_numpy()), torch.tensor(label).float()

In [27]:
BATCH_SIZE = 8

In [28]:
train_dataset = SolarDataset(train_sequences)
val_dataset = SolarDataset(val_sequences)
test_dataset = SolarDataset(test_sequences)

In [29]:
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = False)
val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Model

In [29]:
# class EncoderDecoder(nn.Module):
    
#     def __init__(self, n_features, n_hidden = 256, n_layers = 2):
#         super().__init__()
        
#         self.n_hidden = n_hidden
        
#         self.lstm = nn.LSTM(
#             input_size = n_features,
#             hidden_size = n_hidden,
#             batch_first = True,
#             num_layers = n_layers,
#             dropout = 0.2
#         )
        
#         self.regressor = nn.Linear(n_hidden, 1)
        
#     def forward(self, x):
#         self.lstm.flatten_parameters()
        
#         _, (hidden, _) = self.lstm(x)
#         out = hidden[-1]
        
#         return self.regressor(out)

In [None]:
# class RNNEncoder(nn.Module):
#     def __init__(self, rnn_num_layers=1, input_feature_len=1, sequence_len=168, hidden_size=100, bidirectional=False, device='cpu', rnn_dropout=0.2):
#         super().__init__()
#         self.sequence_len = sequence_len
#         self.hidden_size = hidden_size
#         self.input_feature_len = input_feature_len
#         self.num_layers = rnn_num_layers
#         self.rnn_directions = 2 if bidirectional else 1
#         self.gru = nn.GRU(
#             num_layers=rnn_num_layers,
#             input_size=input_feature_len,
#             hidden_size=hidden_size,
#             batch_first=True,
#             bidirectional=bidirectional,
#             dropout=rnn_dropout
#         )
#         self.device = device

#     def forward(self, input_seq):
#         ht = torch.zeros(self.num_layers * self.rnn_directions, input_seq.size(0), self.hidden_size, device=self.device)
#         if input_seq.ndim < 3:
#             input_seq.unsqueeze_(2)
#         gru_out, hidden = self.gru(input_seq, ht)
#         print(gru_out.shape)
#         print(hidden.shape)
#         if self.rnn_directions * self.num_layers > 1:
#             num_layers = self.rnn_directions * self.num_layers
#             if self.rnn_directions > 1:
#                 gru_out = gru_out.view(input_seq.size(0), self.sequence_len, self.rnn_directions, self.hidden_size)
#                 gru_out = torch.sum(gru_out, axis=2)
#             hidden = hidden.view(self.num_layers, self.rnn_directions, input_seq.size(0), self.hidden_size)
#             if self.num_layers > 0:
#                 hidden = hidden[-1]
#             else:
#                 hidden = hidden.squeeze(0)
#             hidden = hidden.sum(axis=0)
#         else:
#             hidden.squeeze_(0)
#         return gru_out, hidden

In [None]:
# class DecoderCell(nn.Module):
#     def __init__(self, input_feature_len, hidden_size, dropout=0.2):
#         super().__init__()
#         self.decoder_rnn_cell = nn.GRUCell(
#             input_size=input_feature_len,
#             hidden_size=hidden_size,
#         )
#         self.out = nn.Linear(hidden_size, 1)
#         self.attention = False
#         self.dropout = nn.Dropout(dropout)

#     def forward(self, prev_hidden, y):
#         rnn_hidden = self.decoder_rnn_cell(y, prev_hidden)
#         output = self.out(rnn_hidden)
#         return output, self.dropout(rnn_hidden)

In [None]:
# class EncoderDecoderWrapper(nn.Module):
#     def __init__(self, encoder, decoder_cell, output_size=3, teacher_forcing=0.3, sequence_len=336, decoder_input=True, device='cpu'):
#         super().__init__()
#         self.encoder = encoder
#         self.decoder_cell = decoder_cell
#         self.output_size = output_size
#         self.teacher_forcing = teacher_forcing
#         self.sequence_length = sequence_len
#         self.decoder_input = decoder_input
#         self.device = device

#     def forward(self, xb, yb=None):
#         if self.decoder_input:
#             decoder_input = xb[-1]
#             input_seq = xb[0]
#             if len(xb) > 2:
#                 encoder_output, encoder_hidden = self.encoder(input_seq, *xb[1:-1])
#             else:
#                 encoder_output, encoder_hidden = self.encoder(input_seq)
#         else:
#             if type(xb) is list and len(xb) > 1:
#                 input_seq = xb[0]
#                 encoder_output, encoder_hidden = self.encoder(*xb)
#             else:
#                 input_seq = xb
#                 encoder_output, encoder_hidden = self.encoder(input_seq)
#         prev_hidden = encoder_hidden
#         outputs = torch.zeros(input_seq.size(0), self.output_size, device=self.device)
#         y_prev = input_seq[:, -1, 0].unsqueeze(1)
#         for i in range(self.output_size):
#             step_decoder_input = torch.cat((y_prev, decoder_input[:, i]), axis=1)
#             if (yb is not None) and (i > 0) and (torch.rand(1) < self.teacher_forcing):
#                 step_decoder_input = torch.cat((yb[:, i].unsqueeze(1), decoder_input[:, i]), axis=1)
#             rnn_output, prev_hidden = self.decoder_cell(prev_hidden, step_decoder_input)
#             y_prev = rnn_output
#             outputs[:, i] = rnn_output.squeeze(1)
#         return outputs