In [1]:
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



# Imports

In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
import numpy as np
from torch.autograd import Variable

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Setting the parameters

In [4]:
EXCEL_PATH = '/content/drive/MyDrive/CSE419/Data Mining.xlsx'
LEARNING_RATE = 0.01
RANDOM_SEED = 42

# Dataset Analysis

In [5]:
df = pd.read_excel(EXCEL_PATH)
df

Unnamed: 0,DATE,SW273,SW267,SW269
0,1979-06-01,2.60,5.52,5.42
1,1979-06-02,2.43,5.30,5.24
2,1979-06-03,2.40,5.07,5.09
3,1979-06-04,2.29,4.87,4.94
4,1979-06-05,2.20,4.69,4.79
...,...,...,...,...
5010,2009-10-27,3.50,6.13,5.59
5011,2009-10-28,3.34,5.97,5.50
5012,2009-10-29,3.20,5.82,5.42
5013,2009-10-30,3.12,5.70,5.36


In [6]:
# df[['SW267', 'SW269']].iloc[0]

In [7]:
df.describe()

Unnamed: 0,SW273,SW267,SW269
count,5015.0,5015.0,5015.0
mean,4.971986,8.797886,7.08864
std,1.24885,2.063672,1.233145
min,1.67,3.14,3.18
25%,4.05,7.73,6.46
50%,5.3,9.51,7.43
75%,5.9,10.34,7.97
max,7.78,12.44,9.75


In [8]:
df.SW273.isnull().sum(), df.SW267.isnull().sum(), df.SW269.isnull().sum()

(0, 0, 0)

# GPU or CPU

In [9]:
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
DEVICE

device(type='cpu')

# Custom Dataset Class

In [10]:
class FloodDataset(Dataset):
  """Custom Dataset class to work with FloodDataset"""
  def __init__(self, excel_path, train, items=None):
    df = pd.read_excel(excel_path)
    
    if items and train==True:
      df = df[:items]

    elif items and train==False:
      df = df[-items:]

    df['index'] = range(0, len(df))
    df = df.set_index('index')
    
    self.excel_path = excel_path
    self.features = df[['SW273','SW269']]
    self.targets = df[['SW267']]

  def __getitem__(self, index):
    return torch.tensor(self.features.iloc[index], dtype=torch.float32), torch.tensor(self.targets.iloc[index], dtype=torch.float32)

  def __len__(self):
    return len(self.features)

# Dataset and Dataloader

In [11]:
train_dataset = FloodDataset(excel_path=EXCEL_PATH,
                             train=True,
                             items = 4011)
test_dataset = FloodDataset(excel_path=EXCEL_PATH,
                             train=False,
                             items = 1003)
print(f'Length Train Dataset: {len(train_dataset)} \nLength of Test Dataset: {len(test_dataset)}')

train_loader = DataLoader(dataset=train_dataset,
                          batch_size = 1000,
                          shuffle=False,
                          num_workers = 0)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size = 1003,
                         shuffle=False,
                         num_workers=0)

print(f'Length Train Loader: {len(train_loader)} \nLength of Test Loader: {len(test_loader)}')

Length Train Dataset: 4011 
Length of Test Dataset: 1003
Length Train Loader: 5 
Length of Test Loader: 1


In [12]:
# train_dataset.input, train_dataset.output
# test_dataset.input, test_dataset.output

In [13]:
# sample = next(iter(train_loader))
# print(len(sample))
# inp, out = sample
# print(inp.shape, out.shape)
# type(inp) ,type(out)

# Building the ANN Model

In [14]:
class ANN(nn.Module):
  def __init__(self, in_channels, out_channels, hidden_size, num_classes):
    super(ANN, self).__init__()
    self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)
    self.relu1 = nn.ReLU()
    self.fc1 = nn.Linear(in_features=out_channels, out_features=hidden_size)
    self.relu2 = nn.ReLU()
    self.output_layer = nn.Linear(in_features=hidden_size, out_features=num_classes)

  def forward(self, x):
#     x = x.reshape(x.size(0), 1)
    x = self.conv1(x)
    x = self.relu1(x)
    x = self.fc1(x)
    x = self.relu2(x)
    x = self.output_layer(x)
    return x

# Initializing an ANN object with Optimizer

In [15]:
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)

model = ANN(2,20,10,1)
model = model.to(DEVICE)

# optimizer = torch.optim.SGD(model.parameters(), momentum = 0.5, lr = LEARNING_RATE)
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [16]:
print(model)

ANN(
  (conv1): Conv1d(2, 20, kernel_size=(1,), stride=(1,))
  (relu1): ReLU()
  (fc1): Linear(in_features=20, out_features=10, bias=True)
  (relu2): ReLU()
  (output_layer): Linear(in_features=10, out_features=1, bias=True)
)


# Evaluation and Loss Function

In [17]:
def Loss(preds, targets, train):
    error = preds-targets
    abs_error = torch.abs(error)
    
    mse = torch.sum(error**2)
    rmse = torch.sqrt(mse/len(preds))
    
    if train==True:
        return mse, rmse
    else:
        mae = torch.sum(abs_error)
        mape = torch.sum(torch.abs((targets-preds)/targets))

        targets_mean = torch.mean(targets)
        preds_mean = torch.mean(preds)
        r_square_numerator = torch.sum((targets-targets_mean)*(preds-preds_mean))**2
        r_square_denominator = torch.sum((targets-targets_mean)**2)*torch.sum((preds-preds_mean)**2)
        r_square = r_square_numerator / r_square_denominator

        esd = torch.sum((abs_error - torch.mean(abs_error))**2)
        return mse, rmse, mae, mape, r_square, esd

In [18]:
def calculate_scores(model, data_loader, DEVICE):
  criterion = Loss 
  train_examples = 0
  total_mae = total_mse = total_mape = total_r_square = total_esd = 0.0

  for batch, (features, targets) in enumerate(data_loader):
    features, targets = features.to(DEVICE), targets.to(DEVICE)
    preds = model(features)
    
    mse, rmse, mae, mape, r_square, esd = criterion(preds, targets, train=False)
    total_mae += mae
    total_mse += mse
    total_mape += mape
    train_examples +=len(features)
    total_r_square += r_square
    total_esd += esd

  total_RMSE_loss = format(torch.sqrt((total_mse)/train_examples), '.5f')
  total_mse_loss = format(total_mse/train_examples, '.5f')
  total_mae_loss = format(total_mae/train_examples, '.5f')
  total_mape_loss = format(total_mape/train_examples, '.5f')
  total_r_square = format(total_r_square, '.5f')
  total_esd = format(torch.sqrt(esd/train_examples), '.5f')
  

  # print(f'total_RMSE_loss: {total_RMSE_loss}\ntotal_mse_loss: {total_mse_loss}\ntotal_mae_loss: {total_mae_loss}\ntotal_mape_loss: {total_mape_loss}\ntotal_r_square:{total_r_square}\ntotal_esd: {total_esd}')
  return total_RMSE_loss, total_mse_loss, total_mae_loss, total_mape_loss, total_r_square, total_esd

# Train the ANN

In [19]:
result_dict = {}
criterion = Loss

for epoch in range(1,201):    
  train_examples = 0
  train_loss = 0

  for batch ,(features, targets) in enumerate(train_loader):
    features, targets = features.to(DEVICE), targets.to(DEVICE)
    preds = model(features)
    # print(type(preds), preds.shape)
    mse_loss, rmse_loss = criterion(preds, targets, train=True)
    optimizer.zero_grad()
    rmse_loss.backward()
    optimizer.step()
    
    train_examples += len(features)
    train_loss += mse_loss
  
  train_loss = torch.sqrt(train_loss/train_examples)
  print(f'Epoch = {epoch} RMSE Training Loss = {format(train_loss, ".5f")}')
  
  if epoch%10==0:
    print(f'\n\n\nTraining Eneded for {epoch} epochs')
    model.eval()
    with torch.set_grad_enabled(False):
      total_RMSE_loss, total_mse_loss, total_mae_loss, total_mape_loss, total_r_square, total_esd = calculate_scores(model, test_loader, DEVICE)
      print(f'MAPE: {total_mape_loss}')
      result_dict[str(epoch)+'E'] = total_mape_loss

RuntimeError: ignored

In [None]:
result_list = sorted(result_dict.items(), key=lambda item:item[1])

In [None]:
for item in result_list:
  str = item[0]+' '+item[1]+'\n'
  print(str)

In [None]:
# import torch
# t1 = torch.tensor([1,2,3], dtype=torch.float)
# t2 = torch.tensor([2,3,4], dtype=torch.float)

# t3 = torch.tensor([4,5,6], dtype=torch.float)
# t4 = torch.tensor([5,6,7], dtype=torch.float)

# # RMSE = 0

# # mse = nn.MSELoss()

# # RMSE += mse(t1,t2)
# # print(RMSE)
# # RMSE += mse(t3,t4)
# # torch.sqrt(RMSE/2)

In [None]:
# t1 = torch.tensor([1,2,3,4,5,6], dtype=torch.float)
# t2 = torch.tensor([2,3,4,5,6,7], dtype=torch.float)
# RMSE = 0

# mse = nn.MSELoss()

# RMSE = torch.sqrt(mse(t1,t2))
# print(RMSE)

In [None]:
# mse = 0
# mse += torch.sum((t1-t2)**2)
# mse += torch.sum((t3-t4)**2)
# torch.sqrt(mse/6)