In [None]:
import torch
import numpy as np
import math
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import r2_score
import random
import matplotlib as mpl
import os
import gc
import pandas as pd
import csv
from numpy import *
from torch.utils.tensorboard import SummaryWriter
from datetime import date
import builtins
from sklearn.decomposition import PCA
# mpl.rcParams['figure.dpi'] = 180

In [None]:
# One TensorBoard writer for the whole notebook. Every SummaryWriter that is
# constructed opens its own log directory, so only a single instance is
# created here. With no explicit log_dir the run is written under ./runs/
# and `comment` is appended to the generated directory name.
writer = SummaryWriter(comment="super_break")

In [None]:
# Read the feature matrix and the target values from disk; the targets are
# turned into a single-column 2-D array.
# NOTE(review): allow_pickle=True makes np.load unpickle embedded objects, which
# can execute code -- only load these files if they come from a trusted source.
features = np.load('./super_rep_features.npy', allow_pickle=True)
output_properties = np.load('./super_rep_output.npy', allow_pickle=True).reshape(-1, 1)

print('Input features:', features.shape)
print('output:', output_properties.shape)

In [None]:
## output data
# Keep only the rows whose target value is not NaN.
is_available = ~np.isnan(output_properties[:, 0])
index = np.where(is_available)      # tuple holding one array of row positions
valid_rows = index[0]

# Targets stay a column vector (n_rows, 1); features keep all their columns.
output_y = output_properties[valid_rows, 0].reshape(-1, 1)
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because later cells read it.
input = features[valid_rows, :]

print('input:', input.shape)
print('Output shape', output_y.shape)

In [None]:
## Split the train and test
# The split is performed on row positions rather than on the data itself, so
# the resulting index arrays can slice both `input` and `output_y`.
# The same array is passed as "features" and "labels", hence x_* and y_* hold
# identical indices.
x = np.arange(output_y.shape[0])
y = x

# 20 % of the rows are held out for testing ...
X, x_test, Y, y_test = train_test_split(x, y, test_size=0.2, random_state=70)
# ... and 10 % of the remainder becomes the validation set.
x_train, x_valid, y_train, y_valid = train_test_split(X, Y, test_size=0.10, random_state=70)

n_samples_train = len(x_train)
n_samples_test = len(x_test)
n_samples_valid = len(x_valid)

print('Number of train samples:', n_samples_train)
print('Number of valid samples:', n_samples_valid)
print('Number of test samples:', n_samples_test)

# saving the test sample set for future use
np.save('./x_test_super', x_test)

In [None]:
## Dataloader
batch_size = 32
class spiderdataset(Dataset) :
    """Map-style dataset that pairs each feature row with its target row.

    Args:
        x_3: indexable collection of feature rows (anything supporting
            ``len()`` and ``[]``).
        y: indexable collection of targets, aligned with ``x_3``.
        n_samples: number of leading items to expose through ``len()``.
            Optional; when omitted it is inferred from the data, which
            requires ``x_3`` and ``y`` to have the same length.

    Raises:
        ValueError: if ``n_samples`` is negative or larger than the data, or
            if it is omitted and ``x_3`` and ``y`` differ in length.
    """

    def __init__(self, x_3, y, n_samples=None) :
        n_features, n_targets = len(x_3), len(y)
        if n_samples is None:
            if n_features != n_targets:
                raise ValueError(
                    f'x_3 and y differ in length: {n_features} vs {n_targets}')
            n_samples = n_features
        elif n_samples < 0 or n_samples > builtins_min(n_features, n_targets):
            # Catch the mismatch here instead of as an IndexError in the
            # middle of an epoch.
            raise ValueError(
                f'n_samples={n_samples} is outside the available data '
                f'({n_features} feature rows, {n_targets} target rows)')

        # data loading
        self.x3 = x_3
        self.y = y
        self.n_samples = n_samples

    def __getitem__(self,index) :
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x3[index], self.y[index]

    def __len__(self) :
        """Return the number of samples exposed by the dataset."""
        return self.n_samples


def builtins_min(a, b):
    """Two-argument minimum that does not rely on the name ``min``.

    The notebook runs ``from numpy import *``, which rebinds ``min``.
    """
    return a if a <= b else b


# Slice features and targets with each split's row indices and wrap every
# subset in its own dataset.
train_dataset = spiderdataset(input[x_train, :], output_y[x_train, :], n_samples_train)
test_dataset = spiderdataset(input[x_test, :], output_y[x_test, :], n_samples_test)
valid_dataset = spiderdataset(input[x_valid, :], output_y[x_valid, :], n_samples_valid)

# Settings shared by the three loaders; only the training loader shuffles.
_loader_options = dict(batch_size=batch_size, num_workers=1)

train_loader = DataLoader(dataset=train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, **_loader_options)
valid_loader = DataLoader(dataset=valid_dataset, shuffle=False, **_loader_options)

In [None]:
# Run on GPU index 2 when it exists. On a CUDA host with fewer visible GPUs,
# 'cuda:2' would only fail later, when a tensor or module is first moved to it,
# so fall back to the first GPU instead. Without CUDA, use the CPU.
_preferred_gpu = 2
if not torch.cuda.is_available():
    device = torch.device('cpu')
elif torch.cuda.device_count() > _preferred_gpu:
    device = torch.device(f'cuda:{_preferred_gpu}')
else:
    device = torch.device('cuda:0')

# Fully connected regression model (multilayer perceptron)
class network(nn.Module):
    """Stack of Linear + ReLU layers mapping a feature vector to one value.

    Layer widths: feature_size -> 32 -> 24 -> 16 -> 16 -> 8 -> 8 -> 1, with a
    ReLU after every layer except the last one.
    """

    def __init__(self, feature_size):
        super().__init__()

        # Input width followed by the widths of the hidden layers.
        widths = [feature_size, 32, 24, 16, 16, 8, 8]

        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        # Output layer: a single value, no activation.
        layers.append(nn.Linear(widths[-1], 1))

        # The attribute is called `nn` (like the module alias); it is part of
        # every parameter name in the state dict, e.g. "nn.0.weight".
        self.nn = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.nn(x)


# Hyperparameters
init_lr = 0.003
num_epochs = 5000

# Seed torch's global RNG before any weights are created so that the initial
# weights (and the shuffling order that is later drawn from the same RNG) are
# repeatable across kernel restarts. The value matches the random_state used
# for the train/valid/test split.
torch.manual_seed(70)

model = network(features.shape[1]).to(device)
print(model)
# `builtins.sum` is spelled out because `from numpy import *` rebinds `sum`.
# Only parameters that receive gradients are counted, matching the label.
print('Number of trainable parameters:',
      builtins.sum(p.numel() for p in model.parameters() if p.requires_grad))

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

In [None]:
def weighted_loss(iter_y, iter_y_pred, s= torch.tensor(0.51), m= torch.tensor(1.18)):
    """Mean squared error in which each sample is re-weighted by its target value.

    Each squared error is divided by a Gaussian-shaped curve of the target,
    ``exp(-0.5 * (y - m)**2 / s**2) / (2.5 * s)``, so targets far from ``m``
    contribute more to the loss than targets close to ``m``.

    Args:
        iter_y: tensor of target values.
        iter_y_pred: tensor of predictions, broadcastable against ``iter_y``.
        s: width of the curve; a tensor or a plain number.
        m: centre of the curve; a tensor or a plain number.

    Returns:
        Zero-dimensional tensor holding the mean weighted squared error.
    """
    # Follow the device of the data instead of a notebook-level global, and
    # accept plain Python numbers as well as tensors.
    s = torch.as_tensor(s, device=iter_y.device)
    m = torch.as_tensor(m, device=iter_y.device)

    # NOTE(review): 2.5 is presumably an approximation of sqrt(2*pi) -- confirm.
    inverse_weight = (1/(2.5*s))*torch.exp(-0.5*(iter_y-m)**2/s**2)
    # NOTE: the exponential underflows to 0 for targets very far from m, in
    # which case the reciprocal below becomes inf.
    error = iter_y - iter_y_pred
    loss = error*error*(1/inverse_weight)
    return torch.mean(loss)

In [None]:
## Training loop
# Lowest validation loss seen so far (despite its name); a checkpoint is written
# whenever an epoch improves on it.
largest_loss = 1E18

# torch.save does not create missing directories.
os.makedirs('./model', exist_ok=True)

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    avg_loss = 0
    for i, (i_x, iter_y) in enumerate(train_loader):
        i_x = i_x.to(device, dtype=torch.float32)
        iter_y = iter_y.to(device, dtype=torch.float32)

        # forward pass; the criterion is called as (prediction, target)
        iter_y_pred = model(i_x)
        loss = criterion(iter_y_pred, iter_y)
        # alternative: loss = weighted_loss(iter_y, iter_y_pred)

        # running mean of the per-batch losses
        avg_loss = (avg_loss*i + loss.item())/(i+1)

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation pass (no gradients, no weight updates) ----
    model.eval()
    with torch.no_grad():
        valid_loss = 0.0
        for j, (i_x, iter_y) in enumerate(valid_loader):
            i_x = i_x.to(device, dtype=torch.float32)
            iter_y = iter_y.to(device, dtype=torch.float32)

            iter_y_pred = model(i_x)
            loss = criterion(iter_y_pred, iter_y)
            # alternative: loss = weighted_loss(iter_y, iter_y_pred)

            valid_loss = (valid_loss*j + loss.item())/(j+1)

    # Keep the model with the lowest validation loss. The whole module object
    # is saved (not just its state dict), so loading it requires the `network`
    # class to be importable.
    if valid_loss < largest_loss:
        torch.save(model, './model/best.pth')
        largest_loss = valid_loss

    writer.add_scalar("Loss per epoch/train", avg_loss, epoch+1)
    writer.add_scalar("Loss per epoch/Valid", valid_loss, epoch+1)

# Make sure the last scalars reach the event file.
writer.flush()