# Library Imports and Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from time import time

from xgboost import XGBRegressor

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
import torch.nn.functional as F

from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import random as r
r.seed(0)

# Helper Functions

In [2]:
def breaker():
    print("\n" + 30*"-"+ "\n")
    
def head(x, no_of_ele=5):
    breaker()
    print(x[:no_of_ele])
    breaker()
    
def r2loss(y_true, y_pred):
  y_mean = torch.mean(y_true)
  ss_tol = 0
  for i in range(y_true.shape[0]):
    ss_tol += (y_true[i] - y_mean)**2
  ss_res = torch.sum((y_true - y_pred)**2)
  return (1 - (ss_res/ss_tol))[0]

si_full = SimpleImputer(missing_values=np.nan, strategy="mean")
sc_X    = StandardScaler()
sc_y    = StandardScaler()

# Data Handling

In [3]:
tr_data_setup = pd.read_csv("/kaggle/input/predict-volcanic-eruptions-ingv-oe/train.csv")

y = tr_data_setup["time_to_eruption"].copy()
del tr_data_setup

tr_Set = pd.read_csv('/kaggle/input/volcano-80/train_data_80.csv')
ts_Set = pd.read_csv('/kaggle/input/volcano-80/test_data_80.csv')

**Dataset Template**

In [4]:
class DS(Dataset):
    def __init__(this, X=None, y=None, mode="train"):
        this.mode = mode
        this.X = X
        if mode == "train":
            this.y = y

    def __len__(this):
        return this.X.shape[0]
    
    def __getitem__(this, idx):
        if this.mode == "train":
            return torch.FloatTensor(this.X[idx]), torch.FloatTensor(this.y[idx])
        else:
            return torch.FloatTensor(this.X[idx])

In [5]:
X = tr_Set.copy().values
X = si_full.fit_transform(X)
X = sc_X.fit_transform(X)
del tr_Set

X_test = ts_Set.copy().values
X_test = si_full.transform(X_test)
X_test = sc_X.transform(X_test)
del ts_Set

y = y.values
y = si_full.fit_transform(y.reshape(-1,1)).reshape(-1)
y = sc_y.fit_transform(y.reshape(-1,1)).reshape(-1)

num_features = X.shape[1]

# ANN Setup and Training

**Setup**

In [6]:
class ANN_CFG():
    epochs = 50
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    IL = num_features
    HL = [64, 128, 32]
    OL = 1
    
    batch_size = 256
    
    
class ANN(nn.Module):
    def __init__(this, IL=None, HL=None, OL=None):
        super(ANN, this).__init__()
        
        this.BN1 = nn.BatchNorm1d(IL)
        this.DP1 = nn.Dropout(p=0.25)
        this.FC1 = WN(nn.Linear(IL, HL[0]))
        
        this.BN2 = nn.BatchNorm1d(HL[0])
        this.DP2 = nn.Dropout(p=0.5)
        this.FC2 = WN(nn.Linear(HL[0], HL[1]))
        
        this.BN3 = nn.BatchNorm1d(HL[1])
        this.DP3 = nn.Dropout(p=0.5)
        this.FC3 = WN(nn.Linear(HL[1], HL[2]))
        
        this.BN4 = nn.BatchNorm1d(HL[2])
        this.DP4 = nn.Dropout(p=0.5)
        this.FC4 = WN(nn.Linear(HL[2], OL))
        
    def getOptimizer(this, idx, Adam=True, SGD=False):
        if Adam:
            
            opts = [optim.Adam(this.parameters(), lr=1e-2),
                    optim.Adam(this.parameters(), lr=1e-2, weight_decay=1e-5),
                    
                    optim.Adam(this.parameters(), lr=5e-3),
                    optim.Adam(this.parameters(), lr=5e-3, weight_decay=1e-5),
                    
                    optim.Adam(this.parameters(), lr=1e-3),
                    optim.Adam(this.parameters(), lr=1e-3, weight_decay=1e-5)]
            return opts[idx]
        
        else:
            
            opts = [optim.SGD(this.parameters(), lr=1e-2), 
                    optim.SGD(this.parameters(), lr=1e-2, momentum=0.9),
                    optim.SGD(this.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5),
                    
                    optim.SGD(this.parameters(), lr=1e-3), 
                    optim.SGD(this.parameters(), lr=1e-3, momentum=0.9),
                    optim.SGD(this.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-5),
                    
                    optim.SGD(this.parameters(), lr=1e-4), 
                    optim.SGD(this.parameters(), lr=1e-4, momentum=0.9),
                    optim.SGD(this.parameters(), lr=1e-4, momentum=0.9, weight_decay=1e-5),]       
            return opts[idx]

    def forward(this, x):
        x = this.BN1(x)
        x = this.DP1(x)
        x = F.relu(this.FC1(x))
        
        x = this.BN2(x)
        x = this.DP2(x)
        x = F.relu(this.FC2(x))
        
        x = this.BN3(x)
        x = this.DP3(x)
        x = F.relu(this.FC3(x))
        
        x = this.BN4(x)
        x = this.DP4(x)
        x = this.FC4(x)
        return x
    
cfg = ANN_CFG()


breaker()
print("Device     :", cfg.device)
print("Batch Size :", repr(cfg.batch_size))
print("Epochs     :", repr(cfg.epochs))
breaker()


------------------------------

Device     : cuda:0
Batch Size : 256
Epochs     : 50

------------------------------



**Training**

In [7]:
bestLoss = {"train": np.inf, "valid": np.inf}

n_folds = 5

LP = []
name_getter = []

seeders = [r.randint(0,99) for i in range(10)]
#seeders = [0]

start_time = time()
for seed in range(len(seeders)):
    breaker()
    print("Seed {seed}".format(seed=seeders[seed]))
    fold = 0
    
    for tr_idx, ts_idx in KFold(n_splits=n_folds, shuffle=True, random_state=seeders[seed]).split(X, y):
        X_train, X_valid = X[tr_idx], X[ts_idx]
        y_train, y_valid = y[tr_idx], y[ts_idx]
        
        name = "Seed_{seed}_Fold_{fold}".format(seed=seeders[seed], fold=fold)
        
        Net = ANN(cfg.IL, cfg.HL, cfg.OL)
        Net = Net.to(cfg.device)
        Net.train()
        
        optim_idx = 3
        optimizer = Net.getOptimizer(Adam=False, SGD=True, idx=optim_idx)
        
        tr_data_setup = DS(X_train, y_train.reshape(-1,1))
        va_data_setup = DS(X_valid, y_valid.reshape(-1,1))  

        dataloaders = { "train": DL(tr_data_setup, batch_size=cfg.batch_size, shuffle=True, num_workers=4,), #generator=seeds[seed]),
                        "valid" : DL(va_data_setup, batch_size=cfg.batch_size, shuffle=False, num_workers=4)}

        for e in range(cfg.epochs):
            epochLoss = {"train": 0, "valid": 0}
            for phase in ["train", "valid"]:
                if phase == "train":
                  Net.train()
                else:
                  Net.eval()
                lossPerPass = 0
                
                for tensor_x, tensor_y in dataloaders[phase]:
                    tensor_x, tensor_y = tensor_x.to(cfg.device), tensor_y.to(cfg.device)

                    optimizer.zero_grad()
                    with torch.set_grad_enabled(phase == "train"):
                        output = Net(tensor_x)
                        loss_1 = nn.MSELoss()(output, tensor_y)
                        loss_2 = nn.L1Loss()(output, tensor_y)
                        loss = loss_1 + loss_2
                        if phase == "train":
                            loss.backward()
                            optimizer.step()
                    lossPerPass += loss.item() / len(dataloaders[phase])
                epochLoss[phase] = lossPerPass
            LP.append(epochLoss)
            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                torch.save(Net.state_dict(), name)
                name_getter.append(name)
        fold += 1        
                

breaker()
print("Time Taken to Train : %.2f minutes" % ((time() - start_time)/60))
breaker()


------------------------------

Seed 49

------------------------------

Seed 97

------------------------------

Seed 53

------------------------------

Seed 5

------------------------------

Seed 33

------------------------------

Seed 65

------------------------------

Seed 62

------------------------------

Seed 51

------------------------------

Seed 38

------------------------------

Seed 61

------------------------------

Time Taken to Train : 20.83 minutes

------------------------------



# Submission

In [8]:
ts_data_setup = DS(X_test, None, mode="test")
ts_data = DL(ts_data_setup, batch_size=cfg.batch_size, shuffle=False) #,num_workers=4)

y_pred = np.zeros((X_test.shape[0], 1))

for name in name_getter:
    Pred = torch.zeros(cfg.batch_size, 1).to(cfg.device)
    Net.load_state_dict(torch.load(name))
    Net.eval()
    for feat in ts_data:
        feat = feat.to(cfg.device)
        with torch.no_grad():
            output = Net(feat)
        Pred = torch.cat((Pred, output), dim=0)

    Pred = Pred[cfg.batch_size:]
    Pred = Pred.cpu().numpy()
    Pred = sc_y.inverse_transform(Pred)
    y_pred = np.add(y_pred, Pred)
    
y_pred = np.divide(y_pred, len(name_getter))

ss = pd.read_csv("/kaggle/input/predict-volcanic-eruptions-ingv-oe/sample_submission.csv")
ss["time_to_eruption"] = Pred
ss.to_csv("./submission.csv", index=False)

breaker()
print(ss.head(5))
breaker()


------------------------------

   segment_id  time_to_eruption
0  1000213997        24137492.0
1   100023368        24695074.0
2  1000488999        22801100.0
3  1001028887        23311502.0
4  1001857862        22054174.0

------------------------------

