### Points to take care of from now on
1. Add PCA components
1. Search how to make the NN deeper
1. Get it on the GPU
1. Plot training losses with the validation losses
1. **use KFold CV**

# usual imports

In [None]:
import os
import pandas as pd
import numpy as np
import gc

import matplotlib.pyplot as plt
from scipy import stats# Imports
import torch

import torchvision
import torch.nn as nn

import torch.nn.functional as F

from torch.utils.data import DataLoader, TensorDataset, random_split

In [None]:
gc.collect()

In [None]:
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

## Import dataset

In [None]:
n_features = 300
features = [f'f_{i}' for i in range(n_features)]
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
train.head(2)

In [None]:
inputs = train.drop(['target'], axis=1).values
targets = train[['target']].values

inputs.shape, targets.shape

### 80/20 split sizes (number of training and validation rows)

In [None]:
# Row counts for the train/validation split. random_split requires the two
# lengths to add up to exactly len(dataset); truncating 0.8*n and 0.2*n
# independently can lose a row, so the validation size is the remainder.
val_1 = int(0.8*inputs.shape[0])
val_2 = inputs.shape[0] - val_1
val_1, val_2

### Hyperparameters

In [None]:
# Rows per training batch (the validation loader uses twice this value).
batch_size = 2000
# Name of the label column. NOTE(review): not referenced in the visible code.
TARGET_COLUMN = 'target'
# Width of the model's first Linear layer; must equal the number of columns
# in `inputs`. Presumably 300 features + investment_id + time_id - TODO confirm.
input_size=302
# Single regression output. NOTE(review): not referenced in the visible code.
output_size=1

In [None]:
del train
gc.collect()

# Convert to PyTorch dataset (DataLoader)

In [None]:
# Wrap the feature and target arrays as float32 tensors in a single dataset.
dataset = TensorDataset(torch.tensor(inputs, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32))
# Split into train/validation subsets of val_1 and val_2 rows respectively.
train_ds, val_ds = random_split(dataset, [val_1, val_2])

# Original author's rationale for shuffle=False: "we have to predict the future".
# NOTE(review): random_split assigns rows to the subsets at random, so the
# chronological order is presumably already lost here - confirm the intent.
train_loader = DataLoader(train_ds, batch_size, shuffle=False)
# Validation needs no gradients, so a batch twice as large is used.
val_loader = DataLoader(val_ds, batch_size*2)

In [None]:
del train_ds, val_ds, dataset, inputs, targets
gc.collect()

## GPU Utilities
#### these will help later to get our models/dataloaders on the GPU!

In [None]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move a tensor, or every tensor in a (nested) list/tuple, to `device`.

    Lists and tuples are walked recursively and always come back as lists;
    anything else is moved with a non-blocking `.to(device)` call.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader:
    """Thin wrapper that moves every batch of a dataloader to a device."""

    def __init__(self, dl, device):
        # Keep references only; nothing is moved until iteration starts.
        self.device = device
        self.dl = dl

    def __len__(self):
        """Return the number of batches in the wrapped dataloader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily yield each batch after transferring it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

#### Check if a GPU is available

In [None]:
torch.cuda.is_available()

In [None]:
device = get_default_device()
device

### "Push" to the GPU

In [None]:
# Wrap each loader so its batches are moved to `device` on iteration.
# The validation wrapper must wrap val_loader, not train_loader; otherwise
# every "validation" metric is silently computed on the training data.
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)

# This is the heart of the Neural Network!
**feel free to edit the layers anytime**

A lot of credit for this goes to:
1. Aakash N S, for his Jovian.ai course. This notebook especially makes use of functions from https://jovian.ai/aakashns-6l3/deep-learning-project-live
2. @pythonash's recent notebook using Keras, which has a very similar structure - End to end simple and powerful DNN with LeakyReLU - https://www.kaggle.com/pythonash/end-to-end-simple-and-powerful-dnn-with-leakyrelu
3. General answers from StackOverflow like this one, which helps to work out where batch normalization and dropout layers should be placed and in what order: https://stackoverflow.com/questions/39691902/ordering-of-batch-normalization-and-dropout

In [None]:
class My_Kaggle_Model(nn.Module):
    """Fully connected regression network trained with MSE loss.

    The network is a stack of Linear -> BatchNorm1d -> SiLU (-> Dropout)
    blocks with widths 64, 128, 256, 512, 256, 128, 8, followed by a final
    Linear layer producing one value per row. The first block has no dropout.

    Args:
        in_features: Number of input columns. When None (the default), the
            module-level ``input_size`` is used, as in the original code.
        dropout: Dropout probability for every block except the first.
    """

    # Hidden widths, in order; edit this tuple to reshape the network.
    HIDDEN_WIDTHS = (64, 128, 256, 512, 256, 128, 8)

    def __init__(self, in_features=None, dropout=0.4):
        super().__init__()
        if in_features is None:
            in_features = input_size

        # Every block uses SiLU (called Swish in Keras). The module order is
        # kept exactly as before so existing state_dict keys such as
        # 'layers.0.weight' remain valid.
        modules = []
        previous_width = in_features
        for index, width in enumerate(self.HIDDEN_WIDTHS):
            modules.append(nn.Linear(previous_width, width))
            modules.append(nn.BatchNorm1d(width))
            modules.append(nn.SiLU())
            if index > 0:
                modules.append(nn.Dropout(dropout))
            previous_width = width
        modules.append(nn.Linear(previous_width, 1))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the predictions of the layer stack for input batch ``x``."""
        return self.layers(x)

    def _batch_loss(self, batch):
        """Compute the MSE loss of one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        # Tensor.to() returns the moved tensor rather than moving in place,
        # so the result has to be kept. The model's own device is the target.
        model_device = next(self.parameters()).device
        inputs = inputs.to(model_device)
        targets = targets.to(model_device)
        out = self(inputs)                 # Generate predictions
        return F.mse_loss(out, targets)    # Calculate loss

    def training_step(self, batch):
        """Return the differentiable training loss for ``batch``."""
        # No per-batch empty_cache()/gc.collect(): they slow training down
        # considerably and do not lower the memory PyTorch actually needs.
        return self._batch_loss(batch)

    def validation_step(self, batch):
        """Return ``{'val_loss': tensor}`` for ``batch`` without tracking gradients."""
        with torch.no_grad():
            loss = self._batch_loss(batch)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into ``{'val_loss': float}``.

        Returns NaN when ``outputs`` is empty instead of failing in
        ``torch.stack``.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        if not batch_losses:
            return {'val_loss': float('nan')}
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result):
        """Print the training and validation loss recorded for ``epoch``."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}".format(epoch, result['train_loss'], result['val_loss']))
    
model = My_Kaggle_Model()

In [None]:
# shift model to GPU
model = to_device(model, device)

### Simple functions for evaluating and fitting

In [None]:
def evaluate(model, val_loader):
    """Run the model over ``val_loader`` and return the aggregated result.

    Args:
        model: Object providing ``eval()``, ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch ``validation_step`` outputs.
    """
    model.eval()  # Setting to eval mode makes sure that dropouts are 'frozen'
    # Validation never calls backward(), so skip building the autograd graph.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch history.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to the optimizer.
        model: Module providing ``training_step``, ``epoch_end`` and the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        A list with one dict per epoch: the result of ``evaluate`` plus a
        ``'train_loss'`` entry holding the mean training loss of that epoch
        (NaN if the training loader yielded no batches).
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        model.train()  # Setting to train mode
        train_losses = []

        for i, batch in enumerate(train_loader):
            # Clear gradients first so leftovers from earlier work cannot leak in.
            optimizer.zero_grad()
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            # Keep only the value; a non-detached loss keeps autograd history.
            train_losses.append(loss.detach())
            if i % 200 == 0:
                print('batch number -- ', i)

        # Validation phase
        result = evaluate(model, val_loader)
        if train_losses:
            result['train_loss'] = torch.stack(train_losses).mean().item()
        else:
            result['train_loss'] = float('nan')
        model.epoch_end(epoch, result)
        history.append(result)

        # TODO: early stopping is not implemented. A version would compare
        # this epoch's 'val_loss' with the previous one and stop after
        # `patience` consecutive non-improving epochs.

    return history

### Checking if everything is on the GPU

In [None]:
train_loader.device, val_loader.device

In [None]:
type(model)

In [None]:
model.state_dict()['layers.0.weight']

*observe how the weights are also on the GPU, so nice to see!*

# Train!

In [None]:
learning_rate = 1e-1

In [None]:
gc.collect()

In [None]:
history = fit(10, learning_rate, model, train_loader, val_loader, opt_func=torch.optim.Adam)

In [None]:
def plot_losses(history):
    """Plot training and validation loss against the epoch index.

    ``history`` is a list of per-epoch dicts. ``'val_loss'`` is required in
    each of them, while a missing ``'train_loss'`` is plotted as None.
    """
    training_curve = [record.get('train_loss') for record in history]
    validation_curve = [record['val_loss'] for record in history]

    # Blue crosses for training, red crosses for validation.
    for series, line_format in ((training_curve, '-bx'), (validation_curve, '-rx')):
        plt.plot(series, line_format)

    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', 'Validation'])
    plt.title('Loss vs. No. of epochs')
history

In [None]:
# !/opt/bin/nvidia-smi
# optional function to check if you have a GPU or not

In [None]:
# train for more with lower learning rate
history2 = fit(10, 5e-3, model, train_loader, val_loader, opt_func=torch.optim.Adam)

In [None]:
plot_losses(history2)

**it is a good practice to check this curve and determine if our model is overfitting or not**

In [None]:
# train for more with lower learning rate
history3 = fit(10, 5e-5, model, train_loader, val_loader, opt_func=torch.optim.Adam)

In [None]:
plot_losses(history3)

### Record results

In [None]:
history3[-1]

In [None]:
# evaluate(model, train_loader), evaluate(model, val_loader)

### important to save the model!

In [None]:
torch.save(model.state_dict(), 'my_trained_model.pth')

# time to make predictions!

In [None]:
# val_ds[1][0].shape, val_ds[1][1].shape

### Simple function to predict

In [None]:
torch.cuda.empty_cache() # just to clear some GPU cache memory

#### Let's see if it works on the train loader (it should!)

In [None]:
# Sanity check: push a single training batch through the model.
model.eval()  # set once, outside the loop: disables dropout, uses BatchNorm running stats
with torch.no_grad():  # inference only, so no autograd graph is needed
    for batch in train_loader:
        data, target = batch
        print('data.shape', data.shape)
        print('data.device', data.device)
        preds = model(data)
        print('preds.shape', preds.shape)
        break  # this is just for checking, so stop after one batch

#### and on the valid loader

In [None]:
# Sanity check: push a single validation batch through the model.
model.eval()  # set once, outside the loop: disables dropout, uses BatchNorm running stats
with torch.no_grad():  # inference only, so no autograd graph is needed
    for batch in val_loader:
        data, target = batch
        print('data.shape', data.shape)
        print('data.device', data.device)
        preds = model(data)
        print('preds.shape', preds.shape)
        break  # one batch is enough for this check

In [None]:
preds[:5], target[:5]

In [None]:
gc.collect()

# Submission Time!

### simple function to predict on the test dataframe

In [None]:
cols_order = ['investment_id' , 'time_id'] + features

In [None]:
def predict_for_test_data(test_data, net=None, chunk_size=4096):
    """Predict one target value per row of ``test_data``.

    Args:
        test_data: DataFrame whose ``.values`` form a numeric 2-D array with
            the columns the network expects.
        net: Network to use. Defaults to the notebook-level ``model``.
        chunk_size: Maximum number of rows sent through the network at once.

    Returns:
        A list of Python floats, one per input row (empty for empty input).
    """
    if net is None:
        net = model
    net.eval()

    # Run on whatever device the network lives on instead of assuming CUDA,
    # so the function also works on CPU-only machines.
    first_param = next(net.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device('cpu')

    rows = torch.tensor(test_data.values, dtype=torch.float32)
    submission_values = []
    # Predict in chunks rather than row by row, and without autograd.
    with torch.no_grad():
        for start in range(0, rows.shape[0], chunk_size):
            chunk = rows[start:start + chunk_size].to(target_device)
            preds = net(chunk)
            submission_values.extend(preds.reshape(-1).cpu().tolist())
    return submission_values

In [None]:
# test_df

In [None]:
# predict_for_test_data(test_df)

### submit off!
Credits to @Melanie7744 for informing about the submission API. Here is the link to her work https://www.kaggle.com/melanie7744/understanding-the-submission-api-for-newbies

In [None]:
# Submission loop: for every (test_df, sample_prediction_df) pair served by
# the competition API, rebuild the model inputs, predict, and register the
# predictions with the environment.
import ubiquant
env = ubiquant.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission
for (test_df, sample_prediction_df) in iter_test:
    
    print("test_df as loaded by the API")
    display(test_df.head(), test_df.shape)
    #display(sample_prediction_df.head(), sample_prediction_df.shape)
    
    # here you need to modify test_df to match the training data
    # time_id is rebuilt from the part of row_id before the first underscore.
    test_df['time_id'] = test_df.row_id.str.split("_", expand=True)[0].astype("int16") #re-create time_id
    # NOTE(review): cols_order must list the columns in the same order as the
    # training `inputs` array - confirm against the columns of train.pkl.
    test_df = test_df[cols_order]  
    print("test_df after selecting/creating the features the model was trained with")
    display(test_df.head(), test_df.shape)
    
    # Call our function to make predictions
    predictions = predict_for_test_data(test_df)
    # Assumes the predictions come back in the same row order as
    # sample_prediction_df - TODO confirm.
    sample_prediction_df['target'] = predictions  # make your predictions here
    env.predict(sample_prediction_df)   # register your predictions
    
    # print("Predictions for this time_id")
    # display(sample_prediction_df)
    # print("-----------time_id finished-----------\n\n")