# Begin

In [None]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Setting Seeds, Device, and Column Configuration

In [None]:
import time
import torch
import random

# Wall-clock start time; the training cell uses it to report total time spent.
s = time.time()

# Fixed seed for reproducibility (use int(np.random.randint(0, 1e9)) for a random one).
seed = 42

# Seed the Python, NumPy and PyTorch random number generators.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Random seed:", seed)
print("Device:", device)

basic_cols = ['investment_id', 'target']
num_feat = 300 #total of 300 feats from f_0 to f_299
N_INVID = 3774 #Max investment_id
features = [f'f_{i}' for i in range(num_feat)]
cols = basic_cols + features

# dtype for every column that gets loaded. row_id and time_id are not listed
# because they are not part of `cols`.
col_dtypes = {
    'investment_id': np.int32,
    'target': np.float32,
}
# Derive the feature dtypes from `features` so they always follow `num_feat`
# instead of repeating the literal 300.
col_dtypes.update({name: np.float32 for name in features})

In [None]:
# Read the training CSV, restricted to the columns in `cols` and parsed with
# the dtypes given in `col_dtypes`.
train_csv_path = "../input/ubiquant-market-prediction/train.csv"
full_train_df = pd.read_csv(train_csv_path, usecols=cols, dtype=col_dtypes)
# Bare expression: shows the frame when run as the last line of a notebook cell.
full_train_df

# Model creation and Training

In [None]:
from torch import nn

class RegressionModel(torch.nn.Module):
    """Two-layer perceptron head with hand-rolled weights.

    Computes ``w2 @ dropout(relu(w1 @ x.T + b1)) + b2``. Each bias is a single
    scalar shared by every unit of its layer.

    Args:
        in_shape: Number of input features per sample.
        out_shape: Number of outputs per sample.
        hidden: Width of the hidden layer.
        device: Device on which the parameters are created.
    """

    def __init__(self, in_shape, out_shape, hidden, device='cpu'):
        super().__init__()
        self.in_shape, self.out_shape = in_shape, out_shape
        self.hidden = hidden
        self.device = device
        self.initialize_weights()

    def initialize_weights(self):
        """Create the parameters (standard-normal draws) and the dropout layer."""
        dev = self.device
        # Creation order (w1, w2, b1, b2) fixes both the random draws under a
        # given seed and the parameter registration order.
        self.w1 = torch.nn.Parameter(torch.randn((self.hidden, self.in_shape), device=dev))
        self.w2 = torch.nn.Parameter(torch.randn((self.out_shape, self.hidden), device=dev))
        self.b1 = torch.nn.Parameter(torch.randn(1, device=dev))
        self.b2 = torch.nn.Parameter(torch.randn(1, device=dev))
        self.drop = torch.nn.Dropout(p=0.1)

    def forward(self, x):
        """Map ``x`` of shape (n_samples, in_shape) to (out_shape, n_samples)."""
        # Hidden pre-activation: (hidden, n_samples).
        pre_activation = self.w1.mm(x.t()).add(self.b1)
        # ReLU followed by dropout (dropout is a no-op in eval mode).
        activated = self.drop(pre_activation.relu())
        # Linear read-out; samples stay on the second axis.
        return self.w2.mm(activated).add(self.b2)

class TimeSeriesModel(nn.Module):
    """Stacked GRU over the feature vector followed by a regression head.

    Args:
        in_shape: Number of input features per sample.
        out_shape: Number of outputs produced by the regression head.
        hidden_shape: GRU hidden size.
        embed_size: Dimension of the investment-id embedding table.
        device: Device on which layers and the initial hidden state live.

    Note:
        ``layer_dim`` (16) is used both as the number of stacked GRU layers
        and as the hidden width of the regression head.
    """

    def __init__(self, in_shape, out_shape=1, hidden_shape=64, embed_size=32, device=None):
        super().__init__()
        self.in_shape = in_shape
        self.out_shape = out_shape
        self.hidden_shape = hidden_shape
        self.embed_size = embed_size
        self.layer_dim = 16
        self.device = device
        self.init_layers()

    def init_layers(self):
        """Build the embedding table, the GRU stack and the regression head."""
        # The embedding stays registered so that existing checkpoints (which
        # contain its weights) still load, even though forward() does not use it.
        self.embedding = nn.Embedding(N_INVID+1, self.embed_size).to(self.device)
        self.gru = nn.GRU(self.in_shape, self.hidden_shape, self.layer_dim,
                          batch_first=True, dropout=0.1).to(self.device)
        self.out = RegressionModel(self.hidden_shape, self.out_shape, self.layer_dim, self.device)

    def forward(self, x):
        """Predict from ``x = (investment_ids, features)``.

        Only ``x[1]`` (the features, shape (n_samples, n_features)) influences
        the result. The previous implementation looked up and scaled the
        investment-id embedding but immediately overwrote it with the reshaped
        features, so the lookup was dead code. It also raised for
        ``embed_size`` values that do not broadcast against the features and
        for ids outside the embedding table. The dead computation is removed;
        outputs are unchanged.

        Returns:
            The last row of the regression head output, one value per sample.
        """
        feats = x[1]
        # Treat every sample as a length-1 sequence: (n_samples, 1, n_features).
        seq = torch.reshape(feats, (feats.size(0), -1, feats.size(1)))

        # Zero initial hidden state for every GRU layer; it carries no gradient.
        h0 = torch.zeros(self.layer_dim, seq.size(0), self.hidden_shape,
                         device=self.device)

        # Run the GRU stack and flatten (batch, seq) into a single sample axis.
        y_hat, _ = self.gru(seq, h0)
        y_hat = torch.reshape(y_hat, (y_hat.size(0)*y_hat.size(1), -1))

        # The head returns (out_shape, n_samples); keep its last row.
        y_hat = self.out(y_hat)
        return y_hat[-1, :]

In [None]:
# Split the row range of full_train_df into consecutive [start, end) windows of
# at most batch_size rows; the final window is clipped to the frame length.
batch_size = 65536
n_rows = full_train_df.shape[0]
batches = [
    (lo, min(n_rows, lo + batch_size))
    for lo in range(0, n_rows, batch_size)
]

In [None]:
from sklearn.model_selection import train_test_split as tts
from IPython.display import clear_output
import torch.optim as optim

def loss(y_predicted, y_target):
    """Return the sum of squared errors between predictions and targets.

    The value is neither averaged nor square-rooted; a caller that wants an
    MSE or RMSE has to divide by the sample count (and take the root) itself.
    """
    residual = y_predicted - y_target
    return residual.pow(2).sum()

model = TimeSeriesModel(num_feat, hidden_shape=64, embed_size=1, device=device)
epochs = 1000
verbose = max(1, epochs // 100)  # print progress every `verbose` epochs
tol = epochs//5  # early-stopping patience, in epochs without improvement

optimizer = optim.Adam(model.parameters(), lr=0.001)
# The scheduler is stepped once per batch, so one cosine half-period spans one
# pass over `batches`.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                 T_max=len(batches),
                                                 eta_min=1e-5)
min_loss = np.inf  # best (lowest) summed validation loss seen so far
cnt = 0            # consecutive epochs without validation improvement

train_size = 0
valid_size = 0

for t in tqdm(range(1, epochs+1), desc="Training"):
    # Per-epoch accumulators. Losses are kept as plain floats so that no
    # autograd graph is held alive between batches.
    total_loss = 0.0
    valid_loss = 0.0
    total_train_size = 0
    total_valid_size = 0

    for start, end in batches:
        # Split this row window into train and validation parts. The split is
        # deterministic because random_state is fixed.
        chunk = full_train_df.iloc[start:end]
        x_dataset, x_valid, y_dataset, y_valid = tts(
            chunk[['investment_id']+features],
            chunk['target'].values,
            test_size=0.1, shuffle=True, random_state=seed)
        total_train_size += x_dataset.shape[0]
        total_valid_size += x_valid.shape[0]

        #Train
        tx_data = torch.tensor(x_dataset[features].values, dtype=torch.float).to(device)
        tinv_data = torch.tensor(x_dataset['investment_id'].values, dtype=torch.int).to(device)
        ty_data = torch.tensor(y_dataset, dtype=torch.float).to(device)

        #Validation
        vx_data = torch.tensor(x_valid[features].values, dtype=torch.float).to(device)
        vinv_data = torch.tensor(x_valid["investment_id"].values, dtype=torch.int).to(device)
        vy_data = torch.tensor(y_valid, dtype=torch.float).to(device)

        # Main optimization step
        model.train()
        # Reset gradients for every batch: backward() accumulates into .grad,
        # so clearing only once per epoch would mix gradients of earlier
        # batches into every later update.
        optimizer.zero_grad()
        # Compute the current predicted y's from x_dataset
        y_predicted = model((tinv_data, tx_data))
        # See how far off the prediction is (sum of squared errors)
        current_loss = loss(y_predicted, ty_data)
        # Compute the gradient of the loss
        current_loss.backward()
        # Update model W and b accordingly.
        optimizer.step()
        # Update LR of optimizer
        scheduler.step()
        total_loss += current_loss.item()

        #Compute validation loss
        model.eval() #Change model to evaluation mode
        with torch.no_grad():
            valid_loss += loss(model((vinv_data, vx_data)), vy_data).item()

    #Check for early stopping
    if valid_loss >= min_loss:
        cnt += 1
        if cnt >= tol:
            print("Early stopping!")
            break
    else:
        #Save the model weights
        torch.save(model.state_dict(), "model_weights.pth")
        min_loss = valid_loss
        cnt = 0

    if t%verbose==0:
        # Losses are summed squared errors; divide by the sample count for the
        # MSE and take the square root for the RMSE.
        print(f"epoch = {t:2}/{epochs}, " +
              f"RMSE loss = {(total_loss/total_train_size) ** 0.5:.6f}, " +
              f"MSE loss = {(total_loss/total_train_size):.6f}, " +
              f"RMSE valid_loss = {(valid_loss/total_valid_size) ** 0.5:.6f}, " +
              f"min_loss = {(min_loss/total_valid_size) ** 0.5:.6f}, " +
              f"cnt={cnt}"
             )

print(f"Total time spent: {time.time()-s:.4f} seconds")

# Prediction

In [None]:
import ubiquant
# Create the environment object exposed by the ubiquant module
# (presumably the competition's submission API; confirm against its docs).
env = ubiquant.make_env()
# The prediction cell iterates this object as (test_df, sample_prediction_df) pairs.
iter_test = env.iter_test()

In [None]:
# Restore the weights saved during training, mapped onto the active device so
# the checkpoint loads regardless of the device it was written from.
model.load_state_dict(torch.load("model_weights.pth", map_location=device))
#Switch to evaluation mode
model.eval()
for (test_df, sample_prediction_df) in iter_test:
    inv_x = torch.tensor(test_df['investment_id'].values, dtype=torch.int).to(device)
    test_x = torch.tensor(test_df[features].values, dtype=torch.float).to(device)
    # Inference only: skip autograd bookkeeping instead of building a graph
    # and detaching the result afterwards.
    with torch.no_grad():
        pred = model((inv_x, test_x))
    sample_prediction_df['target'] = pred.cpu().numpy()
    env.predict(sample_prediction_df)
    display(sample_prediction_df)