In [None]:
import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.utils.data import random_split
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F

import pickle
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

#  Prepare data

In [None]:
# Load training data
# Names of the 300 feature columns: f_0 ... f_299.
features = [f'f_{i}' for i in range(300)]

# Column -> dtype map. Narrow dtypes are requested for every column that is read;
# the same mapping also decides which columns are read at all (see usecols below).
data_types_dict = {
    'time_id': 'int16',
    'investment_id': 'int16',
    "target": 'float16',
}
data_types_dict.update(dict.fromkeys(features, 'float16'))

train = pd.read_csv(
    '../input/ubiquant-market-prediction/train.csv',
    usecols=data_types_dict.keys(),
    dtype=data_types_dict,
)

train.head()

In [None]:
# Convert the feature columns and the target column of the DataFrame into
# float32 tensors and pair them up in a TensorDataset.
X = torch.tensor(train[features].to_numpy(), dtype=torch.float32)
y = torch.tensor(train['target'].to_numpy(), dtype=torch.float32)
dataset = TensorDataset(X, y)

In [None]:
# Divide the data: 60% training, 20% validation, the remainder for testing.
n_total = len(dataset)
n_train = int(n_total * 0.6)
n_val = int(n_total * 0.2)
n_test = n_total - (n_train + n_val)  # whatever is left after the two truncated shares

# Seed torch's global RNG right before splitting so the partition is repeatable.
torch.manual_seed(0)
tr, val, test = random_split(dataset, [n_train, n_val, n_test])

In [None]:
# Create data loaders
batch_size = 10000

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(tr, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size)
test_loader = DataLoader(test, batch_size=batch_size)

# Create Model

In [None]:
#Define model
class Net(pl.LightningModule):
    """Fully connected regressor: 300 input features -> one predicted value.

    Layer sizes are 300 -> 512 -> 128 -> 32 -> 1 with ReLU after every hidden
    layer and dropout (p=0.25) after the second hidden layer. Training,
    validation and test steps all use mean squared error and log it through
    Lightning under ``train_loss`` / ``val_loss`` / ``test_loss``.
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(300, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Run the network on ``x`` (last dimension 300) and return outputs with last dimension 1."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        return self.fc4(x)

    def _mse_step(self, batch, stage):
        """Compute the MSE loss for one ``(inputs, targets)`` batch and log it as ``<stage>_loss``."""
        x, t = batch
        y = self(x)
        # The model emits a trailing axis of size 1; give the targets the same
        # trailing axis so the loss is computed element-wise rather than broadcast.
        loss = F.mse_loss(y, t.unsqueeze(1))
        # Logging through Lightning (instead of print) makes the metric visible in
        # the progress bar and available to loggers and callbacks.
        self.log(f'{stage}_loss', loss, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the MSE loss for a training batch."""
        return self._mse_step(batch, 'train')

    def test_step(self, batch, batch_idx):
        """Return the MSE loss for a test batch."""
        return self._mse_step(batch, 'test')

    def validation_step(self, batch, batch_idx):
        """Return the MSE loss for a validation batch."""
        return self._mse_step(batch, 'val')

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

# Learning

In [None]:
# Training
# Fix the random seed before the model is constructed.
pl.seed_everything(0)

net = Net()

# Hand every CUDA device torch reports to the trainer (0 when there is none).
n_gpus = torch.cuda.device_count()
trainer = pl.Trainer(max_epochs=5, gpus=n_gpus)
trainer.fit(net, train_loader, val_loader)

# Validation

In [None]:
#Predict with test data
def predict_test(data_loader, model=None):
    """Run a model over every batch of ``data_loader`` and collect the results.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches.
        model: module used for prediction. Defaults to the notebook-level
            ``net`` so existing ``predict_test(loader)`` calls keep working.

    Returns:
        ``(t_all, pre_t_all)``: all targets and all model outputs, each
        concatenated along dimension 0 in loader order, on the CPU.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    if model is None:
        model = net  # notebook-level model

    # Feed inputs on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    # Dropout must be disabled while predicting, otherwise outputs are random;
    # the previous mode is restored afterwards so the caller's state is untouched.
    was_training = model.training
    model.eval()
    targets, predictions = [], []
    try:
        with torch.no_grad():
            for x, t in data_loader:
                predictions.append(model(x.to(model_device)).cpu())
                targets.append(t.cpu())
    finally:
        model.train(was_training)

    if not targets:
        raise ValueError('data_loader yielded no batches')

    # Concatenate once at the end instead of re-copying the growing tensor per batch.
    return torch.cat(targets, dim=0), torch.cat(predictions, dim=0)

In [None]:
# Result of prediction with test data
t_all, pre_t_all = predict_test(test_loader)

fig, ax = plt.subplots()
# Flatten both tensors to plain 1-D arrays before plotting (the next cell
# reshapes pre_t_all to t_all's length, so the two hold the same number of values).
ax.scatter(
    t_all.detach().cpu().numpy().reshape(-1),
    pre_t_all.detach().cpu().numpy().reshape(-1),
)
ax.set_xlabel('true target')
ax.set_ylabel('predicted target')
ax.set_title('Test set: predicted vs. true target');

In [None]:
# Correlation coefficient between the true and the predicted test targets.
# np.corrcoef returns the 2x2 correlation matrix of the two flattened series;
# the off-diagonal entries are the correlation between target and prediction.
np.corrcoef(
    t_all.detach().cpu().numpy().reshape(-1),
    pre_t_all.detach().cpu().numpy().reshape(-1),
)

# Save Model

In [None]:
# Save model
# Only the parameter dictionary (state_dict) is written, not the module object.
filename = 'model_net.pth'
state = net.state_dict()
torch.save(state, filename)

# Read Model

In [None]:
# Read model
filename = 'model_net.pth'
# map_location='cpu' lets parameters that were saved from a GPU run be loaded
# on a machine without CUDA; the inference below feeds CPU tensors anyway.
model_params = torch.load(filename, map_location='cpu')

net = Net()
# A freshly constructed module starts in training mode; switch to eval mode so
# the Dropout layer is disabled whenever this restored model is used to predict.
net.eval()
net.load_state_dict(model_params)

# Submit

In [None]:
# Predict Target
import ubiquant  # competition API module, kept next to its only use
env = ubiquant.make_env()
iter_test = env.iter_test()

# Inference must run in eval mode: in training mode the Dropout layer randomly
# zeroes activations, which would make the submitted predictions noisy.
net.eval()

# no_grad skips autograd bookkeeping, which is not needed for prediction.
with torch.no_grad():
    for (test_df, sample_prediction_df) in iter_test:
        test_x = torch.tensor(test_df[features].values, dtype=torch.float)
        pred = net(test_x)
        # The network output has a trailing axis of size 1; flatten it so a
        # plain 1-D array is assigned to the 'target' column.
        sample_prediction_df['target'] = pred.reshape(-1).cpu().numpy()

        env.predict(sample_prediction_df)
        display(sample_prediction_df)