In [None]:
import warnings


def warn(*args, **kwargs):
    """Accept any call signature and do nothing (silent stand-in for ``warnings.warn``)."""
    return None


# Swap the no-op in for the real hook so later ``warnings.warn`` calls emit nothing.
warnings.warn = warn

from typing import Any, Dict

import numpy as np
from rtdl_revisiting_models import MLP, ResNet, FTTransformer
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
import zero
import pandas as pd
import numpy as np
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

import os

# Switch the working directory to the project checkout before the `interpretDistill`
# imports that follow (presumably so that package resolves from the cwd — confirm).
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
os.chdir('/home/mattyshen/interpretableDistillation') #/simulations/Tabular_Gemstone_Price')

from interpretDistill.fourierDistill import FTDistill, FTDistillCV
from interpretDistill.binaryTransformer import BinaryTransformer

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

from interpretDistill.FTutils import *

## Load dataset

In [None]:
# Read the training table and drop its 'id' column.
# NOTE(review): machine-specific absolute path.
df_train = pd.read_csv('/home/mattyshen/interpretableDistillation/simulations/Tabular_Gemstone_Price/data/train.csv').drop(columns =['id'])
# The test table is NOT loaded — the line below is disabled.
# NOTE(review): the submission cell at the end of the notebook reads `df_test['id']`,
# which stays undefined for as long as this line is commented out.
#df_test = pd.read_csv('/home/mattyshen/interpretableDistillation/simulations/Tabular_Gemstone_Price/data/test.csv').fillna(0)

In [None]:
# Preview the first rows only; shapes of the individual splits are displayed further down.
df_train.head()

In [None]:
# Everything runs on the CPU; the trailing comment keeps the CUDA-if-available alternative.
device = "cpu" #torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Docs: https://yura52.github.io/zero/0.0.4/reference/api/zero.improve_reproducibility.html
# Fix the random seed for the run (see the linked docs for exactly which generators are seeded).
zero.improve_reproducibility(seed=123456)

### Split into train, validation, and test sets

In [None]:
# Hold-out protocol: 80/20 (train+val vs. test), then 80/20 again (train vs. val),
# i.e. roughly 64% / 16% / 20% of the rows overall.
#
# `random_state` is pinned (same value as the notebook-wide seed) so that re-running
# this cell on its own reproduces the same partition; without it the split depends on
# how much of the global NumPy random stream has already been consumed, and anything
# derived from an earlier split (e.g. X_b) silently goes out of sync.
X = {}
y = {}
X['train'], X['test'], y['train'], y['test'] = sklearn.model_selection.train_test_split(
    df_train.drop(columns=['price']),
    df_train['price'],
    train_size=0.8,
    random_state=123456,
)
X['train'], X['val'], y['train'], y['val'] = sklearn.model_selection.train_test_split(
    X['train'],
    y['train'],
    train_size=0.8,
    random_state=123456,
)

In [None]:
# Shallow copy of the split dict: X_b is a new dict whose values are, for now, the very
# same objects held by X. The entries are replaced one by one in the next cell.
X_b = X.copy()

In [None]:
# Fit the BinaryTransformer on the training split only (features + target), then apply
# the already-fitted transformer to the two held-out splits, validation first.
bt = BinaryTransformer(depth=2, bit=False)
X_b['train'] = bt.fit_and_transform(X['train'], y['train'])
X_b.update({split: bt.transform(X[split]) for split in ('val', 'test')})

In [None]:
# Number of missing cells per split, in (train, val, test) order.
tuple(X[split].isna().to_numpy().sum() for split in ('train', 'val', 'test'))

In [None]:
# Feature-matrix shapes, in (train, val, test) order.
tuple(X[split].shape for split in ('train', 'val', 'test'))

In [None]:
# Target shapes, in (train, val, test) order.
tuple(y[split].shape for split in ('train', 'val', 'test'))

In [None]:
# Read by `evaluate` to pick the metric branch and by the training loop to choose the
# sign of the score handed to `progress.update`.
task_type = 'regression'

## Define models

In [None]:
# One output unit per row (the notebook sets task_type = 'regression').
d_out = 1
# --- Models fed with the original feature frame: input width = columns of X['train'].
# NOTE(review): the four constructors run in this order after the seed was fixed, so
# reordering them would presumably change the initial weights — confirm before moving.
mlp = MLP(
    d_in=X['train'].shape[1],
    d_out=d_out,
    n_blocks=2,
    d_block=384,
    dropout=0.1,
)
resnet = ResNet(
    d_in=X['train'].shape[1],
    d_out=d_out,
    n_blocks=2,
    d_block=192,
    d_hidden=None,
    d_hidden_multiplier=2.0,
    dropout1=0.15,
    dropout2=0.15,
)
# --- Same two architectures and hyper-parameters, sized for the BinaryTransformer
# output stored in X_b['train'].
mlp_b = MLP(
    d_in=X_b['train'].shape[1],
    d_out=d_out,
    n_blocks=2,
    d_block=384,
    dropout=0.1,
)
resnet_b = ResNet(
    d_in=X_b['train'].shape[1],
    d_out=d_out,
    n_blocks=2,
    d_block=192,
    d_hidden=None,
    d_hidden_multiplier=2.0,
    dropout1=0.15,
    dropout2=0.15,
)
# NOTE(review): the FTTransformer below is disabled, yet a later prediction cell calls
# `fttransformer(...)`; that name does not exist while this stays commented out.
# fttransformer = FTTransformer(
#     n_cont_features=0,
#     cat_cardinalities=[1]*X['train'].shape[1],
#     d_out=d_out,
#     n_blocks=3,
#     d_block=192,
#     attention_n_heads=8,
#     attention_dropout=0.2,
#     ffn_d_hidden=None,
#     ffn_d_hidden_multiplier=4 / 3,
#     ffn_dropout=0.1,
#     residual_dropout=0.0,
# )

In [None]:
# --- Training hyper-parameters -------------------------------------------------
# NOTE(review): the epoch count and the optimiser settings were never assigned
# anywhere in the notebook; the values below are starting points, not tuned choices.
num_epochs = 10
lr = 1e-3
weight_decay = 0.0

# --- Tensors -------------------------------------------------------------------
# The splits live in the `X` / `y` dicts as pandas objects, so convert via NumPy.
# Targets get a trailing unit axis so they line up with the (batch, d_out) model
# output; otherwise MSELoss would broadcast (batch, 1) against (batch,).
X_train_tensor = torch.tensor(X['train'].values, dtype=torch.float32)
y_train_tensor = torch.tensor(y['train'].to_numpy(), dtype=torch.float32).unsqueeze(1)
X_val_tensor = torch.tensor(X['val'].values, dtype=torch.float32)
y_val_tensor = torch.tensor(y['val'].to_numpy(), dtype=torch.float32).unsqueeze(1)

# Define DataLoader for training and validation sets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=64)

criterion = nn.MSELoss()
models = [mlp, resnet]
# One optimiser per model, index-aligned with `models`.
optimizers = [
    torch.optim.AdamW(m.parameters(), lr=lr, weight_decay=weight_decay) for m in models
]

# Per-model bookkeeping: best validation loss so far and a checkpoint file of its own,
# so that one model's checkpoint never overwrites the other's.
best_val_loss = [float('inf')] * len(models)
best_model_paths = [f'best_model_{idx}.pth' for idx in range(len(models))]

for epoch in range(num_epochs):
    for model_idx, model in enumerate(models):
        optimizer = optimizers[model_idx]

        # Training pass
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # Validation loop: sample-weighted mean of the per-batch losses
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                val_loss += criterion(outputs, targets).item() * inputs.size(0)

        val_loss /= len(val_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Model {model_idx+1}, Validation Loss: {val_loss}')

        # Save the model if the validation loss has decreased
        if val_loss < best_val_loss[model_idx]:
            best_val_loss[model_idx] = val_loss
            torch.save(model.state_dict(), best_model_paths[model_idx])
            print("Saving model with best validation loss")

# Roll every model back to its own best checkpoint (skipped when none was written,
# e.g. num_epochs == 0 or the loss was never finite) and leave it in eval mode.
for model_idx, model in enumerate(models):
    if best_val_loss[model_idx] < float('inf'):
        model.load_state_dict(torch.load(best_model_paths[model_idx]))
    model.eval()

# Load the best model: the one with the lowest validation loss overall.
best_idx = min(range(len(models)), key=lambda i: best_val_loss[i])
best_model = models[best_idx]
best_model_path = best_model_paths[best_idx]


In [None]:
# Predict on the training split with the MLP. The cell used to call `fttransformer`,
# which is commented out in the model-definition cell and therefore undefined.
# eval() + no_grad(): dropout must be off and no autograd graph is needed for scoring.
mlp.eval()
with torch.no_grad():
    y_pred_mlp = mlp(torch.tensor(X['train'].values, dtype=torch.float32))

In [None]:
# Mean squared error between the flattened predictions and the training targets.
np.square(y_pred_mlp.detach().numpy().ravel() - y['train'].to_numpy()).mean()

In [None]:
# Settings for the AdamW fallback below.
# NOTE(review): `lr` / `weight_decay` were used here without ever being assigned in
# the notebook; these values are placeholders to tune.
lr = 0.001
weight_decay = 0.0

# NOTE(review): `model` is not assigned in this cell — it is whatever the name was last
# bound to (the loop variable of the training cell above). Bind it explicitly if a
# specific network is intended.
model.to(device)
# FTTransformer ships its own recommended optimiser; every other model gets AdamW.
# The class is referenced by the name imported at the top of the notebook — there is
# no `rtdl` module in scope.
optimizer = (
    model.make_default_optimizer()
    if isinstance(model, FTTransformer)
    else torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
)
loss_fn = F.mse_loss

In [None]:
def apply_model(x_num, x_cat=None):
    """Run the global ``model`` on one batch, dispatching on its architecture.

    Parameters
    ----------
    x_num : torch.Tensor
        Numerical features of the batch.
    x_cat : torch.Tensor or None
        Categorical features; only forwarded to FTTransformer. Must be ``None``
        for MLP / ResNet.

    Returns
    -------
    torch.Tensor
        Whatever ``model`` returns for the batch.

    Raises
    ------
    AssertionError
        If ``x_cat`` is given while ``model`` is an MLP or ResNet.
    NotImplementedError
        If ``model`` is none of the three supported classes.
    """
    # The classes are referenced by the names imported from `rtdl_revisiting_models`
    # at the top of the notebook; no module called `rtdl` is ever imported.
    if isinstance(model, FTTransformer):
        return model(x_num.float(), x_cat)
    if isinstance(model, (MLP, ResNet)):
        assert x_cat is None
        return model(x_num)
    raise NotImplementedError(
        f'Looks like you are using a custom model: {type(model)}.'
        ' Then you have to implement this branch first.'
    )

In [None]:

@torch.no_grad()
def evaluate(part):
    """Score the global ``model`` on one data split.

    The split is read from the notebook's ``X`` / ``y`` dicts (pandas objects
    produced by the train/val/test split cell). The previous version read
    ``X_num`` / ``X_cat`` tensors and sliced hard-coded column ranges, none of
    which exist in this notebook, and called ``.cpu()`` on a pandas Series.

    Parameters
    ----------
    part : str
        Key of the split to score, e.g. ``'train'``, ``'val'`` or ``'test'``.

    Returns
    -------
    float
        Root-mean-squared error when ``task_type == 'regression'``;
        accuracy for ``'binclass'`` and ``'multiclass'``.
    """
    model.eval()
    features = torch.tensor(X[part].values, dtype=torch.float32).to(device)
    # Predict in chunks of 1024 rows to bound memory use.
    chunks = [apply_model(batch) for batch in zero.iter_batches(features, 1024)]
    prediction = torch.cat(chunks).squeeze(1).cpu().numpy()
    target = y[part].to_numpy()

    if task_type == 'binclass':
        # Logits -> probabilities -> hard 0/1 labels.
        prediction = np.round(scipy.special.expit(prediction))
        score = sklearn.metrics.accuracy_score(target, prediction)
    elif task_type == 'multiclass':
        prediction = prediction.argmax(1)
        score = sklearn.metrics.accuracy_score(target, prediction)
    else:
        assert task_type == 'regression'
        score = sklearn.metrics.mean_squared_error(target, prediction) ** 0.5
    return score



# Mini-batch size used by the index loader below and by the report-frequency computation
# in the training cell.
batch_size = 64
# NOTE(review): this rebinds `train_loader`, which an earlier cell bound to a torch
# DataLoader. The training loop below treats each item it yields as a batch of row
# indices (`batch_idx`) rather than an (inputs, targets) pair.
train_loader = zero.data.IndexLoader(len(X['train']), batch_size, device=device)

# Tracks the validation score across epochs; the training loop reads `.success` / `.fail`.
progress = zero.ProgressTracker(patience=100)

# Baseline score of the model before the training loop below runs.
print(f'Test score before training: {evaluate("test"):.4f}')

## Train model

In [None]:
n_epochs = 4  # number of passes over the training split; increase for a longer run
# Report roughly five times per epoch. max(1, ...) guards against a zero modulus
# when the training split holds fewer than 5 batches.
report_frequency = max(1, len(X['train']) // batch_size // 5)

# Materialise the training split as tensors once, so the index batches produced by
# `train_loader` can be used for positional indexing. (The notebook never defines
# X_num / X_cat, and indexing the pandas Series in y['train'] with index tensors
# would be a label lookup, not a positional one.)
X_train_t = torch.tensor(X['train'].values, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y['train'].to_numpy(), dtype=torch.float32).to(device)

for epoch in range(1, n_epochs + 1):
    for iteration, batch_idx in enumerate(train_loader):
        model.train()
        optimizer.zero_grad()
        x_batch = X_train_t[batch_idx]
        y_batch = y_train_t[batch_idx]
        # squeeze(1): (batch, 1) -> (batch,) so prediction and target shapes agree.
        loss = loss_fn(apply_model(x_batch).squeeze(1), y_batch)
        loss.backward()
        optimizer.step()
        if iteration % report_frequency == 0:
            print(f'(epoch) {epoch} (batch) {iteration} (loss) {loss.item():.4f}')

    val_score = evaluate('val')
    test_score = evaluate('test')
    print(f'Epoch {epoch:03d} | Validation score: {val_score:.4f} | Test score: {test_score:.4f}', end='')
    # The tracker treats larger as better, so the regression error is negated.
    progress.update((-1 if task_type == 'regression' else 1) * val_score)
    if progress.success:
        print(' <<< BEST VALIDATION EPOCH', end='')
    print()
    if progress.fail:
        # Patience exhausted: stop early.
        break

In [None]:
# Serialise the whole model object (not just its state_dict) into the working directory.
torch.save(model, 'model_params.pt')

### Load model using GPU to predict

In [None]:
# Opt-in switch for CUDA; the message reports which device will actually be used.
USE_GPU = True

print('using device: cuda' if (USE_GPU and torch.cuda.is_available()) else 'using device: cpu')

In [None]:
# Load the file written by the save cell above. The previous path
# ('../input/model-params/model_params.pt') pointed at a different location from the
# one the notebook saves to. `map_location` keeps the load working on machines
# without the device the model was saved from.
# NOTE(review): recent PyTorch releases default `torch.load` to weights_only=True,
# which rejects whole-module pickles; pass weights_only=False there if this file is trusted.
model = torch.load('model_params.pt', map_location=device)

In [None]:
# Put the test inputs (and, on GPU, the model) on the selected device and pick the
# matching tensor types for the final casts.
# NOTE(review): `test_num` / `test_cat` are not created anywhere in this notebook.
if USE_GPU and torch.cuda.is_available():
    test_num = test_num.float().cuda()
    test_cat = test_cat.to(torch.int64).cuda()
    dtype1 = torch.cuda.FloatTensor
    dtype2 = torch.cuda.IntTensor
    model.cuda()
else:
    test_num = test_num.float()
    test_cat = test_cat.to(torch.int64)
    dtype1 = torch.FloatTensor
    # CPU counterpart of the CUDA branch's IntTensor. This used to be
    # torch.cuda.IntTensor, which casts to a CUDA type on the CPU-only path.
    dtype2 = torch.IntTensor

test_num = Variable(test_num).type(dtype1)
test_cat = Variable(test_cat).type(dtype2)


In [None]:
# Inference only: switch the model to eval mode and disable gradient tracking.
model.eval()
with torch.no_grad():
    # Positional call with the two test tensors prepared in the previous cell.
    predict = model(test_num, test_cat)

In [None]:
# Write the predictions next to their row ids.
# * The prediction column is named after this notebook's target ('price'); the previous
#   'site_eui' name belonged to a different dataset.
# * No de-standardisation: the target is never scaled in this notebook, and the
#   `y_std` / `y_mean` the old code multiplied/added are not defined anywhere.
# * reshape(-1) flattens a (rows, 1) model output into a plain column.
# NOTE(review): `df_test` exists only if the commented-out `read_csv` in the data-loading
# cell is re-enabled.
Submission = pd.DataFrame({
    'id': df_test['id'],
    'price': predict.cpu().detach().numpy().reshape(-1),
})
Submission.to_csv('submission.csv', index=False)

### Reference
* https://github.com/yandex-research/rtdl/blob/main/examples/rtdl.ipynb
* https://arxiv.org/abs/2106.11959
* https://yandex-research.github.io/rtdl/stable/index.html