* Uses `TensorDataset` instead of a custom dataset class
* Loads all data directly onto the GPU to speed up data reading and training
* K-fold cross-validation
* Training code simplified with torchhandle

For more information about torchhandle, see https://github.com/deephub-ai/torch-handle

In [None]:
# Install torchhandle, the training helper library imported in the next cell.
!pip install torchhandle

# import 

In [None]:
import numpy as np 
import pandas as pd
import os
import random
import torch
import torchhandle
from torchhandle.workflow import BaseContext,Metric
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Notebook display: the selected device and the installed torchhandle version.
device, torchhandle.__version__

# random seed

In [None]:
def set_seed(seed: int = 42) -> None:
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy, and PyTorch (the default
    generator and the current CUDA device), and stores the seed in the
    ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Seed value applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seeder in seeders:
        seeder(seed)
    # NOTE: the cuDNN deterministic/benchmark flags are left at their
    # defaults here, so GPU runs may not be bit-for-bit reproducible.


set_seed()

# read data

In [None]:
# Load the competition train/test tables from the Kaggle input directory.
train_df = pd.read_csv("../input/tabular-playground-series-aug-2021/train.csv")
test_df = pd.read_csv("../input/tabular-playground-series-aug-2021/test.csv")
# Names of the 100 feature columns: "f0" ... "f99".
feat = [f"f{i}" for i in range(100)]

# preprocess 

In [None]:
from sklearn.model_selection import KFold,StratifiedKFold,train_test_split
from sklearn.preprocessing import StandardScaler

# Feature matrix and target. The double brackets keep the target as a 2-D
# (n_samples, 1) array, matching the single-output shape of the model.
x = train_df[feat].values
y = train_df[["loss"]].values

# Fit the scaler on the training features only, then reuse the same
# mean/std for the test set. Re-fitting on the test data (fit_transform)
# would standardise train and test with different statistics, so the model
# would see test inputs on a different scale than it was trained on.
ss = StandardScaler()
x = ss.fit_transform(x)
test_scaled = ss.transform(test_df[feat].values)


# model

In [None]:
class Net(torch.nn.Module):
    """Small MLP regressor: 100 inputs -> 50 hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        # Sub-modules are created in network order and wrapped in a single
        # Sequential stored under ``layer``.
        hidden = torch.nn.Linear(100, 50)
        activation = torch.nn.LeakyReLU()
        head = torch.nn.Linear(50, 1)
        self.layer = torch.nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Return the network output for a batch ``x`` with 100 features."""
        return self.layer(x)

# metric class

In [None]:
class RMSE(Metric):
    """Root-mean-squared-error metric for a torchhandle training session.

    Squared errors are accumulated batch by batch in ``map`` and collapsed
    into a single RMSE value in ``reduce``.
    """

    def __init__(self):
        # Accumulated squared errors; None until the first batch is mapped.
        self.diff = None

    def map(self, state):
        """Append the squared errors of the batch held in ``state``.

        Reads ``state.target_batch`` and ``state.output_batch``.
        Assumes both already share the same shape (no unsqueeze is applied).
        """
        target = state.target_batch.cpu().detach()
        output = state.output_batch.cpu().detach()
        squared_error = torch.pow(target - output, 2)
        if self.diff is None:
            self.diff = squared_error
            return
        self.diff = torch.cat([self.diff, squared_error], dim=0)

    def reduce(self):
        """Return ``[rmse]`` computed over everything accumulated so far."""
        # Mean over the first dimension (the number of accumulated rows).
        mean_squared_error = torch.sum(self.diff) / self.diff.shape[0]
        return [torch.sqrt(mean_squared_error)]

    @property
    def name(self) -> list:
        """Display name(s) for the value(s) returned by ``reduce``."""
        return ["RMSE"]

    @property
    def best(self) -> list:
        """Optimisation direction per metric; presumably "min" = lower is better."""
        return ["min"]

# hyper param

In [None]:
# Component specs handed to torchhandle: each entry names a factory under
# "fn" and, optionally, its keyword arguments under "args".
model = {
    "fn": Net,
}
criterion = {
    "fn": torch.nn.MSELoss,
}
optimizer = {
    "fn": torch.optim.Adam,
    "args": {"lr": 4e-3},
}
metric_fn = [
    {"fn": RMSE},
]

# Shared context from which one training session per fold is created.
c = BaseContext(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    metric_fn=metric_fn,
    progress=None,
    context_tag="mlp",
)

# kfold and train

In [None]:
# Train one model per fold and keep each (moved back to the CPU) for
# ensembling at prediction time.
model_list = []
kf = KFold(n_splits=7, random_state=2021, shuffle=True)
for fold, (train_index, test_index) in enumerate(kf.split(x), start=1):
    # Build the fold tensors directly on the target device so the loaders
    # never copy data host -> device during training. The validation
    # tensors go to the same device as the training tensors so both
    # loaders feed the session in the same way.
    X_train = torch.tensor(x[train_index], dtype=torch.float32).to(device)
    y_train = torch.tensor(y[train_index], dtype=torch.float32).to(device)
    X_test = torch.tensor(x[test_index], dtype=torch.float32).to(device)
    y_test = torch.tensor(y[test_index], dtype=torch.float32).to(device)

    trn_ds = torch.utils.data.TensorDataset(X_train, y_train)
    val_ds = torch.utils.data.TensorDataset(X_test, y_test)
    trn_loader = torch.utils.data.DataLoader(trn_ds, batch_size=2048, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_ds, batch_size=2048)

    # "valid" must point at the held-out fold; previously it reused the
    # training loader, so the reported validation metric was really a
    # training metric.
    loaders = {"train": trn_loader, "valid": val_loader}

    train = c.make_train_session(device, dataloader=loaders, fold_tag=str(fold))
    train.train(epochs=15)
    model_list.append(train.model.cpu())

# pred

In [None]:
# Average the predictions of all fold models on the scaled test set.
# The test tensor is the same for every model, so build it once up front.
test_tensor = torch.tensor(test_scaled, dtype=torch.float32)
pred_list = []
with torch.no_grad():
    for m in model_list:
        # Inference mode; no effect on layers without train/eval-specific
        # behaviour, but keeps prediction correct if such layers are added.
        m.eval()
        pred_list.append(m(test_tensor).numpy())
pred = np.mean(pred_list, axis=0)
pred.shape

# submit

In [None]:
# Attach the averaged predictions to the test table and keep only the two
# columns written to the submission file.
test_df["loss"] = pred
sub = test_df[["id", "loss"]]
sub.head()

In [None]:
# Write the submission without the DataFrame index column.
sub.to_csv("submission.csv", index=False)