In [None]:
import os
import sys
import datetime

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import optuna
from optuna.trial import TrialState

In [None]:
# Load the competition train/test tables from the Kaggle input directory.
train_df = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2021/train.csv")
test_df = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2021/test.csv")
# Allow up to 77 columns to be displayed so wide frames are not truncated.
pd.set_option("display.max_columns", 77)

In [None]:
# Preview the first rows of the freshly loaded training frame.
train_df.head()

In [None]:
# Number of rows per target label (class balance check).
train_df["target"].value_counts()

In [None]:
# Fit a LabelEncoder on the target column and overwrite the column with the
# encoded values; `encoder` is kept so the mapping stays available.
encoder = LabelEncoder()
train_df['target'] = encoder.fit_transform(train_df['target'])

In [None]:
# Every column except the first and the last one is treated as a feature.
# NOTE(review): assumes 'id' is the first column and 'target' the last — confirm against the CSV.
feature_cols = list(train_df.columns)[1:-1]

In [None]:
# Standardize the feature columns: the scaler is fitted on the training frame
# and the same fitted transform is then applied to the test frame.
# NOTE(review): the fit runs before the train/validation split further down, so
# rows that later become validation data contribute to the scaling statistics.
scaler = StandardScaler()
train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])
test_df[feature_cols] = scaler.transform(test_df[feature_cols])

In [None]:
# Preview the training frame after target encoding and feature scaling.
train_df.head()

In [None]:
# Model inputs: everything except the label and the 'id' column.
X = train_df.drop(['target','id'],axis=1)
# Labels: the encoded target column.
y = train_df['target']

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Preview the label series.
y.head()

In [None]:
# One stratified shuffle split with a fixed seed: 30% of the rows go to validation.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=0)

# n_splits=1, so the split generator yields exactly one (train, val) index pair.
train_index, val_index = next(sss.split(X, y))
print("TRAIN:\t", train_index, "Size:\t", len(train_index))
print("VAL:\t", val_index, "Size:\t", len(val_index))

# Positional selection of the rows belonging to each part.
X_train, y_train = X.iloc[train_index], y.iloc[train_index]
X_val, y_val = X.iloc[val_index], y.iloc[val_index]

In [None]:
# Per-class fraction of all rows that landed in the training split.
y_train.value_counts()/y.value_counts()

In [None]:
# Per-class fraction of all rows that landed in the validation split.
y_val.value_counts()/y.value_counts()

## Optuna

In [None]:
# Width of the network's output layer (see define_model).
CLASSES = 9
# Width of the network's input layer (see define_model).
NUM_FEATURES = 75
# Use CUDA when it is available, otherwise fall back to the CPU.
DEVICE = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

# NOTE(review): BATCHSIZE, N_TRAIN_EXAMPLES and N_VALID_EXAMPLES are not referenced
# by any other code visible in this notebook (get_data uses its own batch size) —
# confirm whether they are still needed.
BATCHSIZE = 1000
# Number of epochs each Optuna trial is trained for (see objective).
EPOCHS = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 14
N_VALID_EXAMPLES = BATCHSIZE * 6

# Loss used by objective for both the training and the validation pass.
criterion = nn.CrossEntropyLoss()

def define_model(trial):
    """Build an MLP whose architecture is sampled from an Optuna trial.

    The trial chooses the number of hidden blocks (1-10) and, per block, the
    number of units (32-1024) and the dropout probability (0.1-0.5). Each
    block is Linear -> ReLU -> Dropout -> BatchNorm1d; a final Linear layer
    maps to ``CLASSES`` outputs.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        An ``nn.Sequential`` taking ``NUM_FEATURES`` inputs and producing
        ``CLASSES`` logits.
    """
    depth = trial.suggest_int("n_layers", 1, 10)

    modules = []
    width_in = NUM_FEATURES
    for idx in range(depth):
        # Sampling order (units first, then dropout) matches the parameter
        # names consumed later when the best trial is rebuilt.
        width_out = trial.suggest_int("n_units_l{}".format(idx), 32, 1024)
        drop_rate = trial.suggest_float("dropout_l{}".format(idx), 0.1, 0.5)
        modules += [
            nn.Linear(width_in, width_out),
            nn.ReLU(inplace=True),
            nn.Dropout(drop_rate),
            nn.BatchNorm1d(width_out),
        ]
        width_in = width_out

    # Output layer: one logit per class.
    modules.append(nn.Linear(width_in, CLASSES))

    return nn.Sequential(*modules)

def get_data(X_train, y_train, X_val, y_val, batch_size=10000):
    """Wrap the train/validation splits in PyTorch data loaders.

    Args:
        X_train, X_val: array-likes (e.g. DataFrames) of numeric features.
        y_train, y_val: array-likes of integer class labels.
        batch_size: number of rows per batch for both loaders. Defaults to
            10000, the value that used to be hard-coded here.

    Returns:
        ``(train_loader, val_loader)``. Features are float32 tensors and labels
        are int64 tensors, so they can be fed to ``nn.CrossEntropyLoss``
        directly. Only the training loader shuffles; validation order is kept
        fixed because shuffling does not help evaluation and only makes runs
        less repeatable.
    """
    def _to_dataset(features, labels):
        # torch.tensor copies, so the loaders never alias the pandas buffers.
        return TensorDataset(
            torch.tensor(np.asarray(features), dtype=torch.float32),
            torch.tensor(np.asarray(labels), dtype=torch.long),
        )

    train_loader = DataLoader(_to_dataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(_to_dataset(X_val, y_val), batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

def objective(trial):
    """Optuna objective: train a sampled model and return its validation loss.

    The trial samples the architecture (via ``define_model``), the optimizer
    class ("Adam", "RMSprop" or "SGD") and a log-uniform learning rate in
    [1e-5, 1e-3]. The model is trained for ``EPOCHS`` epochs on the global
    train split; after every epoch the mean of the per-batch validation losses
    is reported to the trial so it can be pruned.

    Args:
        trial: Optuna trial supplying the hyperparameters.

    Returns:
        Mean per-batch validation loss of the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the trial asks to be pruned.
    """
    model = define_model(trial).to(DEVICE)

    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    train_loader, valid_loader = get_data(X_train, y_train, X_val, y_val)

    for epoch in range(EPOCHS):
        # Training pass.
        model.train()
        for inputs, labels in train_loader:
            inputs = inputs.view(inputs.size(0), -1).to(DEVICE)
            labels = labels.to(DEVICE)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels.long())
            loss.backward()
            optimizer.step()

        # Validation pass (no gradients).
        model.eval()
        batch_losses = []
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs = inputs.view(inputs.size(0), -1).to(DEVICE)
                labels = labels.to(DEVICE)
                batch_loss = criterion(model(inputs), labels.long())
                batch_losses.append(batch_loss.item())

        avg_val_loss = np.mean(batch_losses)

        # Report the intermediate value and stop early if the trial is pruned.
        trial.report(avg_val_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return avg_val_loss

In [None]:
# Show which device the hyperparameter search will run on.
DEVICE

In [None]:
# The objective returns a validation loss, so the study minimizes it.
study = optuna.create_study(direction="minimize")
# Run at most 50 trials, bounded by timeout=600 (seconds, per Optuna's API).
study.optimize(objective, n_trials=50, timeout=600)

In [None]:
# Summarize the study: how many trials were pruned vs. completed, and the best one.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
# `trial` is read again by the later cell that rebuilds the best model.
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Plot the intermediate values reported by each trial (one value per epoch in objective).
optuna.visualization.plot_intermediate_values(study).show()

## Training with the best parameters

### Stratified Split

In [None]:
# Batch size used by both loaders of the final training run.
batch_size = 1000

# Same splitter configuration (and seed) as the split used for the search.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=0)

# n_splits=1, so the split generator yields exactly one (train, val) index pair.
train_index, val_index = next(sss.split(X, y))
print("TRAIN:\t", train_index, "Size:\t", len(train_index))
print("VAL:\t", val_index, "Size:\t", len(val_index))
X_train, y_train = X.iloc[train_index], y.iloc[train_index]
X_val, y_val = X.iloc[val_index], y.iloc[val_index]

# Tensor datasets for each part (features and labels as float tensors).
train = TensorDataset(torch.Tensor(np.array(X_train)), torch.Tensor(np.array(y_train)))
val = TensorDataset(torch.Tensor(np.array(X_val)), torch.Tensor(np.array(y_val)))

# Loaders keyed by phase name; the training loop iterates over `phases`.
loaders = {
    "train": DataLoader(train, batch_size = batch_size, shuffle = True),
    "val": DataLoader(val, batch_size = batch_size, shuffle = True),
}
train_loader, val_loader = loaders["train"], loaders["val"]
phases = ["train", "val"]

In [None]:
# Per-class fraction of all rows that landed in the training split.
y_train.value_counts()/y.value_counts()

In [None]:
# Per-class fraction of all rows that landed in the validation split.
y_val.value_counts()/y.value_counts()

In [None]:
class classification_model(nn.Module):
    """Fully connected classifier.

    The network is a stack of ``Linear -> ReLU -> [Dropout] -> BatchNorm1d``
    blocks (one per entry of ``layers``) followed by a final ``Linear`` layer
    that produces ``n_out`` logits.

    Args:
        n_in: number of input features.
        n_out: number of output classes (logits).
        layers: sequence with the width of each hidden block. May be empty,
            in which case the model is a single ``Linear(n_in, n_out)``.
        p: optional sequence of dropout probabilities, one per hidden block.
            When falsy (``None`` or empty) no dropout layers are added.

    Raises:
        ValueError: if ``p`` is given but has fewer entries than ``layers``.

    Attributes:
        n_in: the input width passed to the constructor.
        n_out: the output width passed to the constructor.
        layers: the ``nn.Sequential`` holding all modules.
    """

    net_name = "classification_model"

    def __init__(self, n_in, n_out, layers, p=None):
        super(classification_model, self).__init__()

        hidden_sizes = list(layers)
        if p and len(p) < len(hidden_sizes):
            raise ValueError(
                "p has {} dropout value(s) but {} hidden layer(s) were requested".format(
                    len(p), len(hidden_sizes)
                )
            )

        self.n_in = n_in
        self.n_out = n_out

        all_layers = []
        # Track the running width locally so self.n_in keeps the true input size.
        in_features = n_in
        for i, out_features in enumerate(hidden_sizes):
            all_layers.append(nn.Linear(in_features, out_features))
            all_layers.append(nn.ReLU(inplace=True))
            if p:
                all_layers.append(nn.Dropout(p[i]))
            all_layers.append(nn.BatchNorm1d(out_features))
            in_features = out_features

        # Using the running width (not layers[-1]) also covers an empty `layers`.
        all_layers.append(nn.Linear(in_features, self.n_out))

        self.layers = nn.Sequential(*all_layers)

    def forward(self, x):
        """Return the logits for a batch ``x`` of shape (batch, n_in)."""
        return self.layers(x)

In [None]:
"""
Best trial:
  Value:  1.7740078568458557
  Params: 
    n_layers: 2
    n_units_l0: 582
    dropout_l0: 0.32774551169492927
    n_units_l1: 73
    dropout_l1: 0.20755288576753822
    optimizer: RMSprop
    lr: 0.0009641167645278553
"""

# Rebuild the network and optimizer from the best Optuna trial. The parameters
# are read straight from the study so this cell does not depend on the generic
# `trial` variable left behind by the statistics cell.
best_params = study.best_trial.params

# Per-layer widths and dropout rates, in layer order.
n_hidden = best_params["n_layers"]
layers_ = [best_params[f"n_units_l{i}"] for i in range(n_hidden)]
dropout_ = [best_params[f"dropout_l{i}"] for i in range(n_hidden)]
l_rate = best_params["lr"]

device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
# Same input/output sizes as the models built during the search.
model = classification_model(NUM_FEATURES, CLASSES, layers_, p=dropout_)
model.to(device)

criterion = nn.CrossEntropyLoss()
# Resolve the optimizer class by name, exactly as objective() does, so any
# optimizer the search can pick is supported here too.
optimizer = getattr(optim, best_params["optimizer"])(model.parameters(), lr=l_rate)

print(device)
print(model)

In [None]:
# Final training run: for every epoch do one training pass and one validation
# pass, track mean batch loss and accuracy per phase, and checkpoint the model
# whenever the validation loss improves.
loss_train = []
loss_valid = []
# Start from +inf so the first epoch always produces a checkpoint, whatever
# the scale of the loss.
best_validation_loss = float("inf")
best_epoch = 1
correct = 0
total = 0

n_epochs = 50

# Checkpoints go to a directory named after the start time of this run.
now = datetime.datetime.now()
weights_path = "./output/{:%Y%m%dT%H%M}".format(now)
os.makedirs(weights_path, exist_ok=True)

for epoch in range(1, n_epochs + 1):
    for phase in phases:
        is_train = phase == "train"
        # train(True) enables dropout/batch-norm updates, train(False) is eval().
        model.train(is_train)

        for features, y_true in loaders[phase]:
            features = features.to(device, dtype=torch.float)
            y_true = y_true.to(device, dtype=torch.long)

            if is_train:
                optimizer.zero_grad()

            with torch.set_grad_enabled(is_train):
                y_pred = model(features)
                loss = criterion(y_pred, y_true)

                if is_train:
                    loss.backward()
                    optimizer.step()

            # Softmax is monotonic, so the argmax of the logits is the predicted class.
            preds = y_pred.argmax(dim=1)
            total += y_true.size(0)
            correct += (preds == y_true).sum().item()

            if is_train:
                loss_train.append(loss.item())
            else:
                loss_valid.append(loss.item())

        # Close the phase: compute its metrics and reset the accumulators.
        if is_train:
            mean_train_loss = np.mean(loss_train)
            acc_train = 100 * correct / total
            loss_train = []
        else:
            validation_loss = np.mean(loss_valid)
            acc_valid = 100 * correct / total
            loss_valid = []
        correct = 0
        total = 0

    if validation_loss < best_validation_loss:
        print("saving weights...")
        best_epoch = epoch
        best_validation_loss = validation_loss
        torch.save(model.state_dict(), os.path.join(weights_path, "model.pt"))

    print(f"Epoch={epoch}/{n_epochs}\tloss={mean_train_loss:.4f}\tval_loss={validation_loss:.4f}\tacc={acc_train:.4f}\tval_acc={acc_valid:.4f}")
    

### Testing

In [None]:
# Preview the (already scaled) test frame.
test_df.head()

In [None]:
# Test features: everything except the 'id' column.
X_test = test_df.drop(['id'],axis=1)

# Unshuffled loader so predictions stay in the same row order as test_df.
# NOTE(review): batch_size = 100000 presumably covers the whole test set in one batch — confirm.
test = TensorDataset(torch.Tensor(np.array(X_test)))
test_loader = DataLoader(test, batch_size = 100000, shuffle = False)

In [None]:
# Load the best checkpoint written by the training loop and predict class
# probabilities for the whole test set.
if not weights_path:
    print("Choose weights path")
    sys.exit()

device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

model_name = weights_path + "/model.pt"
print(model_name)
state_dict = torch.load(model_name, map_location=device)
model.load_state_dict(state_dict)
model.eval()

# Collect every batch: keeping only the last batch's output would silently drop
# rows as soon as the test set no longer fits into a single batch.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        features = batch[0]
        print(features.size())
        features = features.to(device, dtype=torch.float)

        batch_probs = F.softmax(model(features), dim=1)
        print(batch_probs.size())

        batch_predictions.append(batch_probs.cpu())

# One (n_test_rows, n_classes) tensor in the original row order (loader is unshuffled).
pred_percentage = torch.cat(batch_predictions, dim=0)

print(pred_percentage.detach().cpu().numpy())
print("DONE!")

### Submission

In [None]:
# Fill the sample submission's class columns with the predicted probabilities.
# The file is read from the same absolute input directory as train.csv/test.csv.
sub = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2021/sample_submission.csv")
pred_array = pred_percentage.detach().cpu().numpy()

# Fail early with a readable message if predictions and template disagree
# (e.g. predictions covering only part of the test set).
expected_shape = sub.loc[:, "Class_1":"Class_9"].shape
assert pred_array.shape == expected_shape, (
    f"prediction shape {pred_array.shape} does not match submission shape {expected_shape}"
)

sub.loc[:,"Class_1":"Class_9"] = pred_array
sub = sub.set_index("id")
sub.head()

In [None]:
# Make sure the target directory exists so this cell also works when the
# training cell (which creates ./output as a side effect) was not run.
os.makedirs("./output", exist_ok=True)
sub.to_csv("./output/submission.csv")