In [None]:
!pip install optuna

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import time
pd.set_option('display.max_columns', 100)

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, recall_score, precision_score, classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

import optuna
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class PreProcess:
    """Load the Otto train/test CSV files and turn them into training arrays.

    Calling the instance yields a random train/validation split of the
    training data, the test features with their ids, and per-class loss
    weights placed on the module-level ``device``.
    """

    def __init__(self, data_dir='/kaggle/input/otto-group-product-classification-challenge'):
        """Read ``train.csv`` and ``test.csv`` from ``data_dir``.

        Args:
            data_dir: Directory holding the two CSV files. The default is the
                Kaggle input path the notebook was written for.
        """
        self.df_otto_train = pd.read_csv('{}/train.csv'.format(data_dir))
        self.df_otto_test = pd.read_csv('{}/test.csv'.format(data_dir))

    def __call__(self, test_size=0.2, random_state=None):
        """Split the training data and compute class weights.

        Args:
            test_size: Fraction of the training rows held out for validation.
            random_state: Seed forwarded to ``train_test_split``. ``None``
                (the default) keeps the split different on every call.

        Returns:
            Tuple ``(x_train, x_valid, t_train, t_valid, x_test, x_test_id,
            loss_weight)``. The first six are NumPy arrays; ``loss_weight`` is
            a float tensor on ``device`` with one entry per distinct label
            found in ``t_train``.
        """
        # Strip the 'Class_' prefix and shift to zero-based integer labels.
        # The helper column is stored on the frame, exactly as before.
        self.df_otto_train['target2'] = self.df_otto_train.target.map(
            lambda label: int(label.replace('Class_', '')) - 1
        )

        x = self.df_otto_train.drop(['id', 'target', 'target2'], axis=1).values
        y = self.df_otto_train.target2.values

        x_test_id = self.df_otto_test.id.values
        x_test = self.df_otto_test.drop(['id'], axis=1).values

        x_train, x_valid, t_train, t_valid = train_test_split(
            x, y, test_size=test_size, shuffle=True, random_state=random_state
        )

        # counts[i] is the number of training rows carrying the i-th distinct
        # (sorted) label. Only the counts are needed, so nothing else is requested.
        _, counts = np.unique(t_train, return_counts=True)

        # Inverse-frequency weighting: weight_c = n_train / (n_classes * count_c).
        # NOTE(review): the weight index equals the class index only if every
        # class occurs in t_train - confirm for very small datasets.
        loss_weight = (1 / torch.Tensor(counts)) * (x_train.shape[0] / len(counts))
        loss_weight = loss_weight.to(device)

        return x_train, x_valid, t_train, t_valid, x_test, x_test_id, loss_weight

In [None]:
class MLP(nn.Module):
    """Fully connected classifier that outputs log-probabilities.

    Each hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout``; the
    head is ``Linear -> LogSoftmax``, so the output pairs with ``nn.NLLLoss``.
    """

    # Default architecture used when the caller does not override it.
    # NOTE(review): presumably copied from an Optuna study - confirm origin.
    DEFAULT_FEATURES = (522, 1006, 1304, 706, 851)
    DEFAULT_DROPOUT = (
        0.1287716033668473,
        0.08497812991732616,
        0.19230277862176845,
        0.09567140996248122,
        0.15776969001259095,
    )

    def __init__(self, in_dim, out_dim, features=None, p=None):
        """Build the network.

        Args:
            in_dim: Number of input features.
            out_dim: Number of output classes.
            features: Optional sequence of hidden-layer widths. Defaults to
                ``DEFAULT_FEATURES``.
            p: Optional sequence of dropout probabilities, one per hidden
                layer. Defaults to ``DEFAULT_DROPOUT``.

        Raises:
            ValueError: If ``features`` and ``p`` differ in length.
        """
        super().__init__()

        features = self.DEFAULT_FEATURES if features is None else tuple(features)
        p = self.DEFAULT_DROPOUT if p is None else tuple(p)
        if len(features) != len(p):
            raise ValueError(
                "features and p must have the same length, got {} and {}".format(
                    len(features), len(p)
                )
            )

        layers = []
        in_features = in_dim
        # The number of hidden layers follows from the sequences themselves,
        # so there is no separate layer count to keep in sync.
        for out_features, drop_prob in zip(features, p):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.BatchNorm1d(out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(drop_prob))
            in_features = out_features

        layers.append(nn.Linear(in_features, out_dim))
        layers.append(nn.LogSoftmax(dim=-1))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return log-probabilities for the batch ``x``."""
        return self.network(x)
    
class MLP_tune(nn.Module):
    """MLP whose depth, widths and dropout rates are sampled from a trial.

    The trial object is asked for ``n_layers`` (5 to 10), then for each hidden
    layer ``n_units_l<i>`` (between ``in_dim`` and ``in_dim*20``) and
    ``dropout_l<i>`` (between 0.0 and 0.5). The head is Linear + LogSoftmax.
    """

    def __init__(self, trial, in_dim, out_dim):
        super().__init__()

        depth = trial.suggest_int("n_layers", 5, 10)

        modules = []
        width_in = in_dim
        for idx in range(depth):
            # Width is sampled first, dropout after the dense/norm/activation
            # modules exist - the same order of suggestions as always.
            width_out = trial.suggest_int("n_units_l{}".format(idx), in_dim, in_dim*20)
            dense = nn.Linear(width_in, width_out)
            norm = nn.BatchNorm1d(width_out)
            activation = nn.ReLU()
            drop_rate = trial.suggest_float("dropout_l{}".format(idx), 0.0, 0.5)
            modules.extend([dense, norm, activation, nn.Dropout(drop_rate)])

            width_in = width_out

        # Classification head producing log-probabilities.
        modules.extend([nn.Linear(width_in, out_dim), nn.LogSoftmax(dim=-1)])

        self.network = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the sampled network."""
        return self.network(x)

In [None]:
class Train:
    """Train, evaluate, tune and submit the Otto MLP classifier.

    The data is loaded and split once in ``__init__`` through ``PreProcess``.
    The resulting arrays and class weights live on the instance and are shared
    by ``train``, ``test``, ``submit`` and ``objective``.
    """

    BATCH_SIZE = 256  # mini-batch size of every DataLoader built by this class

    def __init__(self):
        (self.x_train, self.x_valid, self.t_train, self.t_valid,
         self.x_test, self.x_test_id, self.loss_weight) = PreProcess()()

    # ------------------------------------------------------------------
    # Private helpers shared by train() and objective()
    # ------------------------------------------------------------------

    def _make_loaders(self):
        """Return ``(loader_train, loader_valid)`` built from the stored split."""
        ds_train = TensorDataset(torch.tensor(self.x_train).float(), torch.tensor(self.t_train))
        ds_valid = TensorDataset(torch.tensor(self.x_valid).float(), torch.tensor(self.t_valid))

        loader_train = DataLoader(ds_train, batch_size=self.BATCH_SIZE, shuffle=True)
        # A fixed order keeps batch composition, and therefore the averaged
        # validation loss, stable from one epoch to the next.
        loader_valid = DataLoader(ds_valid, batch_size=self.BATCH_SIZE, shuffle=False)
        return loader_train, loader_valid

    def _build_test_dataset(self):
        """Wrap the test features in a single-tensor TensorDataset."""
        return TensorDataset(torch.tensor(self.x_test).float())

    def _run_epoch(self, mlp, loader, criterion, optimizer=None):
        """Run one pass over ``loader``.

        With an ``optimizer`` the model is put in train mode and updated after
        each batch; without one the model is put in eval mode and gradients
        are disabled.

        Returns:
            ``(mean_batch_loss, true_labels, predicted_labels)``.
        """
        training = optimizer is not None
        mlp.train(training)

        losses, trues, preds = [], [], []
        with torch.set_grad_enabled(training):
            for x, t in loader:
                trues.extend(t.tolist())

                x = x.to(device)
                t = t.to(device)

                if training:
                    optimizer.zero_grad()

                y = mlp(x)
                loss = criterion(y, t.long())

                if training:
                    loss.backward()
                    optimizer.step()

                preds.extend(y.argmax(1).tolist())
                losses.append(loss.item())

        return np.mean(losses), trues, preds

    @staticmethod
    def _weighted_scores(trues, preds):
        """Return weighted ``(f1, recall, precision)`` for the given labels."""
        return (
            f1_score(trues, preds, average='weighted'),
            recall_score(trues, preds, average='weighted'),
            precision_score(trues, preds, average='weighted'),
        )

    def _fit(self, mlp, optimizer, n_epochs, trial=None):
        """Optimise ``mlp`` for ``n_epochs`` and return the last validation loss.

        Uses a class-weighted NLL loss and a ReduceLROnPlateau scheduler driven
        by the validation loss. When ``trial`` is given, the validation loss is
        reported after every epoch and ``optuna.exceptions.TrialPruned`` is
        raised as soon as the trial asks to be pruned.
        """
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
        criterion = nn.NLLLoss(weight=self.loss_weight)  # negative log-likelihood loss
        loader_train, loader_valid = self._make_loaders()

        start_time = time.time()
        loss_valid = None

        for epoch in range(n_epochs):
            loss_train, trues_train, preds_train = self._run_epoch(
                mlp, loader_train, criterion, optimizer
            )
            loss_valid, trues_valid, preds_valid = self._run_epoch(
                mlp, loader_valid, criterion
            )

            f1_train, recall_train, precision_train = self._weighted_scores(trues_train, preds_train)
            f1_valid, recall_valid, precision_valid = self._weighted_scores(trues_valid, preds_valid)

            print('EPOCH: {}, Train [Loss: {:.3f}, F1: {:.3f}, R: {:.3f}, P: {:.3f}], Valid [Loss: {:.3f}, F1: {:.3f}, R: {:.3f}, P: {:.3f}]'.format(
                epoch,
                loss_train,
                f1_train,
                recall_train,
                precision_train,
                loss_valid,
                f1_valid,
                recall_valid,
                precision_valid
            ))

            scheduler.step(loss_valid)

            if trial is not None:
                trial.report(loss_valid, epoch)
                # Handle pruning based on the intermediate value.
                if trial.should_prune():
                    raise optuna.exceptions.TrialPruned()

        elapsed_time = time.time() - start_time
        print ("elapsed_time:{:.1f}".format(elapsed_time / 60) + "[min]")

        return loss_valid

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def train(self, n_epochs, lr, weight_decay):
        """Train a fresh ``MLP`` with RMSprop and return it.

        Args:
            n_epochs: Number of passes over the training split.
            lr: RMSprop learning rate.
            weight_decay: RMSprop weight decay.

        Returns:
            The trained model, located on ``device``.
        """
        in_dim = self.x_train.shape[1]
        out_dim = len(np.unique(self.t_train))

        mlp = MLP(in_dim, out_dim).to(device)
        optimizer = optim.RMSprop(mlp.parameters(), lr=lr, weight_decay=weight_decay)

        # Still exposed as an attribute for code that reads it after train().
        self.ds_test = self._build_test_dataset()

        self._fit(mlp, optimizer, n_epochs)

        return mlp

    def test(self, mlp):
        """Predict class probabilities for the test set.

        Works whether or not ``train`` ran on this instance: the test dataset
        is built here when it does not exist yet.

        Returns:
            A list with one probability vector (NumPy array) per test row, in
            the original row order.
        """
        ds_test = getattr(self, 'ds_test', None)
        if ds_test is None:
            ds_test = self.ds_test = self._build_test_dataset()

        loader_test = DataLoader(ds_test, batch_size=self.BATCH_SIZE, shuffle=False)

        preds_test = []

        mlp.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            for (x,) in loader_test:
                y = mlp(x.to(device))
                # The network emits log-probabilities; exp() recovers probabilities.
                preds_test.extend(np.exp(y.cpu().numpy()))

        return preds_test

    def submit(self, preds_test):
        """Write ``submission.csv`` using the sample submission's column layout.

        Args:
            preds_test: One row of class probabilities per test id, ordered
                like ``self.x_test_id``.
        """
        df_sample_submission = pd.read_csv('/kaggle/input/otto-group-product-classification-challenge/sampleSubmission.csv')
        df_submission = pd.DataFrame(columns=df_sample_submission.columns)
        df_submission['id'] = self.x_test_id
        df_submission[df_submission.columns[1:]] = preds_test
        df_submission.to_csv('submission.csv', index=False)

    def get_optimizer(self, trial, model):
        """Let the trial pick an optimizer, its learning rate and weight decay.

        Samples ``optimizer`` from Adam / MomentumSGD / rmsprop, ``weight_decay``
        log-uniformly in [1e-10, 1e-3] and an optimizer-specific learning rate
        log-uniformly in [1e-5, 1e-1].

        Returns:
            The configured ``torch.optim`` optimizer for ``model``.
        """
        optimizer_names = ['Adam', 'MomentumSGD', 'rmsprop']
        optimizer_name = trial.suggest_categorical('optimizer', optimizer_names)
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
        # and matches the suggest_float calls used when sampling the network.
        weight_decay = trial.suggest_float('weight_decay', 1e-10, 1e-3, log=True)

        if optimizer_name == optimizer_names[0]:
            adam_lr = trial.suggest_float('adam_lr', 1e-5, 1e-1, log=True)
            optimizer = optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)
        elif optimizer_name == optimizer_names[1]:
            momentum_sgd_lr = trial.suggest_float('momentum_sgd_lr', 1e-5, 1e-1, log=True)
            optimizer = optim.SGD(model.parameters(), lr=momentum_sgd_lr, momentum=0.9, weight_decay=weight_decay)
        else:
            rmsprop_lr = trial.suggest_float('rmsprop_lr', 1e-5, 1e-1, log=True)
            optimizer = optim.RMSprop(model.parameters(), lr=rmsprop_lr, weight_decay=weight_decay)

        return optimizer

    def objective(self, trial):
        """Optuna objective: train a sampled ``MLP_tune`` and return its validation loss.

        The split created in ``__init__`` is reused for every trial, so trials
        (and the pruner's intermediate values) are compared on the same
        validation data and the CSV files are not re-read per trial.

        Raises:
            optuna.exceptions.TrialPruned: When the trial requests pruning.
        """
        in_dim = self.x_train.shape[1]
        out_dim = len(np.unique(self.t_train))

        n_epochs = 100

        mlp = MLP_tune(trial, in_dim, out_dim).to(device)
        optimizer = self.get_optimizer(trial, mlp)

        return self._fit(mlp, optimizer, n_epochs, trial=trial)

In [None]:
def run(n_cross_valid=30, n_epochs=100, lr=7.560243948481283e-05, weight_decay=0.0005896154168582327):
    """Train several models, average their test predictions and write the submission.

    Each repetition creates a new ``Train`` object, trains one model and
    predicts the test set. The per-class probabilities are then averaged over
    all repetitions and handed to ``Train.submit``.

    Args:
        n_cross_valid: Number of independent training repetitions to average.
        n_epochs: Epochs per repetition.
        lr: Learning rate passed to ``Train.train``.
        weight_decay: Weight decay passed to ``Train.train``.

    Raises:
        ValueError: If ``n_cross_valid`` is smaller than 1, since there would
            be nothing to average or submit.
    """
    if n_cross_valid < 1:
        raise ValueError("n_cross_valid must be at least 1, got {}".format(n_cross_valid))

    preds_test_all = []
    train = None
    for _ in range(n_cross_valid):
        train = Train()
        mlp = train.train(n_epochs, lr, weight_decay)
        preds_test_all.append(train.test(mlp))

    # Element-wise mean over repetitions: one averaged probability row per test sample.
    preds_test = np.mean(preds_test_all, axis=0)
    train.submit(list(preds_test))

# Run the full pipeline: repeated training, averaged test predictions, submission.csv.
run()

## Tune hyperparameters

In [None]:
def tune():
    """Run a 1000-trial Optuna study that minimises ``Train.objective`` and print a summary."""
    trainer = Train()
    study = optuna.create_study(direction="minimize")
    study.optimize(trainer.objective, n_trials=1000)

    # Tally trials by their final state.
    states = [t.state for t in study.trials]
    n_pruned = states.count(optuna.trial.TrialState.PRUNED)
    n_complete = states.count(optuna.trial.TrialState.COMPLETE)

    print("Study statistics: ")
    print("  Number of finished trials: ", len(states))
    print("  Number of pruned trials: ", n_pruned)
    print("  Number of complete trials: ", n_complete)

    print("Best trial:")
    best = study.best_trial

    print("  Value: ", best.value)

    print("  Params: ")
    for name, setting in best.params.items():
        print("    {}: {}".format(name, setting))
        
# tune()