In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits, load_iris, load_boston, load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, RepeatedKFold

from catboost import CatBoostClassifier
import time

from train_utils import train_model_regression, train_model_classification

In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the local `train_utils` helpers) are re-imported before code runs,
# picking up edits without a kernel restart.
%load_ext autoreload
%autoreload 2

# Classification

## Prepare data

In [None]:
# Binary-classification toy data: pull the feature matrix and label vector
# out of the returned bunch.
data = load_breast_cancer()
X = data['data']
y = data['target']

In [None]:
# Seeded, shuffled K-fold splitter handed to the training helper below.
n_fold = 5
folds = KFold(
    shuffle=True,
    random_state=11,
    n_splits=n_fold,
)

In [None]:
# Hold out 10% of the samples. The split is seeded (same seed as `folds`) so
# that Restart-and-Run-All reproduces the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=11
)

In [None]:
# LightGBM hyper-parameters for the breast-cancer classifier.
# Objective and metric are set for binary classification so they agree with
# the `eval_metric='auc'` passed to `train_model_classification` below
# (a regression objective / MAE metric does not fit a 0/1 target).
params = {'num_leaves': 128,
          'min_child_samples': 79,
          'objective': 'binary',
          'max_depth': 13,
          'learning_rate': 0.2,
          "boosting_type": "gbdt",
          "subsample_freq": 1,
          "subsample": 0.9,
          "bagging_seed": 11,
          "metric": 'auc',
          "verbosity": -1,
          'reg_alpha': 0.1,
          'reg_lambda': 0.3,
          'colsample_bytree': 1.0
         }

In [None]:
# Cross-validated LightGBM run on the breast-cancer split, scored with AUC.
# The arrays are wrapped in DataFrames before being handed to the helper.
train_model_classification(
    pd.DataFrame(X_train),
    pd.DataFrame(X_test),
    pd.DataFrame(y_train),
    params,
    folds,
    n_estimators=1000,
    early_stopping_rounds=100,
    verbose=10000,
    eval_metric='auc',
    model_type='lgb',
    model=None,
    columns=None,
    plot_feature_importance=False,
)

## CatBoost

In [None]:
# Time a GPU CatBoost fit on the classification split.
start = time.time()

# NOTE(review): `task_type="GPU"` with `devices='0:1'` presumably requires the
# listed GPU ids to exist on this machine — confirm before running elsewhere.
model = CatBoostClassifier(
    task_type="GPU",
    devices='0:1',
    gpu_ram_part=0.99,
    iterations=500,
)

model.fit(X_train, y_train, plot=True, verbose=True)

# Wall-clock seconds spent constructing and fitting the model.
print(time.time() - start)

# Regression

In [None]:
# Regression toy data: feature matrix and target vector from the Boston set.
# NOTE(review): `load_boston` is deprecated/removed in recent scikit-learn
# releases — confirm the pinned version or switch to another dataset.
data = load_boston()
X = data['data']
y = data['target']

In [None]:
# Seeded, shuffled K-fold splitter for the regression run.
n_fold = 5
folds = KFold(
    shuffle=True,
    random_state=11,
    n_splits=n_fold,
)

In [None]:
# Hold out 10% of the samples. The split is seeded (same seed as `folds`) so
# that Restart-and-Run-All reproduces the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=11
)

In [None]:
# LightGBM hyper-parameters for the regression run (MAE objective/metric).
params = dict(
    num_leaves=128,
    min_child_samples=79,
    objective='regression',
    max_depth=13,
    learning_rate=0.2,
    boosting_type="gbdt",
    subsample_freq=1,
    subsample=0.9,
    bagging_seed=11,
    metric='mae',
    verbosity=-1,
    reg_alpha=0.1,
    reg_lambda=0.3,
    colsample_bytree=1.0,
)

In [None]:
# Cross-validated LightGBM regression on the housing split, scored with MAE.
# The arrays are wrapped in DataFrames before being handed to the helper.
train_model_regression(
    pd.DataFrame(X_train),
    pd.DataFrame(X_test),
    pd.DataFrame(y_train),
    params,
    folds,
    n_estimators=1000,
    early_stopping_rounds=100,
    verbose=10000,
    eval_metric='mae',
    model_type='lgb',
    model=None,
    columns=None,
    plot_feature_importance=False,
)

# Digits (scikit-learn 8x8 handwritten digits, MNIST-style)

In [None]:
# Handwritten-digit data: flat feature rows plus integer class labels.
data = load_digits()
X = data['data']
y = data['target']

## Preprocess

In [None]:
# Fold each flat row back into an 8x8 image and divide by 16
# (presumably the maximum pixel intensity, giving values in [0, 1] — confirm).
X_folded = np.true_divide(X.reshape(-1, 8, 8), 16.0)

In [None]:
# One-hot encode the class labels. The dtype is pinned to float32 because the
# default indicator dtype of `get_dummies` differs between pandas versions,
# and the frame is later fed to Keras and converted to torch tensors.
y_dummy = pd.get_dummies(y, dtype=np.float32)

## Split

In [None]:
# Hold out 10% of the images. The split is seeded so that Restart-and-Run-All
# reproduces the same train/test partition for both the Keras and torch runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_folded, y_dummy, test_size=0.1, random_state=11
)

In [None]:
# Sanity check: features and labels of each split must agree on sample count.
print(*(part.shape for part in (X_train, y_train)))
print(*(part.shape for part in (X_test, y_test)))

## Keras

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [None]:
# Training hyper-parameters (batch size and epoch count are reused by the
# torch loaders and training loop further down).
batch_size, num_classes, epochs = 32, 10, 10

# input image dimensions: 8x8 pixels, one channel, channels-last
img_rows = img_cols = 8
input_shape = (img_rows, img_cols, 1)

In [None]:
# Small CNN: two conv layers, max-pooling, then a dense head with dropout
# and a softmax over the digit classes. Layers are listed in execution order.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

In [None]:
# One-hot targets -> categorical cross-entropy; track accuracy while training.
# NOTE(review): Adadelta's default learning rate may differ between Keras
# releases — confirm it is the intended one for this version.
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train on the images with a trailing channel axis added; the held-out split
# doubles as the per-epoch validation set.
model.fit(
    X_train.reshape(-1, 8, 8, 1),
    y_train,
    validation_data=(X_test.reshape(-1, 8, 8, 1), y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Silent evaluation on the held-out split; `score` holds the loss followed by
# the compiled metric (accuracy).
score = model.evaluate(
    X_test.reshape(-1, 8, 8, 1),
    y_test,
    verbose=0,
)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

## PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class ConvNet(nn.Module):
    """Small CNN for single-channel 8x8 digit images (10 classes).

    Layout: Conv(1->32, k=3) -> ReLU -> Conv(32->64, k=2) -> ReLU ->
    MaxPool(2) -> Dropout(0.25) -> flatten (256 features) ->
    Linear(256->128) -> ReLU -> Dropout(0.5) -> Linear(128->10).

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it already-softmaxed outputs
    flattens the gradients. Use ``predict_proba`` when probabilities are needed.
    """
    
    def __init__(self):
        """Create the layers; no arguments, all sizes are fixed."""
        super(ConvNet, self).__init__()
        self.Conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.relu = nn.ReLU()
        self.Conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2)
        self.maxP = nn.MaxPool2d(kernel_size=(2, 2))
        self.drop1 = nn.Dropout(0.25)
        self.Dense1 = nn.Linear(256, 128)
        self.Dense2 = nn.Linear(128, 10)
        self.drop2 = nn.Dropout(0.5)
        # Explicit class axis; only used by `predict_proba`, not by `forward`.
        self.softmax = nn.Softmax(dim=1)
        
        
    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10).

        Args:
            x: float tensor of shape (batch, 1, 8, 8).
        """
        x1 = self.relu(self.Conv1(x))
        x2 = self.relu(self.Conv2(x1))
        # Flatten per sample (keeps the batch axis fixed) instead of a
        # hard-coded `reshape(-1, 256)` that could silently mix samples.
        x3 = self.drop1(self.maxP(x2)).reshape(x.shape[0], -1)
        # Non-linearity between the two dense layers; without it they collapse
        # into a single linear map.
        x4 = self.drop2(self.relu(self.Dense1(x3)))
        return self.Dense2(x4)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits of `forward`)."""
        return self.softmax(self.forward(x))

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over `train_loader`.

    Args:
        model: network to optimise; switched to training mode here.
        device: torch device the batches are moved to.
        train_loader: yields (inputs, one-hot targets) batches.
        optimizer: optimiser stepping the model's parameters.
        epoch: epoch number, used only in the progress line.

    Prints a progress line with the current batch loss every 10th batch.
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()
    progress_line = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'

    for step, (inputs, onehot) in enumerate(train_loader):
        # CrossEntropyLoss wants class indices, so collapse the one-hot rows.
        labels = onehot.argmax(dim=1)
        inputs = inputs.to(device)
        labels = labels.to(device).long()

        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step % 10 != 0:
            continue
        seen = step * len(inputs)
        percent = 100. * step / len(train_loader)
        print(progress_line.format(
            epoch, seen, len(train_loader.dataset), percent, loss.item()))


def test(model, device, test_loader):
    """
    """
    criterion = nn.CrossEntropyLoss()
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            target = torch.argmax(target, dim=1)
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target.long()).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# Wrap the NumPy splits as tensor datasets. Images get an explicit channel axis
# (N, 1, 8, 8); labels stay one-hot and are reduced with argmax in train/test.
train_set = torch.utils.data.TensorDataset(
    torch.Tensor(X_train.reshape(-1, 1, 8, 8)),
    torch.Tensor(y_train.values),
)
# Reshuffle the training batches every epoch; a fixed batch order across all
# epochs is rarely intended for SGD-style training.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_set = torch.utils.data.TensorDataset(
    torch.Tensor(X_test.reshape(-1, 1, 8, 8)),
    torch.Tensor(y_test.values),
)
# Evaluation order does not affect the metrics, so no shuffling here.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

In [None]:
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)
optimizer = optim.Adam(model.parameters())

# One training pass followed by one evaluation pass per epoch.
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)