In [203]:
import numpy as np
import pandas as pd

import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, train_test_split,cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, recall_score

from skorch import NeuralNetClassifier
import optuna

In [174]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [175]:
X, y = make_classification(1000, 20, n_informative=10, random_state=0)
X = X.astype(np.float32)
y = y.astype(np.int64)


In [176]:
X_df = pd.DataFrame(cancer.data, columns=cancer.feature_names)

In [278]:
X_train, X_test ,y_train, y_test = train_test_split(cancer.data, cancer.target)
X_train = torch.Tensor(X_train.astype(np.float32))
y_train = torch.Tensor(y_train.astype(np.int64))
X_test = torch.Tensor(X_test.astype(np.float32))
y_test = torch.Tensor(y_test.astype(np.int64))

In [279]:
y_train = y_train.reshape(-1,1)

In [280]:
type(y_train)

torch.Tensor

In [310]:
ds_train = TensorDataset(X_train, y_train)
ds_test = TensorDataset(X_test, y_test)
train_loader = DataLoader(ds_train, batch_size=64 ,shuffle=True)
test_loader = DataLoader(ds_test, batch_size=64 ,shuffle=False)

In [311]:
class MyModule(nn.Module):
    def __init__(self, num_units1=20, num_units2=20, drop_rate=0.3, nonlin=F.relu):
        super(MyModule, self).__init__()

        self.dense0 = nn.Linear(X_train.shape[1], num_units1)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(drop_rate)
        self.dense1 = nn.Linear(num_units1, num_units2)
        self.output = nn.Linear(num_units2, 1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        X = self.output(X)
        return X

In [312]:
def train_net(model, train_loader, test_loader, optimizer, criterionm, n_iter):
    model.train()
    train_losses = []
    train_acc = []
    val_acc = []
    for epoch in range(n_iter):
        running_loss = 0.0
        n = 0
        n_acc = 0
        for i ,(data, target) in enumerate(train_loader):
            # 勾配を初期化
            optimizer.zero_grad()
            # 予測
            pred = model(data)
            # Loss取得
            loss = criterion(pred, target)
            # 誤差
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            # 学習データのレコード数を計算
            n += len(data)
            # 正解数を計算
            pred = pred.max(1)[1]
            n_acc += (pred == target).float().sum().item()
        train_losses.append(running_loss / i)
        # 正解率を計算
        train_acc.append(n_acc / n)
        # 検証データでの正解率を計算
        val_acc.append(eval_net(model, test_loader))
        print("#epoch : {} # train_loss : {:.4f} # train_acc : {:.4f} # val_acc : {:.4f}".format(epoch, train_losses[-1], train_acc[-1], val_acc[-1]), flush=True)

In [313]:
def eval_net(model, data_loader):
    model.eval()
    targets = []
    preds = []
    for data, target in data_loader:
        pred = model(data).max(1)[1]
        preds.append(pred)
        targets.append(target)
    
    preds = torch.cat(preds)
    targets = torch.cat(targets)
    

    acc = (preds == targets).float().sum() / len(preds)
    return acc.item()

In [314]:
model = MyModule()
optimizer = optim.Adam(model.parameters(), lr=0.05)
criterion = nn.BCEWithLogitsLoss()

train_net(model, train_loader, test_loader, optimizer, criterion,20)

#epoch : 0 # train_loss : 9.5317 # train_acc : 22.9577 # val_acc : 0.3706
#epoch : 1 # train_loss : 0.7819 # train_acc : 23.1643 # val_acc : 0.3706
#epoch : 2 # train_loss : 0.7741 # train_acc : 23.0610 # val_acc : 0.3706
#epoch : 3 # train_loss : 0.7765 # train_acc : 22.8545 # val_acc : 0.3706
#epoch : 4 # train_loss : 0.7725 # train_acc : 23.0094 # val_acc : 0.3706
#epoch : 5 # train_loss : 0.7737 # train_acc : 22.9061 # val_acc : 0.3706
#epoch : 6 # train_loss : 0.7720 # train_acc : 23.0094 # val_acc : 0.3706
#epoch : 7 # train_loss : 0.7692 # train_acc : 23.2160 # val_acc : 0.3706
#epoch : 8 # train_loss : 0.7749 # train_acc : 22.8545 # val_acc : 0.3706
#epoch : 9 # train_loss : 0.7710 # train_acc : 23.0610 # val_acc : 0.3706
#epoch : 10 # train_loss : 0.7705 # train_acc : 23.1127 # val_acc : 0.3706
#epoch : 11 # train_loss : 0.7671 # train_acc : 23.3709 # val_acc : 0.3706
#epoch : 12 # train_loss : 0.7725 # train_acc : 23.0094 # val_acc : 0.3706
#epoch : 13 # train_loss : 0.7695 #

In [141]:
params = {
    'mdl__max_epochs' : [10,20],
    'mdl__lr' : [0.1, 0.01],
    'mdl__num_units1' : [10,20],
    'mdl__num_units2' : [10,20]
    #'mdl__drop_rate' : [0.3,0.5]
}

net =NeuralNetClassifier(MyModule)

pipe = Pipeline([
    ('sc', StandardScaler()),
    ('mdl', NeuralNetClassifier(MyModule))
])

clf = GridSearchCV(pipe, params, cv=3, scoring='recall')
clf.fit(X_train,y_train)

ValueError: Invalid parameter num_units1 for estimator <class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
  module=<class '__main__.MyModule'>,
). Check the list of available parameters with `estimator.get_params().keys()`.

In [32]:
clf.best_estimator_[1]

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
)

In [129]:
from sklearn.metrics import confusion_matrix,recall_score,precision_score
y_predict = clf.predict(X_test)
print((y_predict == y_test).sum()/len(y_test))

cm = confusion_matrix(y_test, y_predict)
print(cm)

recall = recall_score(y_test, y_predict)
precision = precision_score(y_test,y_predict)
print("recall : " + str(recall))
print("precision : " + str(precision))

  from ipykernel import kernelapp as app


0.986013986013986
[[42  2]
 [ 0 99]]
recall : 1.0
precision : 0.9801980198019802


In [170]:
def objective(trial):
    max_epochs = trial.suggest_int('max_epochs', 10,20)
    lr = trial.suggest_loguniform('lr',0.0001, 0.1) 
    num_units1 = trial.suggest_int('num_units1', 10,30)
    num_units2 = trial.suggest_int('num_units2', 10,30)
    drop_rate = trial.suggest_float('drop_rate', 0.3,0.7)
    net =NeuralNetClassifier(MyModule)

    pipe = Pipeline([
        ('sc', StandardScaler()),
        ('mdl', NeuralNetClassifier(MyModule(num_units1=num_units1,
                                                                 num_units2=num_units2,
                                                                 drop_rate=drop_rate),
                                                max_epochs=max_epochs,
                                                lr=lr))
    ])
    score = cross_val_score(pipe,X_train,y_train,n_jobs=-1,cv=3,scoring='recall')
    recall = score.mean()
    return recall

In [171]:
recall

0.9797979797979798

In [172]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)
best_params = study.best_params

[I 2020-10-03 17:17:18,496] A new study created in memory with name: no-name-b1797a2a-9c95-416b-9485-7670df9a42af
[I 2020-10-03 17:17:19,789] Trial 0 finished with value: 1.0 and parameters: {'max_epochs': 71, 'lr': 0.0013625280727706625, 'num_units1': 1, 'num_units2': 1, 'drop_rate': 0.679689379299498}. Best is trial 0 with value: 1.0.
[W 2020-10-03 17:17:19,808] Trial 1 failed because of the following error: RuntimeError('Trying to create tensor with negative dimension -1: [-1, 30]',)
Traceback (most recent call last):
  File "/Users/GenseiYoshimura/anaconda/lib/python3.6/site-packages/optuna/study.py", line 778, in _run_trial
    result = func(trial)
  File "<ipython-input-170-6637d38d1e65>", line 13, in objective
    drop_rate=drop_rate),
  File "<ipython-input-137-3fabe930023c>", line 5, in __init__
    self.dense0 = nn.Linear(X_train.shape[1], num_units1)
  File "/Users/GenseiYoshimura/anaconda/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 72, in __init__
    self

RuntimeError: Trying to create tensor with negative dimension -1: [-1, 30]

In [160]:
best_params

{'max_epochs': 45,
 'lr': 0.033014927100503445,
 'num_units1': 15,
 'num_units2': 20,
 'drop_rate': 0.6839935044046618}

In [161]:
pipe = Pipeline([
        ('sc', StandardScaler()),
        ('mdl', NeuralNetClassifier(MyModule(num_units1=best_params['num_units1'],
                                                                 num_units2=best_params['num_units2'],
                                                                 drop_rate=best_params['drop_rate']),
                                                max_epochs=best_params['max_epochs'],
                                                lr=best_params['lr']
                                                ))
])
pipe.fit(X_train,y_train)

  from ipykernel import kernelapp as app


  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        0.7621       0.3953        0.7388  0.0161
      2        0.7237       0.3953        0.7170  0.0197
      3        0.7141       0.4419        0.6990  0.0203
      4        0.6893       0.5930        0.6816  0.0316
      5        0.6829       0.7093        0.6663  0.0348
      6        0.6650       0.7907        0.6501  0.0198
      7        0.6607       0.8488        0.6353  0.0172
      8        0.6445       0.8256        0.6206  0.0343
      9        0.6206       0.8140        0.6054  0.0314
     10        0.6200       0.8140        0.5913  0.0294
     11        0.6013       0.7791        0.5756  0.0361
     12        0.5848       0.7791        0.5598  0.0234
     13        0.5666       0.7791        0.5441  0.0218
     14        0.5771       0.7791        0.5292  0.0193
     15        0.5504       0.7907        0.5128  0.0167
     16        0.5361       0.8

Pipeline(steps=[('sc', StandardScaler()),
                ('mdl',
                 <class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense0): Linear(in_features=30, out_features=15, bias=True)
    (dropout): Dropout(p=0.6839935044046618, inplace=False)
    (dense1): Linear(in_features=15, out_features=20, bias=True)
    (output): Linear(in_features=20, out_features=2, bias=True)
  ),
))])

In [162]:
y_predict = pipe.predict(X_test)

  from ipykernel import kernelapp as app


In [163]:
(y_predict == y_test).sum()/len(y_test)

0.958041958041958

In [164]:
from sklearn.metrics import confusion_matrix,recall_score,precision_score
cm = confusion_matrix(y_test, y_predict)
cm

array([[40,  4],
       [ 2, 97]])

In [165]:
recall = recall_score(y_test, y_predict)
precision = precision_score(y_test,y_predict)
print("recall : " + str(recall))
print("precision : " + str(precision))

recall : 0.9797979797979798
precision : 0.9603960396039604
