# Deep Learning with Bayesian Opt

#### 사전설치 해야할 패키지  
pip install bayesian-optimization  
pip install python-mnist==0.5

In [3]:
# warning message 제거
import warnings
warnings.filterwarnings(action='ignore')

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
import random

In [5]:
from mnist import MNIST
import torch.utils.data as data_utils
from torch.utils.data import DataLoader

# loss 값 ploting 을 위해 사용
import matplotlib.pyplot as plt

Check GPU setting

In [16]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

cpu


### Load Dataset and preprocessing

In [17]:
mnist = MNIST('./data/MNIST/raw')
x_train, y_train = mnist.load_training()
x_test, y_test = mnist.load_testing()

# data 는 list 형식을 되어 있으므로 shape 을 보고, 이미지 visualization 을 하기 편한 array 형태로 바꾸어준다.

x_train=np.asarray(x_train)
y_train=np.asarray(y_train)
x_test=np.asarray(x_test)
y_test=np.asarray(y_test)

print("x_train 의 shape={}, y_train 의 shape={}".format(x_train.shape,y_train.shape))
print("x_test 의 shape={}, y_test 의 shape={}".format(x_test.shape,y_test.shape))

x_train 의 shape=(60000, 784), y_train 의 shape=(60000,)
x_test 의 shape=(10000, 784), y_test 의 shape=(10000,)


### Convert dataset into Tensor

In [18]:
batch_size = 1000
traindata = data_utils.TensorDataset(torch.FloatTensor(x_train), torch.FloatTensor(y_train))
trainloader = data_utils.DataLoader(traindata, batch_size = 1000, shuffle = True, drop_last = False)

test_data = data_utils.TensorDataset(torch.FloatTensor(x_test), torch.FloatTensor(y_test))
testloader = data_utils.DataLoader(test_data, batch_size=5000, shuffle = False)

### Make Batch_Norm_Class

In [19]:
class Batch_Norm(nn.Module):
    def __init__(self):
        super(Batch_Norm, self).__init__()

        self.network = nn.Sequential(nn.Linear(784,256),
                                     nn.BatchNorm1d(256),
                                     nn.ReLU(),
                                     nn.Linear(256,64),
                                     nn.BatchNorm1d(64),
                                     nn.ReLU(),
                                     nn.Linear(64,10))
                                     
    def forward(self, x):
        output = self.network(x)
        
        return output
    
model = Batch_Norm()

In [20]:
model

Batch_Norm(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=64, bias=True)
    (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
)

### Set parameters

정해야 할 것. 

1. Object Function
2. Object Function에 들어갈 hyper parameter
3. Hyper Parameter의 범위
4. 초기 GP Regression을 할 때, 몇개의 점을 기준으로 할지. 

In [21]:
epoch = 15
learning_rate = 0.01
weight_decay = 1e-5

loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)

## Object funtion define
-Object function 정의내리고 최적화할 하이퍼파라미터를 인자값으로 넣어줍니다  
-최적화 하이퍼파라미터 : Learning rate(lr) , L2 Regurization(wd) 

Object Function에는 그냥 Training하는 부분이 들어가면 된다. 

In [22]:
def train_acc(lr, wd):

    loss_list = []

    predict_list = []
    real_list = []
    epoch = 15
    
    learning_rate = lr
    weight_decay = wd
    
    
    correct = 0
    
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)
    
    for num_epoch in range(epoch):
        model.train()
        average_loss = 0

        for batch_idx, (image, label) in enumerate(trainloader):
            data_len = len(trainloader)

            x_Data = image

            optimizer.zero_grad()

            y_predict = model.forward(x_Data)

            predict_sftmax = torch.nn.functional.softmax(y_predict)
            predict_label = torch.argmax(predict_sftmax, dim=1)

            y_label_long = torch.tensor(label, dtype=torch.long)
            y_label_long = y_label_long

            loss = loss_function(y_predict, y_label_long)

            loss.backward()

            optimizer.step()

            loss_list.append(loss)

            predict_list.append(predict_label.cpu().numpy())
            real_list.append(y_label_long.cpu().numpy())

            average_loss += (loss.item()/data_len)



    correct = 0

    for i in range(len(real_list)):
        for j in range(len(real_list[1])):
            answer = (predict_list[i][j] == real_list[i][j])
            if answer == True:
                correct += 1
            else:
                continue

    total_len = len(real_list) * len(real_list[1])
    acc=(correct/total_len) * 100

    return acc

# Bayesian optimization

-Bayesian optimizer를 이용해서 최적의 learning rate, L2 Hyperparameter 찾기

In [23]:
from bayes_opt import BayesianOptimization

In [None]:

pbounds = { 'lr' : (0, 1), 'wd': (0, 1)}  # 하이퍼파라미터 범위 설정
optimizer = BayesianOptimization( f = train_acc, pbounds = pbounds )

optimizer.maximize(init_points = 5, n_iter=15)  #n_iter 초기 point를 제외하고 시행할 횟수



|   iter    |  target   |    lr     |    wd     |
-------------------------------------------------
| [0m 1       [0m | [0m 10.53   [0m | [0m 0.2789  [0m | [0m 0.4744  [0m |
| [95m 2       [0m | [95m 12.77   [0m | [95m 0.7969  [0m | [95m 0.1334  [0m |
| [0m 3       [0m | [0m 10.24   [0m | [0m 0.2811  [0m | [0m 0.9414  [0m |
| [0m 4       [0m | [0m 10.23   [0m | [0m 0.638   [0m | [0m 0.2682  [0m |
| [95m 5       [0m | [95m 51.41   [0m | [95m 0.3242  [0m | [95m 0.04699 [0m |
| [95m 6       [0m | [95m 97.63   [0m | [95m 0.2277  [0m | [95m 0.0     [0m |


In [20]:
print( optimizer.max )

{'target': 97.63855555555556, 'params': {'lr': 0.27877435003825407, 'wd': 0.0}}


In [21]:
for i, res in enumerate( optimizer.res ):
    print(f'Iteration {i}: \n\t{res}')

Iteration 0: 
	{'target': 10.326444444444444, 'params': {'lr': 0.417022004702574, 'wd': 0.7203244934421581}}
Iteration 1: 
	{'target': 11.010555555555555, 'params': {'lr': 0.00011437481734488664, 'wd': 0.30233257263183977}}
Iteration 2: 
	{'target': 44.888111111111115, 'params': {'lr': 0.14675589081711304, 'wd': 0.0923385947687978}}
Iteration 3: 
	{'target': 10.480666666666668, 'params': {'lr': 0.1862602113776709, 'wd': 0.34556072704304774}}
Iteration 4: 
	{'target': 10.27511111111111, 'params': {'lr': 0.39676747423066994, 'wd': 0.538816734003357}}
Iteration 5: 
	{'target': 82.99166666666666, 'params': {'lr': 0.16109732777980845, 'wd': 0.0262953572626644}}
Iteration 6: 
	{'target': 10.449222222222222, 'params': {'lr': 0.8300019078873172, 'wd': 0.6601379171587518}}
Iteration 7: 
	{'target': 10.203555555555557, 'params': {'lr': 0.14930671317372957, 'wd': 0.6288117640027626}}
Iteration 8: 
	{'target': 83.465, 'params': {'lr': 0.2045609248379471, 'wd': 0.01446991701452161}}
Iteration 9: 
	

Bayesian Optimization 패키지 https://github.com/fmfn/BayesianOptimization