# 請結合前面的知識與程式碼，比較不同的 optimizer 與 learning rate 組合對訓練的結果與影響
### 常見的 optimizer 包含
* SGD
* RMSprop
* AdaGrad
* Adam

In [1]:
# check gpu status
!nvidia-smi

Tue Mar 26 16:12:25 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 419.35       Driver Version: 419.35       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 750    WDDM  | 00000000:01:00.0  On |                  N/A |
| 40%   27C    P8     1W /  38W |    919MiB /  1024MiB |      5%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [2]:
import os
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, RMSprop, Adagrad, Adam

# Disable GPU
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

Using TensorFlow backend.


In [3]:
# Load CIFAR-10 through Keras; the result unpacks into a train pair and a test pair.
train, test = keras.datasets.cifar10.load_data()

In [4]:
## 資料前處理
def preproc_x(x, flatten=True):
    """Scale the inputs by 1/255 and optionally flatten each sample.

    Args:
        x: Array-like batch of samples supporting division, ``len`` and
            ``reshape``.
        flatten: When true, every sample is collapsed into a single row so
            the result is two-dimensional ``(len(x), -1)``.

    Returns:
        The scaled (and possibly flattened) array.
    """
    scaled = x / 255.
    if not flatten:
        return scaled
    # Keep the first (sample) axis and merge all remaining axes into one.
    return scaled.reshape((len(scaled), -1))

def preproc_y(y, num_classes=10):
    """Convert a column of class labels with ``keras.utils.to_categorical``.

    Args:
        y: Label array. It is converted only when its last axis has size 1;
            any other shape is returned unchanged.
        num_classes: Number of classes passed to ``to_categorical``.

    Returns:
        The converted labels, or ``y`` itself when no conversion is needed.
    """
    is_label_column = y.shape[-1] == 1
    return keras.utils.to_categorical(y, num_classes) if is_label_column else y

In [5]:
# Split each loaded pair into its inputs and labels.
(x_train, y_train), (x_test, y_test) = train, test

# Preprocess the inputs (scaled and flattened by preproc_x's defaults).
x_train, x_test = preproc_x(x_train), preproc_x(x_test)

# Preprocess the outputs with preproc_y.
y_train, y_test = preproc_y(y_train), preproc_y(y_test)

In [6]:
def build_mlp(input_shape, output_units=10, num_neurons=(128, 64, 32)):
    """Build a fully connected softmax classifier.

    The network is: one ReLU layer as wide as the input, one ReLU layer per
    entry of ``num_neurons``, and a softmax output layer.

    Args:
        input_shape: Shape of the training inputs; only ``input_shape[1]``
            (the number of features per sample) is used.
        output_units: Number of units in the softmax output layer.
        num_neurons: Sizes of the hidden layers, in order. An immutable
            tuple is used as the default so the default value cannot be
            shared and mutated across calls.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    n_features = input_shape[1]
    model = Sequential()
    # NOTE(review): the first layer has as many units as input features, so
    # it holds n_features * n_features weights (plus biases). This may be
    # heavier than intended on a small GPU -- confirm.
    model.add(Dense(units=n_features,
                    input_dim=n_features,
                    kernel_initializer='normal',
                    activation='relu',
                    name='input'))
    # Hidden layers are named hidden1, hidden2, ... in order.
    for i, n_units in enumerate(num_neurons):
        model.add(Dense(units=n_units,
                        kernel_initializer='normal',
                        activation='relu',
                        name='hidden' + str(i + 1)))
    model.add(Dense(units=output_units,
                    kernel_initializer='normal',
                    activation='softmax',
                    name='output'))
    return model

In [7]:
## Hyperparameter settings
"""
Set your required experiment parameters
"""
# Learning rates swept by the experiment loop.
LEARNING_RATE = [1e-1,1e-2,1e-3]
# Number of epochs and mini-batch size passed to model.fit.
EPOCHS = 50
BATCH_SIZE = 256
# Momentum coefficient intended for the SGD optimizer.
MOMENTUM = 0.9

def build_opt(opt,lr):
    """Create the Keras optimizer named by ``opt`` with learning rate ``lr``.

    Args:
        opt: Optimizer name: 'SGD', 'RMSprop', 'Adagrad' or 'Adam'.
        lr: Learning rate passed to the optimizer.

    Returns:
        The configured optimizer instance.

    Raises:
        ValueError: If ``opt`` is not one of the supported names.
    """
    if opt == 'SGD':
        # momentum is a numeric coefficient: use the MOMENTUM constant rather
        # than True, which is numerically 1 (velocity would never decay).
        return SGD(lr=lr, momentum=MOMENTUM, nesterov=True)
    if opt == 'RMSprop':
        return RMSprop(lr=lr, rho=0.9, epsilon=None, decay=0.0)
    if opt == 'Adagrad':
        return Adagrad(lr=lr, epsilon=None, decay=0.0)
    if opt == 'Adam':
        return Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                    decay=0.0, amsgrad=False)
    # Fail loudly instead of returning None for an unrecognised name.
    raise ValueError(
        'Unknown optimizer %r; expected one of SGD, RMSprop, Adagrad, Adam'
        % (opt,))


In [10]:
# Build the training/experiment loop and collect the data: every optimizer
# is trained once per learning rate and its curves are stored in `results`.
OPTS = ['SGD','RMSprop','Adagrad','Adam']
results = {}
for lr in LEARNING_RATE:
    for opt in OPTS:
        # Clear the previous graph before building a new model.
        keras.backend.clear_session()
        print(f'Current opt = {opt}, lr={lr}\n')

        model = build_mlp(input_shape=x_train.shape)
        model.summary()

        optimizer = build_opt(opt, lr)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])

        # fit returns the History object that holds the per-epoch metrics.
        history = model.fit(x_train, y_train,
                            validation_data=(x_test, y_test),
                            batch_size=BATCH_SIZE,
                            epochs=EPOCHS,
                            shuffle=True)
        curves = history.history

        # Store the curves under a tag naming the optimizer and learning rate.
        results[f'exp_{opt}+lr_{lr}'] = {
            'train_loss': curves['loss'],
            'valid_loss': curves['val_loss'],
            'train_acc': curves['acc'],
            'valid_acc': curves['val_acc'],
        }

Current opt = SGD, lr=0.1

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (Dense)                (None, 3072)              9440256   
_________________________________________________________________
hidden1 (Dense)              (None, 128)               393344    
_________________________________________________________________
hidden2 (Dense)              (None, 64)                8256      
_________________________________________________________________
hidden3 (Dense)              (None, 32)                2080      
_________________________________________________________________
output (Dense)               (None, 10)                330       
Total params: 9,844,266
Trainable params: 9,844,266
Non-trainable params: 0
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/50


InternalError: GPU sync failed

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

"""
將實驗結果繪出
"""