# 고급 딥러닝 기법

## 함수형 API

고급 심층 신경망 구조를 구축하기 위해 필수  
다중 입력, 다중 출력, 복잡한 네트워크 토폴로지 등의 모델을 만들기 위해 사용 

Sequential 모델은 네트워크의 입력과 출력이 하나인 경우만 만들 수 있다.  
다중 입력, 다중 출력 문제에 대해 Sequential 모델로는 구현이 불가능하다.  

하지만 함수형 API를 사용하면 이것이 가능해진다.  

In [1]:
from keras import Input, layers

# Symbolic input describing batches of 32-dimensional vectors.
input_tensor = Input(shape=(32,))
# A layer instance is callable: applying it to a tensor returns a new tensor.
dense = layers.Dense(units=32, activation='relu')
output_tensor = dense(input_tensor)

In [2]:
from keras.models import Sequential, Model
from keras import layers
from keras import Input

# Three-layer classifier over 64-dimensional inputs, built by handing the
# whole layer stack to the Sequential constructor.
seq_model = Sequential([
    layers.Dense(32, activation='relu', input_shape=(64,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

seq_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Functional-API version of the same 64 -> 32 -> 32 -> 10 network:
# each layer is called on the tensor produced by the previous one.
input_tensor = Input(shape=(64,))
x = layers.Dense(units=32, activation='relu')(input_tensor)
x = layers.Dense(units=32, activation='relu')(x)
output_tensor = layers.Dense(units=10, activation='softmax')(x)

# A Model is defined by its input tensor(s) and output tensor(s).
model = Model(inputs=input_tensor, outputs=output_tensor)

model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_6 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


### 다중 입력 모델

대표 예시 : 질의-응답 모델 (question-answering model)

In [1]:
from keras.models import Model
from keras import layers
from keras import Input

text_vocab_size = 10_000
question_vocab_size = 10_000
answer_vocab_size = 500

# Text branch: variable-length integer sequence -> embedding -> LSTM encoding.
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(input_dim=text_vocab_size,
                                 output_dim=64)(text_input)
encoded_text = layers.LSTM(units=32)(embedded_text)

# Question branch: same structure with smaller embedding and LSTM sizes.
question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(input_dim=question_vocab_size,
                                     output_dim=32)(question_input)
encoded_question = layers.LSTM(units=16)(embedded_question)

# Merge the two encodings along the feature axis.
concatenated = layers.Concatenate(axis=-1)([encoded_text, encoded_question])

# Single output: softmax over the answer vocabulary.
answer = layers.Dense(units=answer_vocab_size,
                      activation='softmax')(concatenated)

# Two inputs, one output.
model = Model(inputs=[text_input, question_input], outputs=answer)
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [2]:
import numpy as np
from keras.utils import to_categorical

num_samples = 1000
max_len = 100

# Dummy inputs: integer sequences of shape (num_samples, max_len),
# with values drawn from [1, vocab_size).
text = np.random.randint(low=1, high=text_vocab_size,
                         size=(num_samples, max_len))
question = np.random.randint(low=1, high=question_vocab_size,
                             size=(num_samples, max_len))

# Dummy labels: one integer class id per sample in [0, answer_vocab_size).
answers = np.random.randint(low=0, high=answer_vocab_size, size=num_samples)

In [3]:
# Peek at the first sample: its text sequence, question sequence and
# integer answer label (not yet one-hot encoded at this point).
text[0], question[0], answers[0]

(array([5233, 5581,  394, 6471, 9180, 8736, 2653, 4431, 5749, 5988,  121,
        8973, 5796, 1843, 6517, 7693, 3893, 1459, 5545, 5557, 2337, 8051,
         879, 9678, 2774, 6729, 8961,  617, 8147, 4058, 3198, 2608, 9641,
        3332, 7900, 2314, 1996, 1326, 7062, 7353, 2718, 2783, 2229, 4861,
        6816, 4351,  524, 1509, 5197,  764, 5041, 3981, 8532, 3564, 4596,
        6710, 9383, 7204, 6372, 4380, 8427, 9808,  687, 2244, 5430, 5606,
         122,  730, 3178, 9053, 8904, 6544, 9199, 8051, 6024,  481, 7991,
        4957, 2877, 3879, 1976, 6726, 6529, 9377, 3941, 4114, 2703,  602,
        7414, 7049,  683, 4754, 5424, 9380, 5281, 3248, 7520, 4038, 9534,
        1827]),
 array([3548, 1975, 5752, 3030, 9039,  160, 5476,   93, 3284, 7148, 7355,
        9918, 4337, 2276, 1947, 5746, 7207, 3438, 3300, 3547, 8986, 6237,
        1565, 9637, 8183, 8148, 8596, 1861, 6513, 6464,  178, 6995, 8181,
        4803, 5585, 6453, 1481, 5984, 1324, 2256, 3660, 8325, 3493, 3024,
        5899, 5171, 76 ...  (출력이 여기서 잘림)

In [4]:
# One-hot encode the integer answers. num_classes is pinned to the width of
# the model's softmax layer: without it, to_categorical infers the width from
# max(answers) + 1, which is narrower than answer_vocab_size whenever the
# highest class id happens not to be sampled, and fit() then fails on a
# target/output shape mismatch.
answers = to_categorical(answers, num_classes=answer_vocab_size)

In [5]:
# Train on the dummy data. Inputs are passed as a dict keyed by the names
# given to the Input layers ('text' and 'question').
model.fit(
    x={'text': text, 'question': question},
    y=answers,
    batch_size=128,
    epochs=10,
    verbose=2,
)

Epoch 1/10
8/8 - 3s - loss: 6.2147 - acc: 0.0010
Epoch 2/10
8/8 - 0s - loss: 6.1989 - acc: 0.0560
Epoch 3/10
8/8 - 0s - loss: 6.1638 - acc: 0.0340
Epoch 4/10
8/8 - 0s - loss: 6.0841 - acc: 0.0090
Epoch 5/10
8/8 - 0s - loss: 5.9931 - acc: 0.0110
Epoch 6/10
8/8 - 0s - loss: 5.9036 - acc: 0.0120
Epoch 7/10
8/8 - 0s - loss: 5.8063 - acc: 0.0180
Epoch 8/10
8/8 - 0s - loss: 5.7261 - acc: 0.0210
Epoch 9/10
8/8 - 0s - loss: 5.6282 - acc: 0.0330
Epoch 10/10
8/8 - 0s - loss: 5.5519 - acc: 0.0340


<tensorflow.python.keras.callbacks.History at 0x7fa731c372d0>

### 다중 출력 모델

소셜 미디어 포스트 -> 나이, 소득, 성별 각각을 예측  

나이 = 회귀  
소득 = 다중 클래스 분류  
성별 = 이진 클래스 분류  

In [51]:
from keras import layers, Input
from keras.models import Model

# Size of the token vocabulary for the posts input.
vocab_size = 50_000
# Number of classes for the income (multi-class) output.
num_income_groups = 10

In [52]:
# Shared trunk: embed the integer post sequence, then run a 1-D convolution
# stack with pooling and reduce to a 128-unit feature vector.
posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = layers.Embedding(input_dim=vocab_size,
                                  output_dim=256)(posts_input)
x = layers.Conv1D(filters=128, kernel_size=5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(pool_size=5)(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
x = layers.MaxPooling1D(pool_size=5)(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
# Collapse the time axis by taking the maximum of each feature map.
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(units=128, activation='relu')(x)

In [53]:
# Three output heads on top of the shared features `x`. The layer names are
# the keys used later for the per-output losses and targets.

# Regression head: single linear unit (no activation).
age_prediction = layers.Dense(units=1, name='age')(x)

# Multi-class head: softmax over the income groups.
income_prediction = layers.Dense(
    units=num_income_groups,
    activation='softmax',
    name='income',
)(x)

# Binary head: single sigmoid unit.
gender_prediction = layers.Dense(
    units=1,
    activation='sigmoid',
    name='gender',
)(x)

In [54]:
# One input, three outputs. The order of `outputs` is the order in which
# predict() returns its arrays.
model = Model(
    inputs=posts_input,
    outputs=[age_prediction, income_prediction, gender_prediction],
)

In [55]:
# Each output gets its own loss, keyed by the output layer's name.
# loss_weights scale each loss's contribution to the total loss
# (presumably chosen to offset the much larger magnitude of the MSE term).
model.compile(
    optimizer='rmsprop',
    loss=dict(
        age='mse',
        income='categorical_crossentropy',
        gender='binary_crossentropy',
    ),
    loss_weights=dict(age=0.25, income=1., gender=10.),
)

In [56]:
import numpy as np
from keras.utils import to_categorical

num_samples = 1000
max_len = 500

# Dummy input: integer sequences of shape (num_samples, max_len),
# with values drawn from [1, vocab_size).
posts = np.random.randint(low=1, high=vocab_size, size=(num_samples, max_len))

In [57]:
# Check the generated array is (num_samples, max_len).
posts.shape

(1000, 500)

In [58]:
# Dummy targets, one value per sample for each of the three outputs.
# Age: float32 values drawn from the integers in [20, 60).
age_targets = np.random.randint(low=20, high=60, size=num_samples).astype('float32')
# Income: integer class id in [0, num_income_groups).
income_targets = np.random.randint(low=0, high=num_income_groups, size=num_samples)
# Gender: binary label, 0 or 1.
gender_targets = np.random.randint(low=0, high=2, size=num_samples)

In [59]:
# One-hot encode the income class ids. num_classes is set explicitly so the
# encoded width always equals the income head's softmax width; otherwise
# to_categorical infers it from max(income_targets) + 1, which depends on
# which ids the random draw happened to produce.
income_targets = to_categorical(income_targets, num_classes=num_income_groups)

In [60]:
# One age target per sample.
age_targets.shape

(1000,)

In [61]:
# First age target (stored as float32).
age_targets[0]

30.0

In [62]:
# First income target after one-hot encoding.
income_targets[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [63]:
# First gender target (integer 0 or 1).
gender_targets[0]

1

In [64]:
# Shapes of the one-hot income targets and the 1-D gender targets.
income_targets.shape, gender_targets.shape

((1000, 10), (1000,))

In [66]:
# Train on the dummy data. Targets are passed as a dict keyed by output
# layer name; validation_split holds out a fraction of the samples
# for validation.
model.fit(
    x=posts,
    y={
        'age': age_targets,
        'income': income_targets,
        'gender': gender_targets,
    },
    batch_size=64,
    epochs=5,
    verbose=2,
    validation_split=0.2,
)

Epoch 1/5
13/13 - 4s - loss: 47.4936 - age_loss: 139.2836 - income_loss: 2.4068 - gender_loss: 1.0266 - val_loss: 46.1725 - val_age_loss: 140.9499 - val_income_loss: 2.3286 - val_gender_loss: 0.8607
Epoch 2/5
13/13 - 4s - loss: 46.6892 - age_loss: 148.6702 - income_loss: 2.3616 - gender_loss: 0.7160 - val_loss: 99.6918 - val_age_loss: 355.6277 - val_income_loss: 2.4006 - val_gender_loss: 0.8384
Epoch 3/5
13/13 - 5s - loss: 42.0934 - age_loss: 126.0384 - income_loss: 2.3918 - gender_loss: 0.8192 - val_loss: 67.5475 - val_age_loss: 226.3976 - val_income_loss: 2.3771 - val_gender_loss: 0.8571
Epoch 4/5
13/13 - 5s - loss: 38.9664 - age_loss: 109.5569 - income_loss: 2.3981 - gender_loss: 0.9179 - val_loss: 126.1112 - val_age_loss: 451.6675 - val_income_loss: 2.3988 - val_gender_loss: 1.0796
Epoch 5/5
13/13 - 5s - loss: 45.7907 - age_loss: 141.5203 - income_loss: 2.3782 - gender_loss: 0.8032 - val_loss: 19.2513 - val_age_loss: 38.4525 - val_income_loss: 2.3399 - val_gender_loss: 0.7298


<tensorflow.python.keras.callbacks.History at 0x7fa7132a0890>

In [68]:
# A single post is a 1-D sequence with no batch axis.
posts[0].shape

(500,)

In [72]:
# Add a leading batch axis of size 1 to the single post.
posts[0].reshape((1, 500)).shape

(1, 500)

In [74]:
# Predict for the first post only. Slicing with [:1] keeps the batch axis,
# giving a (1, max_len) array.
prediction = model.predict(posts[:1])

In [75]:
# predict() returns a list with one array per model output,
# in the order the outputs were given to Model: age, income, gender.
prediction

[array([[36.05602]], dtype=float32),
 array([[0.07910356, 0.11878481, 0.07048823, 0.13836277, 0.10453656,
         0.11850392, 0.11151161, 0.06323165, 0.08942355, 0.10605338]],
       dtype=float32),
 array([[0.37889308]], dtype=float32)]

In [76]:
# Age head output (regression value).
prediction[0]

array([[36.05602]], dtype=float32)

In [77]:
# Income head output: softmax probabilities over the income groups.
prediction[1]

array([[0.07910356, 0.11878481, 0.07048823, 0.13836277, 0.10453656,
        0.11850392, 0.11151161, 0.06323165, 0.08942355, 0.10605338]],
      dtype=float32)

In [78]:
# Index of the most probable income group.
prediction[1].argmax()

3

In [79]:
# Gender head output: a single sigmoid probability.
prediction[2]

array([[0.37889308]], dtype=float32)

In [80]:
# The gender head is a single sigmoid unit, so argmax over its one-element
# output is always 0 regardless of the probability. Threshold the
# probability at 0.5 to obtain the predicted binary label instead.
int(prediction[2][0, 0] > 0.5)

0

In [82]:
# Targets of the first sample, for comparison with the predictions above.
age_targets[0], income_targets[0], gender_targets[0]

(30.0, array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32), 1)

### Inception

keras.applications.inception_v3.InceptionV3 : 인셉션 V3 모델  

케라스에서 제공하는 이미지 분류 모델이다. `weights` 파라미터로 ImageNet에서 사전 학습된(pretrained) 가중치를 로드할지 여부를 설정할 수 있다.  

공식문서 참조: https://keras.io/api/applications/inceptionv3/

### Residual Connection

gradient 소실 문제, 표현 병목 문제를 해결함  
10개 이상의 층을 가진 모델에 추가하면 도움이 된다.  

하위 층의 출력을 상위 층의 입력으로 사용한다.  
이때 하위 층의 출력은 상위 층의 출력에 더해진다 (상위 층의 활성화 함수에 입력으로 들어가는 것이 아니다).  
따라서 두 출력의 크기가 동일해야 하며, 크기가 다르면 1 * 1 합성곱 등으로 크기를 맞춘 뒤 더한다.  

In [None]:
# x : 4D input tensor.
# Main branch: three 3x3 convolutions with 'same' padding.
y = layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same')(x)
y = layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same')(y)
y = layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same')(y)

# Residual connection: add the original input back onto the branch output.
y = layers.Add()([y, x])

In [None]:
# Residual block whose main branch downsamples its input.
# x : 4D input tensor.
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
# Pool the convolved features `y`. Pooling `x` here (as the code did before)
# would overwrite `y` and throw away both convolutions above.
y = layers.MaxPooling2D(2, strides=2)(y)

# Make x the same shape as y: a 1x1 convolution with stride 2 downsamples
# x and sets its channel count to 128.
residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)

# Sum the main branch and the reshaped shortcut.
y = layers.add([y, residual])

### 층 가중치 공유

In [10]:
import numpy as np
from keras.utils import to_categorical

num_samples = 1000

# Dummy inputs: two independent float arrays of shape
# (num_samples, 128, 128), drawn uniformly from [0, 1).
left_data, right_data = (
    np.random.random_sample(size=(num_samples, 128, 128)) for _ in range(2)
)

# Dummy binary targets: one 0/1 label per sample.
targets = np.random.randint(low=0, high=2, size=num_samples)

In [11]:
# Shape of one sample and of the whole left input array.
left_data[0].shape, left_data.shape

((128, 128), (1000, 128, 128))

In [12]:
# One binary target per sample.
targets.shape

(1000,)

In [14]:
from keras import layers, Input
from keras.models import Model

# A single LSTM instance: calling the same layer object on both inputs
# makes the two branches reuse one set of weights.
lstm = layers.LSTM(units=32)

# Left branch.
left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

# Right branch, processed by the very same layer.
right_input = Input(shape=(None, 128))
right_output = lstm(right_input)

# Join both encodings and classify with a single sigmoid unit.
merged = layers.Concatenate(axis=-1)([left_output, right_output])
predictions = layers.Dense(units=1, activation='sigmoid')(merged)

model = Model(inputs=[left_input, right_input], outputs=predictions)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.fit(
    x=[left_data, right_data],
    y=targets,
    batch_size=64,
    epochs=5,
    verbose=2,
)

Epoch 1/5
16/16 - 3s - loss: 0.7064 - acc: 0.5150
Epoch 2/5
16/16 - 1s - loss: 0.6858 - acc: 0.5480
Epoch 3/5
16/16 - 1s - loss: 0.6821 - acc: 0.5680
Epoch 4/5
16/16 - 1s - loss: 0.6744 - acc: 0.5770
Epoch 5/5
16/16 - 1s - loss: 0.6690 - acc: 0.6080


<tensorflow.python.keras.callbacks.History at 0x7fe6e4916450>

### 층과 모델

모델을 층처럼 사용 가능  

In [None]:
from keras import layers, applications, Input

# Xception convolutional base without the classification top and without
# pretrained weights. A model can be called on tensors just like a layer.
xception_base = applications.Xception(include_top=False, weights=None)

# Two 250 * 250 RGB image inputs.
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))

# The same base model is applied to both inputs.
left_features = xception_base(left_input)
right_features = xception_base(right_input)

# Combine the two feature tensors along the last axis.
merged_features = layers.Concatenate(axis=-1)([left_features, right_features])