# 8차시: 텐서플로우 2.x 활용 Speech Recognition

## AI 맛보기 8주차: 2020. 08. 25. 20:00 ~ 22:00 (120분)
1. 도구 불러오기 및 버전 확인
1. 학습 데이터 다운로드: Speech Commands v0.02 (`data_speech_commands_v0.02.tar.gz`)
1. 학습 데이터 살펴보기: 차원, 미리보기
1. 학습 데이터 전처리 (정규화)
1. 학습 모델 준비: Deep CNN
1. 학습
1. 학습 결과 테스트
1. 확률 모델
1. 예측
1. Convolutional Neural Network

#### 참고자료
- [파이썬 3 표준 문서](https://docs.python.org/3/index.html)
- [텐서플로우 CNN](https://www.tensorflow.org/tutorials/images/cnn)

### 1. 도구 불러오기 및 버전 확인

In [None]:
# Tool setup
import math
import os
# import shutil
# import random

import tensorflow as tf  # TensorFlow
import tensorflow_io as tfio
import matplotlib.pyplot as plt  # visualization tools
%matplotlib inline
# import matplotlib.font_manager as fm
import numpy as np

In [None]:
# Report the installed TensorFlow version (the notebook title targets TensorFlow 2.x).
print(f'Tensorflow 버전을 확인합니다: {tf.__version__}')

### 2. 학습 데이터 다운로드

In [None]:
!rm -r data_speech_commands
!mkdir data_speech_commands
!tar --directory data_speech_commands -xvf data_speech_commands_v0.02.tar.gz &> /dev/null

In [None]:
# Remove the `_background_noise_` directory so that the directory scan, which
# treats every sub-directory as one class label, does not pick it up.
!rm -r data_speech_commands/_background_noise_

In [None]:
path_root = './data_speech_commands'

# Every sub-directory of `path_root` is one class and its name is the label
# text. `os.scandir` yields entries in arbitrary order, so the names are
# sorted to make the label-name -> integer-id mapping reproducible.
lab = sorted(entry.name for entry in os.scandir(path_root) if entry.is_dir())

files = []   # path of every audio clip
labels = []  # integer class id (index into `lab`) of the clip at the same position
for label, name in enumerate(lab):
    class_dir = os.path.join(path_root, name)
    for entry in sorted(os.scandir(class_dir), key=lambda e: e.name):
        # Only regular files are clips; skip anything else in the directory.
        if entry.is_file():
            files.append(entry.path)
            labels.append(label)

# Dataset of (path, label-id) pairs; decoding happens later in `map`.
dataset_root = tf.data.Dataset.from_tensor_slices((files, labels))

In [None]:
# Take the first (path, label-id) pair so the exploration cells that follow
# have a sample clip `p` to work with.
p, l = next(iter(dataset_root))
display(p.numpy().decode('utf-8'))
# `l` is an integer class id, so look up its name in `lab` instead of
# decoding it like a byte string.
display(lab[l.numpy()])

In [None]:
# Open the clip at path `p` with tensorflow-io and print the resulting object.
audio = tfio.audio.AudioIOTensor(p)
print(audio)

In [None]:
# Read the samples into a tensor and remove the last axis. tf.squeeze with an
# explicit axis requires that axis to have size 1 — presumably the single
# audio channel; confirm against the data.
audio_tensor = tf.squeeze(audio.to_tensor(), axis=[-1])
print(audio_tensor)

In [None]:
# Padding experiment: append zeros so that a clip of up to 16000 samples
# becomes exactly 16000 samples long.
# NOTE(review): only the pad width uses the [:16000] slice; the array being
# padded is the full `audio_tensor`, so a longer clip is not truncated here.
np.pad(audio_tensor, (0, 16000-len(audio_tensor[:16000])), 'constant', constant_values=0)

In [None]:
# Same padding applied to the first 15998 samples; the result is always
# 16000 samples long.
np.pad(audio_tensor[:15998], (0, 16000-len(audio_tensor[:15998])), 'constant', constant_values=0)

In [None]:
# Inspect the three samples at indices 15995..15997.
audio_tensor[15995:15998]

In [None]:
from IPython.display import Audio

In [None]:
# Render an inline audio player for the clip, using the sample rate reported
# by the opened audio object.
Audio(audio_tensor.numpy(), rate=audio.rate.numpy())

In [None]:
# Convert the samples to float32, scale them down by 2**16 and plot the waveform.
# NOTE(review): if the samples are int16 this maps them to [-0.5, 0.5);
# dividing by 2**15 would give [-1, 1) — confirm which range is intended.
tensor = tf.cast(audio_tensor, tf.float32) / 2**16
plt.figure()
plt.plot(tensor.numpy())

In [None]:
# Convert to spectrogram (FFT size 512, window 512, hop 256 samples)
spectrogram = tfio.experimental.audio.spectrogram(
    tensor, nfft=512, window=512, stride=256)

# Show the spectrogram on a log scale.
# NOTE(review): any exactly-zero bin becomes -inf under the log.
plt.figure()
plt.imshow(tf.math.log(spectrogram).numpy())
# plt.colorbar()

In [None]:
# Shape of the log-spectrogram after appending a trailing axis of size 1;
# the model cell builds its `input_shape` the same way.
tf.expand_dims(tf.math.log(spectrogram), -1).shape

In [None]:
# Show the linear-scale spectrogram in grayscale for comparison.
plt.figure()
plt.imshow(spectrogram.numpy(), cmap='gray')
# plt.colorbar()

In [None]:
from PIL import Image

In [None]:
# Number of samples left after slicing to at most 15000.
audio_tensor[:15000].shape[0]

In [None]:
# Build a PIL image from the spectrogram values multiplied by 256.
i = Image.fromarray(spectrogram.numpy()*256)

In [None]:
# Convert the image to RGB mode.
i = i.convert('RGB')

In [None]:
# Shape of the pixel array after the RGB conversion.
np.array(i).shape

In [None]:
# Number of samples left after slicing to at most 16000 (the length
# `load_audio` works with).
audio_tensor[:16000].shape[0]

In [None]:
def load_audio(path, label):
    """Decode one audio file into a fixed-size log-spectrogram.

    Intended for `tf.data.Dataset.map`, so every size computation uses
    tensor ops: inside `map` the clip length is not known statically.

    Args:
        path: Scalar string tensor holding the audio file path.
        label: Class id for the clip; passed through unchanged.

    Returns:
        A `(spectrogram_log, label)` tuple. `spectrogram_log` is the
        log-spectrogram of the clip, truncated or zero-padded to 16000
        samples, with a trailing axis of size 1 appended.
    """
    num_samples = 16000

    audio = tfio.audio.AudioIOTensor(path, dtype='int16')
    # Drop the last axis (must have size 1) and cut clips that are too long.
    audio_tensor = tf.squeeze(audio.to_tensor(), axis=[-1])[:num_samples]

    # `tf.shape` gives the runtime length; the static `.shape[0]` is None
    # inside `Dataset.map`. `tf.pad` expects one [before, after] pair per axis.
    pad_len = num_samples - tf.shape(audio_tensor)[0]
    audio_tensor = tf.pad(audio_tensor, [[0, pad_len]],
                          mode='CONSTANT', constant_values=0)
    # The pad width is dynamic, so restore the static length for Keras.
    audio_tensor = tf.ensure_shape(audio_tensor, [num_samples])

    tensor = tf.cast(audio_tensor, tf.float32) / 2**16
    spectrogram = tfio.experimental.audio.spectrogram(
                          tensor, nfft=512, window=512, stride=256)
    # Zero-padded (silent) frames have zero magnitude; the small offset keeps
    # the log finite so the loss cannot become NaN.
    spectrogram_log = tf.expand_dims(tf.math.log(spectrogram + 1e-6), -1)
    return spectrogram_log, label

In [None]:
# Shuffle the (path, label) pairs before decoding: `files` is collected one
# class directory at a time, so without shuffling every batch would contain
# clips of a single class. Shuffling paths is cheap compared to shuffling
# decoded spectrograms.
dataset = (
    dataset_root
    .shuffle(max(len(files), 1))
    .map(load_audio)
    .batch(32)
)

In [None]:
print('모델 생성')
# Input is one spectrogram with a trailing axis of size 1. The shape is taken
# from the sample `spectrogram` computed earlier (the log does not change it).
# NOTE(review): this matches the `load_audio` output only if that sample clip
# is 16000 samples long — confirm, or derive the shape from the dataset.
input_shape = tuple(spectrogram.shape) + (1,)
# One output unit per class directory found on disk, not a fixed 10.
num_classes = len(lab)
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    # No activation: the model emits raw scores (logits).
    tf.keras.layers.Dense(num_classes)
])
model.summary()

In [None]:
print('모델 컴파일')
# The last Dense layer has no activation, so the loss is told to expect logits.
adam = tf.keras.optimizers.Adam()
xent = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=adam, loss=xent, metrics=['accuracy'])

### 6. 학습

In [None]:
# Train for a fixed number of passes over `dataset` and keep the History
# object. No validation data is passed, so only training metrics are recorded.
epochs = 10
history = model.fit(dataset, epochs=epochs)

### 7. 학습 결과 테스트

In [None]:
history_dict = history.history
acc = history_dict['accuracy']
loss = history_dict['loss']
# Validation curves exist only when `fit` was given validation data, so read
# them defensively instead of raising KeyError.
val_acc = history_dict.get('val_accuracy')
val_loss = history_dict.get('val_loss')

# Separate name so the integer `epochs` used by the training cell is kept.
epoch_axis = range(1, len(acc) + 1)

fig1 = plt.figure(figsize=(6, 10))
# (subplot position, legend suffix, title suffix, y label, training values, validation values)
panels = [
    (1, 'loss', 'loss', 'Loss', loss, val_loss),
    (2, 'acc', 'accuracy', 'Accuracy', acc, val_acc),
]
for position, legend_name, title_name, y_label, train_values, val_values in panels:
    ax = fig1.add_subplot(2, 1, position)
    ax.plot(epoch_axis, train_values, 'bo', label=f'Training {legend_name}')
    upper = max(train_values)
    if val_values:
        ax.plot(epoch_axis, val_values, 'b', label=f'Validation {legend_name}')
        upper = max(upper, max(val_values))
    # Round the y-limit up to the next integer so every point is visible.
    ax.set_ylim((0, math.ceil(upper)))
    ax.set_title(f'Training and validation {title_name}', fontsize=12)
    ax.set_xlabel('Epochs', fontsize=10)
    ax.set_ylabel(y_label, fontsize=10)
    ax.legend()

In [None]:
# Evaluate the trained model on a held-out set and report its accuracy.
# NOTE(review): `test_images` and `test_labels` are not defined anywhere in
# the visible notebook — confirm where the test split is supposed to come from.
print(f'{len(test_images)}개 이미지로 테스트합니다.')
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)
print()
print(f'테스트 이미지 정확도: {test_acc}')

### 8. 확률 모델

In [None]:
print('확률 모델')
# Append a softmax to the trained model so that its outputs are per-class
# probabilities instead of raw scores.
softmax_head = tf.keras.layers.Softmax()
probability_model = tf.keras.Sequential([model, softmax_head])
probability_model.summary()

In [None]:
# Compute the class-probability vector of every test sample.
# NOTE(review): `test_images` is not defined in the visible notebook.
print('각 테스트이미지별 레이블 확률 계산')
predictions = probability_model.predict(test_images)

In [None]:
# Inspect one sample: its probability vector, the arg-max class and the true
# label. `test_labels[idx][0]` implies each label is stored as a 1-element
# sequence.
idx = 0
print(f'예측 레이블은 확률 중 최대 확률을 선택합니다.')
print(f'Test image {idx} prediction: \n{predictions[idx]}')
print(f'Maximum probability label: {np.argmax(predictions[idx])}')
print(f'Actual label: {test_labels[idx][0]}')

In [None]:
# Show the same sample as an image, titled with its true class name.
# NOTE(review): `class_names` is not defined in the visible notebook.
fig5 = plt.figure(figsize=(6, 6))
ax = fig5.add_subplot()
axm = ax.imshow(test_images[idx])
fig5.suptitle(f'Test Image [{idx}]', fontsize=20)
ax.set_title(f'Label: {class_names[test_labels[idx][0]]}', fontsize=16)
ax.grid(False)

### 9. 예측

In [None]:
# Image drawing helpers
def draw_image(ax, prob, image, true_label):
    """Show `image` on `ax` with a title summarising the prediction.

    The title reads "<predicted name> <confidence>% (<true name>)" and is
    blue when the arg-max of `prob` equals `true_label`, red otherwise.
    Class names are looked up in the module-level `class_names`.

    Args:
        ax: Matplotlib axes to draw on.
        prob: Per-class probabilities for this sample.
        image: Image data accepted by `ax.imshow`.
        true_label: Integer id of the correct class.
    """
    ax.grid(False)
    ax.imshow(image)
    # Tick marks carry no information for an image.
    ax.set_xticks([])
    ax.set_yticks([])

    predicted_label = np.argmax(prob)
    title_color = 'blue' if true_label == predicted_label else 'red'

    predicted_name = class_names[predicted_label]
    true_name = class_names[true_label]
    caption = f'{predicted_name} {np.max(prob)*100:3.0f}% ({true_name})'
    ax.set_title(f'{caption}', color=title_color)
    
def draw_bar(ax, prob, true_label):
    """Draw the class-probability vector `prob` as a bar chart on `ax`.

    The number of bars follows `len(prob)`, so the helper works for any
    number of classes. The predicted (arg-max) bar is red and the bar of the
    true class is blue.

    Args:
        ax: Matplotlib axes to draw on.
        prob: Per-class probabilities for this sample.
        true_label: Integer id of the correct class.
    """
    positions = range(len(prob))
    ax.grid(False)
    ax.set_xticks(positions)
    ax.set_yticks(np.arange(0, 1.2, 0.2))
    bars = ax.bar(positions, prob, color='gray')
    ax.set_ylim((0, 1))
    predicted_label = np.argmax(prob)
    bars[predicted_label].set_color('red')
    # Coloured last on purpose: a correct prediction ends up blue, not red.
    bars[true_label].set_color('blue')

In [None]:
# Find the first test sample whose arg-max prediction equals its true label.
# `i` is read after the loop on purpose: if no sample matches, it is left at
# the last index.
for i in range(len(test_images)):
    if test_labels[i][0] == np.argmax(predictions[i]):
        break
# Left: the sample with its prediction title; right: its probability bars.
fig6 = plt.figure(figsize=(6, 3))
ax = fig6.add_subplot(1, 2, 1)
draw_image(ax, predictions[i], test_images[i], test_labels[i][0])
ax = fig6.add_subplot(1, 2, 2)
draw_bar(ax, predictions[i], test_labels[i][0])

In [None]:
# Find the first test sample the model gets wrong. The label is unwrapped with
# `[0]`, like everywhere else in this notebook, so two scalars are compared.
# `i` is read after the loop: if every sample is correct, it is left at the
# last index.
for i in range(len(test_images)):
    if test_labels[i][0] != np.argmax(predictions[i]):
        break
# Left: the sample with its prediction title; right: its probability bars.
fig7 = plt.figure(figsize=(6, 3))
ax = fig7.add_subplot(1, 2, 1)
draw_image(ax, predictions[i], test_images[i], test_labels[i][0])
ax = fig7.add_subplot(1, 2, 2)
draw_bar(ax, predictions[i], test_labels[i][0])

In [None]:
# Grid of (image, probability bars) pairs. Each sample takes two adjacent
# cells, and the sample index advances in steps of two from `base`.
base = 5000
rows = 1 * 5
cols = 2 * 3
fig8 = plt.figure(figsize=(2.5*cols, 2.5*rows))
fig8.set_facecolor('white')
for i in range(0, rows*cols, 2):
    sample = base + i
    image_ax = fig8.add_subplot(rows, cols, i+1)
    draw_image(image_ax, predictions[sample], test_images[sample], test_labels[sample][0])
    bar_ax = fig8.add_subplot(rows, cols, i+2)
    draw_bar(bar_ax, predictions[sample], test_labels[sample][0])
    ax = bar_ax

### 10. Convolutional Neural Network

In [None]:
# Model that returns the output of every layer except the last three, so the
# intermediate feature maps can be inspected.
feature_layers = model.layers[:-3]
outputs = [layer.output for layer in feature_layers]
intermediate_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=outputs,
)
intermediate_model.summary()

In [None]:
# Feature maps of one intermediate layer (`model_idx`) for one test sample.
image_idx = 1
model_idx = 4
intermediate_output = intermediate_model.predict(tf.expand_dims(test_images[image_idx], 0))
data = intermediate_output[model_idx]

# Show the sample that was actually fed to the model (the test sample, not
# the training sample with the same index).
fig9 = plt.figure(figsize=(3, 3))
fig9.set_facecolor('white')
ax = fig9.add_subplot()
ax.imshow(test_images[image_idx])
ax.grid(False)

# One small panel per channel, 16 panels per row.
n_channels = data.shape[-1]
n_rows = math.ceil(n_channels/16)
fig10 = plt.figure(figsize=(16, n_rows))
fig10.set_facecolor('white')
for i in range(n_channels):
    ax = fig10.add_subplot(n_rows, 16, i+1)
    ax.imshow(data[0, :, :, i])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)

In [None]:
# Feature maps of every intermediate layer for one test sample.
image_idx = 0
intermediate_output = intermediate_model.predict(tf.expand_dims(test_images[image_idx], 0))

# Show the sample that was actually fed to the model (the test sample, not
# the training sample with the same index).
fig9 = plt.figure(figsize=(3, 3))
fig9.set_facecolor('white')
ax = fig9.add_subplot()
ax.imshow(test_images[image_idx])
ax.grid(False)

# One figure per layer; inside it one small panel per channel, 16 per row.
for layer_idx in range(0, len(intermediate_output)):
    data = intermediate_output[layer_idx]
    n_channels = data.shape[-1]
    n_rows = math.ceil(n_channels/16)
    fig = plt.figure(figsize=(16, n_rows))
    fig.set_facecolor('white')
    for i in range(n_channels):
        ax = fig.add_subplot(n_rows, 16, i+1)
        ax.imshow(data[0, :, :, i])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)