## 匯入模組

In [None]:
import torch  # 在本專案中只有轉換資料型態的功能而已

from tensorflow.keras.utils import to_categorical  # one hot encoding
from tensorflow.keras.models import Sequential  # 函數學習機
from tensorflow.keras.layers import Dense  # 隱藏層
from tensorflow.keras.optimizers import Adam  # 優化器(學習方法)

import numpy as np
import pandas as pd

## 讀取資料

**函式註解（Function Annotations）**：用於標註函式輸入、輸出的資料型態

`def load_data(lang:str, data_type:str) -> np.array:` \
代表輸入皆為字串(str)，輸出為陣列(array) \
\
可以看成： \
`def load_data(lang, data_type):` \
\
相關解說影片：https://www.youtube.com/watch?v=BzBUagNkX1E

In [None]:
def load_data(lang: str, data_type: str, path: str = "./pt") -> np.ndarray:
    """Load the saved embeddings of one language/split as a NumPy array.

    Reads ``{path}/{lang}_{data_type}_emb.pt`` with ``torch.load`` and flattens
    every stored tensor into a 256-element vector.

    Args:
        lang: Language code used in the file name (e.g. "zh").
        data_type: Split name used in the file name (e.g. "train").
        path: Directory holding the ``.pt`` files. Defaults to "./pt".

    Returns:
        Array with one row per stored tensor, i.e. shape ``(len(datas), 256)``.

    Raises:
        ValueError: If a stored tensor does not contain exactly 256 elements.
    """
    # NOTE: torch.load unpickles the file; only open .pt files from a trusted source.
    # map_location="cpu" lets files saved on a GPU load on a CPU-only machine.
    datas = torch.load(f"{path}/{lang}_{data_type}_emb.pt", map_location="cpu")

    # detach() allows converting tensors that were saved with requires_grad=True;
    # it is a no-op for ordinary tensors. Each tensor becomes a (256,) vector.
    return np.array([data.detach().numpy().reshape(256,) for data in datas])

逐一調取資料，並分配給對應的變數名稱：

```python
a, b, c, d, e = [1, 2, 3, 4, 5]
print(a, b, c, d, e)
# 輸出：1 2 3 4 5
```

In [None]:
lang_list = ["zh", "ru", "es", "de", "ar"]

# Load each split for every language; the results unpack in lang_list order.
zh_train, ru_train, es_train, de_train, ar_train = (
    load_data(code, "train") for code in lang_list
)
zh_valid, ru_valid, es_valid, de_valid, ar_valid = (
    load_data(code, "valid") for code in lang_list
)
zh_test, ru_test, es_test, de_test, ar_test = (
    load_data(code, "test") for code in lang_list
)

## 檢視資料

In [None]:
# One tab-separated row of shapes per split (train, valid, test),
# with the languages in zh, ru, es, de, ar order.
for split_arrays in (
    (zh_train, ru_train, es_train, de_train, ar_train),
    (zh_valid, ru_valid, es_valid, de_valid, ar_valid),
    (zh_test, ru_test, es_test, de_test, ar_test),
):
    print(*(arr.shape for arr in split_arrays), sep="\t")

(22048, 256)	(15481, 256)	(20000, 256)	(20000, 256)	(14227, 256)
(11638, 256)	(7963, 256)	(15089, 256)	(15588, 256)	(7517, 256)
(11655, 256)	(8007, 256)	(15089, 256)	(15588, 256)	(7622, 256)


In [None]:
# Display the Chinese training embeddings (NumPy abbreviates large arrays in the output).
zh_train

array([[ -0.37338284,   5.0748515 ,  -0.6773103 , ...,   1.8847569 ,
          5.486586  ,  -5.299919  ],
       [ -2.410383  ,   2.6575012 ,   0.584177  , ...,  -0.09867393,
         -0.16986103,  -1.608001  ],
       [  2.537675  ,   3.305266  ,  -3.0168207 , ...,   4.127087  ,
          4.308145  ,  -7.2403708 ],
       ...,
       [ -5.355678  ,   2.1825886 ,   4.4823003 , ...,  -3.801538  ,
          4.2385287 ,  -5.352148  ],
       [ -0.36605218,  -1.2724755 ,  -2.4574277 , ...,  -1.0902072 ,
          0.03735523,  -4.465888  ],
       [-10.756033  ,  -0.6331    ,   4.7277217 , ...,  -6.9008408 ,
          4.7123876 ,  -0.7721901 ]], dtype=float32)

In [None]:
# Shape of a single embedding vector (first row of the Chinese training set).
zh_train[0].shape

(256,)

## 整合資料

#### 調整資料數量（讓各語言的筆數一致，避免模型偏向猜測資料較多的語言而影響準確率）

In [None]:
# Number of training rows every resized language should end up with.
TRAIN_ROWS_PER_LANG = 20000

# Cyclic row indices (0 .. target-1 modulo the current length): an array longer
# than the target keeps only its first rows, a shorter one is padded by
# repeating its leading rows. Deriving the amount from len() instead of
# hard-coding it keeps the cell correct if the data changes, and makes
# re-running the cell a no-op rather than appending more rows each time.
zh_train = zh_train[np.arange(TRAIN_ROWS_PER_LANG) % len(zh_train)]
ru_train = ru_train[np.arange(TRAIN_ROWS_PER_LANG) % len(ru_train)]
ar_train = ar_train[np.arange(TRAIN_ROWS_PER_LANG) % len(ar_train)]

In [None]:
# Number of validation rows every resized language should end up with.
VALID_ROWS_PER_LANG = 15000

# Cyclic row indices (0 .. target-1 modulo the current length) pad a short
# array by repeating its leading rows. Computing the amount from len() instead
# of hard-coding it makes re-running this cell a no-op rather than appending
# more duplicated rows each time.
zh_valid = zh_valid[np.arange(VALID_ROWS_PER_LANG) % len(zh_valid)]
ru_valid = ru_valid[np.arange(VALID_ROWS_PER_LANG) % len(ru_valid)]
ar_valid = ar_valid[np.arange(VALID_ROWS_PER_LANG) % len(ar_valid)]

In [None]:
# Number of test rows every resized language should end up with.
TEST_ROWS_PER_LANG = 15000

# Cyclic row indices (0 .. target-1 modulo the current length) pad a short
# array by repeating its leading rows. Computing the amount from len() instead
# of hard-coding it makes re-running this cell a no-op rather than appending
# more duplicated rows each time.
zh_test = zh_test[np.arange(TEST_ROWS_PER_LANG) % len(zh_test)]
ru_test = ru_test[np.arange(TEST_ROWS_PER_LANG) % len(ru_test)]
ar_test = ar_test[np.arange(TEST_ROWS_PER_LANG) % len(ar_test)]

檢視各資料

調整後的數量
- train: 20000
- valid: 15000
- test: 15000

註：部分資料的數量，因為原本已經很靠近目標了，因此決定不進行變更

In [None]:
# Shapes after resizing: one tab-separated row per split (train, valid, test),
# with the languages in zh, ru, es, de, ar order.
for split_arrays in (
    (zh_train, ru_train, es_train, de_train, ar_train),
    (zh_valid, ru_valid, es_valid, de_valid, ar_valid),
    (zh_test, ru_test, es_test, de_test, ar_test),
):
    print("\t".join(str(arr.shape) for arr in split_arrays))

(20000, 256)	(20000, 256)	(20000, 256)	(20000, 256)	(20000, 256)
(15000, 256)	(15000, 256)	(15089, 256)	(15588, 256)	(15000, 256)
(15000, 256)	(15000, 256)	(15089, 256)	(15588, 256)	(15000, 256)


#### 合併資料

將 x 值合併

In [None]:
# Stack the per-language arrays row-wise, always in zh, ru, es, de, ar order,
# so the row blocks line up with the class ids assigned to the labels.
x_train = np.concatenate(
    [zh_train, ru_train, es_train, de_train, ar_train],
    axis=0,
)
x_valid = np.concatenate(
    [zh_valid, ru_valid, es_valid, de_valid, ar_valid],
    axis=0,
)
x_test = np.concatenate(
    [zh_test, ru_test, es_test, de_test, ar_test],
    axis=0,
)

依據各個語言的資料長度，生成對應的 y 值

In [None]:
# ["zh", "ru", "es", "de", "ar"]
y_train = np.array([0]*zh_train.shape[0] + [1]*ru_train.shape[0] + [2]*es_train.shape[0] + [3]*de_train.shape[0] + [4]*ar_train.shape[0])
y_valid = np.array([0]*zh_valid.shape[0] + [1]*ru_valid.shape[0] + [2]*es_valid.shape[0] + [3]*de_valid.shape[0] + [4]*ar_valid.shape[0])
y_test = np.array([0]*zh_test.shape[0] + [1]*ru_test.shape[0] + [2]*es_test.shape[0] + [3]*de_test.shape[0] + [4]*ar_test.shape[0])

將 y 值 one hot encoding \
因為語言有 5 種，所以參數要填 5

In [None]:
# One-hot encode the integer class ids; 5 columns, one per language.
y_train, y_valid, y_test = (
    to_categorical(labels, num_classes=5)
    for labels in (y_train, y_valid, y_test)
)

檢視各資料的維度

In [None]:
# First row: feature matrices; second row: one-hot label matrices
# (train, valid, test from left to right).
for merged_arrays in ((x_train, x_valid, x_test), (y_train, y_valid, y_test)):
    print(*(arr.shape for arr in merged_arrays), sep="\t")

(100000, 256)	(75677, 256)	(75677, 256)
(100000, 5)	(75677, 5)	(75677, 5)


## 組裝神經網路

In [None]:
# Fully connected classifier: a 256-element embedding in, 5 class probabilities out.
model = Sequential([
    Dense(256, input_shape=(256,), activation='relu'),  # each sample has shape (256,)
    Dense(512, activation='relu'),
    Dense(1024, activation='relu'),
    Dense(5, activation='softmax'),  # 5 possible results, hence 5 output units
])

因為具有分類性質，因此 loss function 選擇 "categorical_crossentropy"

In [None]:
# Multi-class classification with one-hot labels, hence categorical cross-entropy.
adam = Adam(learning_rate=0.005)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

觀看神經網路

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               65792     
                                                                 
 dense_1 (Dense)             (None, 512)               131584    
                                                                 
 dense_2 (Dense)             (None, 1024)              525312    
                                                                 
 dense_3 (Dense)             (None, 5)                 5125      
                                                                 
Total params: 727,813
Trainable params: 727,813
Non-trainable params: 0
_________________________________________________________________


## 訓練

In [None]:
# Train for 10 epochs in mini-batches of 20 samples, passing the validation
# split so it is scored alongside training.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=10,
    batch_size=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa8b783cd50>

## 預測

In [None]:
# Evaluate on the test set; `score` holds the loss followed by the compiled
# metrics. batch_size=1 would run one forward pass per sample (75,677 steps);
# evaluating in batches reports the same averaged metrics (up to floating-point
# rounding) in a fraction of the time.
score = model.evaluate(x_test, y_test, batch_size=256)



In [None]:
# Per-class probabilities for every test sample, then the index of the most
# probable class for each one.
test_probabilities = model.predict(x_test)
y_predict = np.argmax(test_probabilities, axis=-1)

In [None]:
# Spot-check one test sample: true class id | predicted class id.
n = 33000
true_label = np.argmax(y_test[n])
predicted_label = y_predict[n]
print(true_label, predicted_label, sep="|")

2|2


## 儲存

In [None]:
# Persist the trained model to disk at model_path.
model_path = "./Group26_audio_classification_model"
model.save(filepath=model_path)