In [1]:
# Colab Pro costs a bit under 300 a month and comes with a V100 -- nice.
# Show which GPU this runtime was assigned.
!nvidia-smi

Fri Apr  2 14:07:08 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Download the dataset archive from Google Drive and unpack it.
# The full URL form is used instead of `--id <file id>`, which newer gdown releases deprecate.
!gdown 'https://drive.google.com/uc?id=1vpB54zSsLL2st0TI1edwRORIIK1c0yo_' --output data.zip
# -o overwrites existing files without asking, so re-running this cell cannot stall on
# unzip's interactive "replace?" prompt.
!unzip -o data.zip
!ls

Downloading...
From: https://drive.google.com/uc?id=1vpB54zSsLL2st0TI1edwRORIIK1c0yo_
To: /content/data.zip
372MB [00:05, 71.2MB/s]
Archive:  data.zip
   creating: timit_11/
  inflating: timit_11/train_11.npy   
  inflating: timit_11/test_11.npy    
  inflating: timit_11/train_label_11.npy  
data.zip  sample_data  timit_11


In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import mixed_precision
from sklearn.model_selection import KFold, StratifiedKFold
from tqdm.notebook import tqdm

In [5]:
# Use mixed precision to speed up training.
# The policy is installed globally, so it applies to every Keras layer created after this cell.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla V100-SXM2-16GB, compute capability 7.0


In [6]:
print('Loading data ...')

data_root='./timit_11/'

# Load each array first, then give the two feature arrays an explicit (samples, 11, 39) layout.
train = np.load(data_root + 'train_11.npy')
train = train.reshape(-1, 11, 39)

# Labels are cast to integers.
train_label = np.load(data_root + 'train_label_11.npy')
train_label = train_label.astype('int')

test = np.load(data_root + 'test_11.npy')
test = test.reshape(-1, 11, 39)

print('Size of training data: {}'.format(train.shape))
print('Size of testing data: {}'.format(test.shape))

Loading data ...
Size of training data: (1229932, 11, 39)
Size of testing data: (451552, 11, 39)


In [7]:
# Convert the MFCC window data back into sentences of their original length.
def data_to_sents(data, label=None):
    """Rebuild whole sentences from a sequence of overlapping frame windows.

    Window ``i`` is treated as a continuation of the current sentence when its
    first frame equals the second frame of window ``i - 1``; otherwise it starts
    a new sentence. A new sentence begins with every frame of its first window,
    and each continuing window contributes only its last frame.

    Args:
        data: Indexable collection of windows, where ``data[i][k]`` is the k-th
            frame of window ``i``.
        label: Optional collection with one label per window (``label[i]``
            belongs to ``data[i]``).

    Returns:
        ``sents`` when ``label`` is None, otherwise ``(sents, sent_labels)``.
        ``sents`` is a list of ``np.ndarray``, one per sentence. ``sent_labels``
        is a list of label lists in which the first and last label of each
        sentence are repeated 5 extra times; for 11-frame windows this gives one
        label per reconstructed frame.
        Empty ``data`` yields empty lists instead of a bogus empty sentence (and
        no longer raises ``IndexError`` when labels are requested).
    """
    has_label = label is not None

    # Nothing to reconstruct: avoid emitting one empty sentence / indexing an empty label list.
    if len(data) == 0:
        return ([], []) if has_label else []

    sents, sent_labels = [], []

    # The first window always opens the first sentence.
    sent = list(data[0])
    sent_label = [label[0]] if has_label else []

    for i in tqdm(range(1, len(data))):
        if np.any(data[i][0] != data[i-1][1]):
            # Windows do not overlap: close the current sentence and open a new one.
            sents.append(np.array(sent))
            sent = list(data[i])

            if has_label:
                sent_labels.append(sent_label)
                sent_label = [label[i]]
        else:
            # Same sentence: only the last frame of this window is new.
            sent.append(data[i][-1])
            if has_label:
                sent_label.append(label[i])

    # Flush the sentence that was still open when the data ended.
    sents.append(np.array(sent))
    if not has_label:
        return sents

    sent_labels.append(sent_label)

    # Repeat the edge labels so the label count covers the extra frames that the
    # first window contributes (5 on each side).
    sent_labels = [
        [labels[0]] * 5 + labels + [labels[-1]] * 5
        for labels in sent_labels
    ]

    return sents, sent_labels

In [8]:
# Rebuild per-sentence arrays from the windowed data; the training set also gets its labels grouped per sentence.
train_sents, train_sent_labels = data_to_sents(train, train_label)
test_sents = data_to_sents(test)

HBox(children=(FloatProgress(value=0.0, max=1229932.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=451552.0), HTML(value='')))




In [10]:
# Pad every sentence to the same length.
# pad_sequences silently truncates anything longer than maxlen, which would drop frames and
# misalign labels/predictions, so verify that every sentence fits before padding.
MAX_SENT_LEN = 725
longest_sent = max((len(sent) for sent in [*train_sents, *test_sents]), default=0)
assert longest_sent <= MAX_SENT_LEN, (
    f'Longest sentence has {longest_sent} frames but maxlen is {MAX_SENT_LEN}; '
    'pad_sequences would silently truncate it'
)

# Features are padded at the end with 0.0.
padded_train_sents = tf.keras.preprocessing.sequence.pad_sequences(
    train_sents, maxlen=MAX_SENT_LEN, padding="post", dtype='float32', value=0.0
)

# Labels are padded at the end with the sentinel -1.
padded_train_labels = tf.keras.preprocessing.sequence.pad_sequences(
    train_sent_labels, maxlen=MAX_SENT_LEN, padding="post", dtype='int32', value=-1
)

padded_test_sents = tf.keras.preprocessing.sequence.pad_sequences(
    test_sents, maxlen=MAX_SENT_LEN, padding="post", dtype='float32', value=0.0
)

In [11]:
# Sample weights: padded positions must not contribute to the loss.
# The mask is derived from the true sentence lengths rather than from the -1 sentinel, because
# the sentinel is overwritten below; testing `!= -1` again on a re-run would give an all-ones mask.
sent_lengths = np.array([len(labels) for labels in train_sent_labels], dtype='int64')
frame_positions = np.arange(padded_train_labels.shape[1])
padded_train_labels_sw = (frame_positions[None, :] < sent_lengths[:, None]).astype('int')

# Replace the -1 sentinel with class 0; those positions carry weight 0 in the mask above.
padded_train_labels[padded_train_labels == -1] = 0

In [12]:
# Display the shapes of the padded training features, training labels and test features.
padded_train_sents.shape, padded_train_labels.shape, padded_test_sents.shape

((4620, 725, 39), (4620, 725), (1680, 725, 39))

In [13]:
from tensorflow.keras.layers import Bidirectional, LSTM, GRU, Dense, Conv1D
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten, GlobalAveragePooling1D, Dropout
from tensorflow.keras.layers import Conv2D, Reshape, MaxPooling2D
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

In [14]:
# Network definition, following the approach used in pytorch-kaldi
# https://github.com/mravanelli/pytorch-kaldi/blob/master/cfg/TIMIT_baselines/TIMIT_LSTM_mfcc.cfg
import tensorflow.keras.backend as K

def make_model():
    """Build and compile the per-frame classifier.

    The network takes sequences of shape (725, 39), masks timesteps whose
    features are all 0.0, runs two bidirectional LSTM layers (256 units each,
    each followed by dropout 0.2) and emits a 39-way softmax for every timestep.

    Returns:
        A compiled ``keras.Sequential`` model (RMSprop, sparse categorical
        cross-entropy, accuracy metric).
    """
    model = keras.Sequential([
        keras.Input(shape=(725, 39)),
        keras.layers.Masking(mask_value=0.0),

        Bidirectional(LSTM(256, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(256, return_sequences=True)),
        Dropout(0.2),

        Dense(39),
        # The notebook enables the global mixed_float16 policy; computing the softmax in
        # float32 keeps the output probabilities and the loss numerically stable.
        Activation('softmax', dtype='float32'),
    ])

    model.compile(
        optimizer=RMSprop(learning_rate=0.016),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [18]:
# 20-fold training: one model is trained and saved per fold.
kf = KFold(n_splits=20)

for i_fold, (train_index, valid_index) in enumerate(tqdm(kf.split(padded_train_sents))):
    print(f'Fold {i_fold}')
    tf.random.set_seed(8889 + i_fold)

    # Slice features, labels and per-frame sample weights with the same fold indices.
    X_train, y_train, sw_train = (
        fold_source[train_index]
        for fold_source in (padded_train_sents, padded_train_labels, padded_train_labels_sw)
    )
    X_valid, y_valid, sw_valid = (
        fold_source[valid_index]
        for fold_source in (padded_train_sents, padded_train_labels, padded_train_labels_sw)
    )

    model = make_model()

    # Stop once validation accuracy has not improved for 4 epochs and keep the best weights.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=4,
        restore_best_weights=True,
        verbose=1,
    )

    # Halve the learning rate when validation loss stalls, down to a floor of 5e-4.
    lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=1,
        min_delta=0.0003,
        min_lr=5e-4,
        verbose=1,
    )

    history = model.fit(
        x=X_train,
        y=y_train,
        sample_weight=sw_train,
        validation_data=(X_valid, y_valid, sw_valid),
        epochs=100,
        batch_size=64,
        validation_batch_size=256,
        callbacks=[early_stopping, lr_schedule],
    )

    model.save(f'lstm1/folf_{i_fold}.h5')

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Fold 0
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00800000037997961.
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.004000000189989805.
Epoch 7/100

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0020000000949949026.
Epoch 8/100

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0010000000474974513.
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/100

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005.
Epoch 11/100

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005.
Epoch 12/100

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0005.
Epoch 13/100
Restoring model weights from the end of the best epoch.

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0005.
Epoch 00013: early stopping
Fold 1
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100

Epoch 00005: Re

In [19]:
# Predict the test set with each of the 20 fold models.
# One prediction array per fold is collected in models_pred.
models_pred = []

for i_fold in range(20):
    # File names must match the paths written by the training loop.
    model = keras.models.load_model(f'lstm1/folf_{i_fold}.h5')
    test_pred = model.predict(padded_test_sents, batch_size=256, verbose=1)
    models_pred.append(test_pred)



In [20]:
# Average the predictions across folds (axis 0 indexes the fold).
test_pred = np.mean(models_pred, axis=0)
print(test_pred.shape)

(1680, 725, 39)


In [21]:
# Convert the padded per-sentence predictions back into one flat list of class ids.
test_pred_idx = test_pred.argmax(axis=2)
test_pred_flat = []
for pred, test_sent in zip(test_pred_idx, test_sents):
    # Skip the first 5 positions and keep 10 fewer entries than the sentence has frames.
    n_kept = len(test_sent) - 10
    test_pred_flat.extend(pred[5:5 + n_kept])

In [22]:
# Single-column submission table holding the predicted class of every frame.
submission = pd.DataFrame({'Class': test_pred_flat})

In [23]:
# Write the submission file; the DataFrame index is written as the 'Id' column.
submission.to_csv('lstm1_20folds.csv', index_label='Id')