# **Homework 2: Phoneme Classification**


Objectives:
* Solve a classification problem with deep neural networks (DNNs).
* Understand recursive neural networks (RNNs).

If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

# Import Library

In [266]:
import os
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import random
import numpy as np
from tqdm import tqdm
import gc
import datetime
import math

# Environment Confirmation

In [267]:
!nvidia-smi

Fri Nov 17 17:27:35 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.13                 Driver Version: 537.13       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  | 00000000:01:00.0  On |                  N/A |
| N/A   45C    P5               6W / 114W |   1706MiB /  8188MiB |     16%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [268]:
# 获取当前工作环境
def KaggleColabLocal():
    env = dict(
        inKaggle=False,
        inColab=False,
        inLocal=False
    )

    # 检查是否在Kaggle环境中
    if 'KAGGLE_KERNEL_RUN_TYPE' in os.environ and os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive':
        # print("在Kaggle环境中运行")
        env['inKaggle'] = True
    # 检查是否在Colab环境中
    elif 'google.colab' in str(get_ipython()):
        # print("在Colab环境中运行")
        env['inColab'] = True
    # 检查是否在本地JupyterLab环境中
    elif 'JPY_PARENT_PID' in os.environ:
        # print("在本地JupyterLab环境中运行")
        env['inLocal'] = True
    else:
        print("在其他环境中运行")
    return env


KaggleColabLocal()

try:
    del train_set,val_set
    del train_loader,val_loader
except:
    print("free memory: param already deleted.")
gc.collect()

free memory: param already deleted.


3923

# Download Data
Download data from google drive, then unzip it.

You should have
- `libriphone/train_split.txt`: training metadata
- `libriphone/train_labels`: training labels
- `libriphone/test_split.txt`: testing metadata
- `libriphone/feat/train/*.pt`: training feature
- `libriphone/feat/test/*.pt`:  testing feature

after running the following block.

> **Notes: if the google drive link is dead, you can download the data directly from [Kaggle](https://www.kaggle.com/c/ml2023spring-hw2/data) and upload it to the workspace.**


In [269]:
Current_Env = KaggleColabLocal() # 获取当前环境

# 如果文件已经下载，那不用重新下载文件
commonPath = './libriphone'
if Current_Env['inKaggle'] == True:
    commonPath = '/kaggle/working/libriphone'

filePath = commonPath + '/feat/train/103-1240-0015.pt';

if os.path.exists(filePath) == False:
    if Current_Env['inKaggle'] or Current_Env['inColab']:
        !pip install --upgrade gdown
        # Main link
        # !gdown --id '1N1eVIDe9hKM5uiNRGmifBlwSDGiVXPJe' --output libriphone.zip
        !gdown --id '1qzCRnywKh30mTbWUEjXuNT2isOCAPdO1' --output libriphone.zip

        !unzip -q libriphone.zip
        !ls libriphone
    elif Current_Env['inLocal']:
        raise Exception('本地环境中文件不存在，需要重新下载，地址：https://www.kaggle.com/c/ml2023spring-hw2/data')
    else:
        raise Exception('获取文件失败，无法判断当前运行环境，也无法找到文件路径，需要重新下载，地址：https://www.kaggle.com/c/ml2023spring-hw2/data')
else:
    print('Data File already exist.Skip!')

Data File already exist.Skip!


# Playground

In [270]:
# a = torch.load('./libriphone/feat/train/103-1240-0015.pt')
# print(len(a))
# print(len(a[0]))
# print(a)

# Some Utility Functions
**Fixes random number generator seeds for reproducibility.**

In [271]:
def same_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

**Helper functions to pre-process the training data from raw MFCC features of each utterance.**

A phoneme may span several frames and is dependent to past and future frames. \
Hence we concatenate neighboring phonemes for training to achieve higher accuracy. The **concat_feat** function concatenates past and future k frames (total 2k+1 = n frames), and we predict the center frame.

Feel free to modify the data preprocess functions, but **do not drop any frame** (if you modify the functions, remember to check that the number of frames are the same as mentioned in the slides)

In [272]:
def load_feat(path):
    feat = torch.load(path)
    return feat


def shift(x, n):
    if n < 0:
        left = x[0].repeat(-n, 1)
        right = x[:n]
    elif n > 0:
        right = x[-1].repeat(n, 1)
        left = x[n:]
    else:
        return x

    return torch.cat((left, right), dim=0)


def concat_feat(x, concat_n):
    assert concat_n % 2 == 1  # n must be odd
    if concat_n < 2:
        return x
    seq_len, feature_dim = x.size(0), x.size(1)
    x = x.repeat(1, concat_n)
    x = x.view(seq_len, concat_n, feature_dim).permute(
        1, 0, 2)  # concat_n, seq_len, feature_dim
    mid = (concat_n // 2)
    for r_idx in range(1, mid+1):
        x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
        x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)

    return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)


def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, random_seed=1213):
    class_num = 41  # NOTE: pre-computed, should not need change

    if split == 'train' or split == 'val':
        mode = 'train'
    elif split == 'test':
        mode = 'test'
    else:
        raise ValueError(
            'Invalid \'split\' argument for dataset: PhoneDataset!')

    label_dict = {}
    if mode == 'train':
        for line in open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines():
            line = line.strip('\n').split(' ')
            label_dict[line[0]] = [int(p) for p in line[1:]]

        # split training and validation data
        usage_list = open(os.path.join(
            phone_path, 'train_split.txt')).readlines()
        random.seed(random_seed)
        random.shuffle(usage_list)
        train_len = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:train_len] if split == 'train' else usage_list[train_len:]

    elif mode == 'test':
        usage_list = open(os.path.join(
            phone_path, 'test_split.txt')).readlines()

    usage_list = [line.strip('\n') for line in usage_list]
    print('[Dataset] - # phone classes: ' + str(class_num) +
          ', number of utterances for ' + split + ': ' + str(len(usage_list)))

    max_len = 3000000
    X = torch.empty(max_len, 39 * concat_nframes)
    if mode == 'train':
        y = torch.empty(max_len, dtype=torch.long)

    idx = 0
    for i, fname in tqdm(enumerate(usage_list)):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        feat = concat_feat(feat, concat_nframes)
        if mode == 'train':
            label = torch.LongTensor(label_dict[fname])

        X[idx: idx + cur_len, :] = feat
        if mode == 'train':
            y[idx: idx + cur_len] = label

        idx += cur_len

    X = X[:idx, :]
    if mode == 'train':
        y = y[:idx]

    print(f'[INFO] {split} set')
    print(X.shape)
    if mode == 'train':
        print(y.shape)
        return X, y
    else:
        return X

# Dataset

In [273]:
class LibriDataset(Dataset):
    def __init__(self, X, y=None):
        self.data = X
        if y is not None:
            self.label = torch.LongTensor(y)
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)

# Model
Feel free to modify the structure of the model.

In [274]:
class BasicBlock(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_p):
        super(BasicBlock, self).__init__()

        # TODO: apply batch normalization and dropout for strong baseline.
        # Reference: https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html (batch normalization)
        #       https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html (dropout)
        self.block = nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.BatchNorm1d(output_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
        )

    def forward(self, x):
        x = self.block(x)
        return x


class Classifier(nn.Module):
    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256, dropout_p=0.5):
        super(Classifier, self).__init__()

        self.fc = nn.Sequential(
            BasicBlock(input_dim, hidden_dim, dropout_p),
            *[BasicBlock(hidden_dim, hidden_dim, dropout_p)
              for _ in range(hidden_layers)],
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        x = self.fc(x)
        return x

# Hyper-parameters

In [275]:
# data prarameters
# TODO: change the value of "concat_nframes" for medium baseline
# the number of frames to concat with, n must be odd (total 2k+1 = n frames)
concat_nframes = 21
# the ratio of data used for training, the rest will be used for validation
train_ratio = 0.75

# training parameters
seed = 19871201          # random seed
batch_size = 512        # batch size
num_epoch = 300         # the number of training epoch
learning_rate = 1e-3      # learning rate
model_path = './model.ckpt'  # the path where the checkpoint will be saved

# model parameters
# TODO: change the value of "hidden_layers" or "hidden_dim" for medium baseline
# the input dim of the model, you should not change the value
input_dim = 39 * concat_nframes
hidden_layers = 2        # the number of hidden layers
hidden_dim = 1750           # the hidden dim
dropout_p = 0.5

# env related
commonPath = './libriphone'
if Current_Env['inKaggle'] is True:
    commonPath = '/kaggle/working/libriphone'

# Dataloader

In [276]:
same_seeds(seed)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'DEVICE: {device}')

# preprocess data
train_X, train_y = preprocess_data(split='train', feat_dir=commonPath + '/feat', phone_path=commonPath,
                                   concat_nframes=concat_nframes, train_ratio=train_ratio, random_seed=seed)
val_X, val_y = preprocess_data(split='val', feat_dir=commonPath + '/feat', phone_path=commonPath,
                               concat_nframes=concat_nframes, train_ratio=train_ratio, random_seed=seed)

# get dataset
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)

# remove raw feature to save memory
del train_X, train_y, val_X, val_y
gc.collect()

# get dataloader
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

DEVICE: cuda
[Dataset] - # phone classes: 41, number of utterances for train: 2571


2571it [00:11, 226.26it/s]


[INFO] train set
torch.Size([1580384, 819])
torch.Size([1580384])
[Dataset] - # phone classes: 41, number of utterances for val: 858


858it [00:03, 214.76it/s]

[INFO] val set
torch.Size([536410, 819])
torch.Size([536410])





# Training

## 测试默认配置下,各显卡的表现
- 本地显卡：168秒
- Kaggle: T100 239秒
- Colab: 200秒以上

结论：本地显卡>Kaggle>Colab

In [263]:
# log train start time
starttime = datetime.datetime.now()

# create model, define a loss function, and optimizer
model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers,
                   hidden_dim=hidden_dim, dropout_p=dropout_p).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

best_acc = 0.0
for epoch in range(num_epoch):
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train()  # set the model to training mode
    for i, batch in enumerate(tqdm(train_loader)):
        features, labels = batch
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(features)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # get the index of the class with the highest probability
        _, train_pred = torch.max(outputs, 1)
        train_acc += (train_pred.detach() == labels.detach()).sum().item()
        train_loss += loss.item()

    # validation
    model.eval()  # set the model to evaluation mode
    with torch.no_grad():
        for i, batch in enumerate(tqdm(val_loader)):
            features, labels = batch
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)

            loss = criterion(outputs, labels)

            _, val_pred = torch.max(outputs, 1)
            # get the index of the class with the highest probability
            val_acc += (val_pred.cpu() == labels.cpu()).sum().item()
            val_loss += loss.item()

    print(f'[{epoch+1:03d}/{num_epoch:03d}] Train Acc: {train_acc/len(train_set):3.5f} Loss: {train_loss/len(train_loader):3.5f} | Val Acc: {val_acc/len(val_set):3.5f} loss: {val_loss/len(val_loader):3.5f}')

    # if the model improves, save a checkpoint at this epoch
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), model_path)
        print(f'saving model with acc {best_acc/len(val_set):.5f}')

# log train cost time
print('total train start at:', starttime)
print('concat_nframes:', concat_nframes, ',num_epoch:',
      num_epoch, ',learning_rate:', learning_rate, ',Dropout:')
totalseconds = (datetime.datetime.now()-starttime).seconds
print('total train cost: ', math.floor(totalseconds/60),
      'minutes', totalseconds % 60, 'seconds (total seconds:', totalseconds, 's).')

100%|██████████| 3087/3087 [00:38<00:00, 79.52it/s] 
100%|██████████| 1048/1048 [00:07<00:00, 145.95it/s]


[001/060] Train Acc: 0.57672 Loss: 1.38555 | Val Acc: 0.64819 loss: 1.12797
saving model with acc 0.64819


100%|██████████| 3087/3087 [00:37<00:00, 81.41it/s]
100%|██████████| 1048/1048 [00:06<00:00, 153.89it/s]


[002/060] Train Acc: 0.63254 Loss: 1.17914 | Val Acc: 0.67381 loss: 1.04029
saving model with acc 0.67381


100%|██████████| 3087/3087 [00:37<00:00, 82.44it/s]
100%|██████████| 1048/1048 [00:06<00:00, 153.55it/s]


[003/060] Train Acc: 0.65332 Loss: 1.10234 | Val Acc: 0.68712 loss: 0.99449
saving model with acc 0.68712


100%|██████████| 3087/3087 [00:37<00:00, 81.67it/s]
100%|██████████| 1048/1048 [00:06<00:00, 166.27it/s]


[004/060] Train Acc: 0.66748 Loss: 1.05205 | Val Acc: 0.69782 loss: 0.95725
saving model with acc 0.69782


100%|██████████| 3087/3087 [00:38<00:00, 80.80it/s]
100%|██████████| 1048/1048 [00:10<00:00, 101.42it/s]


[005/060] Train Acc: 0.67841 Loss: 1.01397 | Val Acc: 0.70458 loss: 0.93802
saving model with acc 0.70458


100%|██████████| 3087/3087 [00:37<00:00, 82.66it/s]
100%|██████████| 1048/1048 [00:06<00:00, 168.04it/s]


[006/060] Train Acc: 0.68603 Loss: 0.98637 | Val Acc: 0.70866 loss: 0.92332
saving model with acc 0.70866


100%|██████████| 3087/3087 [00:37<00:00, 83.09it/s]
100%|██████████| 1048/1048 [00:06<00:00, 155.37it/s]


[007/060] Train Acc: 0.69219 Loss: 0.96489 | Val Acc: 0.71308 loss: 0.90736
saving model with acc 0.71308


100%|██████████| 3087/3087 [00:32<00:00, 93.73it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 181.10it/s]


[008/060] Train Acc: 0.69746 Loss: 0.94508 | Val Acc: 0.71658 loss: 0.89646
saving model with acc 0.71658


100%|██████████| 3087/3087 [00:32<00:00, 94.61it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 186.74it/s]


[009/060] Train Acc: 0.70194 Loss: 0.92926 | Val Acc: 0.71790 loss: 0.89354
saving model with acc 0.71790


100%|██████████| 3087/3087 [00:32<00:00, 94.78it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 194.51it/s]


[010/060] Train Acc: 0.70603 Loss: 0.91483 | Val Acc: 0.71934 loss: 0.88530
saving model with acc 0.71934


100%|██████████| 3087/3087 [00:32<00:00, 95.05it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 185.69it/s]


[011/060] Train Acc: 0.70970 Loss: 0.90230 | Val Acc: 0.72229 loss: 0.87826
saving model with acc 0.72229


100%|██████████| 3087/3087 [00:31<00:00, 97.67it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 207.52it/s]


[012/060] Train Acc: 0.71257 Loss: 0.89165 | Val Acc: 0.72416 loss: 0.87387
saving model with acc 0.72416


100%|██████████| 3087/3087 [00:31<00:00, 98.87it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 198.27it/s]


[013/060] Train Acc: 0.71507 Loss: 0.88115 | Val Acc: 0.72503 loss: 0.87078
saving model with acc 0.72503


100%|██████████| 3087/3087 [00:30<00:00, 99.68it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 191.09it/s]


[014/060] Train Acc: 0.71802 Loss: 0.87161 | Val Acc: 0.72592 loss: 0.86562
saving model with acc 0.72592


100%|██████████| 3087/3087 [00:30<00:00, 100.08it/s]
100%|██████████| 1048/1048 [00:05<00:00, 190.10it/s]


[015/060] Train Acc: 0.72050 Loss: 0.86332 | Val Acc: 0.72718 loss: 0.86141
saving model with acc 0.72718


100%|██████████| 3087/3087 [00:30<00:00, 99.75it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 205.40it/s]


[016/060] Train Acc: 0.72244 Loss: 0.85583 | Val Acc: 0.72878 loss: 0.85978
saving model with acc 0.72878


100%|██████████| 3087/3087 [00:30<00:00, 100.38it/s]
100%|██████████| 1048/1048 [00:05<00:00, 201.87it/s]


[017/060] Train Acc: 0.72470 Loss: 0.84766 | Val Acc: 0.72919 loss: 0.85663
saving model with acc 0.72919


100%|██████████| 3087/3087 [00:30<00:00, 102.32it/s]
100%|██████████| 1048/1048 [00:05<00:00, 204.04it/s]


[018/060] Train Acc: 0.72648 Loss: 0.84145 | Val Acc: 0.73079 loss: 0.85387
saving model with acc 0.73079


100%|██████████| 3087/3087 [00:30<00:00, 100.09it/s]
100%|██████████| 1048/1048 [00:05<00:00, 202.83it/s]


[019/060] Train Acc: 0.72820 Loss: 0.83518 | Val Acc: 0.73127 loss: 0.85310
saving model with acc 0.73127


100%|██████████| 3087/3087 [00:31<00:00, 98.16it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 206.24it/s]


[020/060] Train Acc: 0.72983 Loss: 0.82864 | Val Acc: 0.73127 loss: 0.85140
saving model with acc 0.73127


100%|██████████| 3087/3087 [00:31<00:00, 99.05it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 199.96it/s]


[021/060] Train Acc: 0.73208 Loss: 0.82238 | Val Acc: 0.73252 loss: 0.84649
saving model with acc 0.73252


100%|██████████| 3087/3087 [00:30<00:00, 100.06it/s]
100%|██████████| 1048/1048 [00:05<00:00, 203.78it/s]


[022/060] Train Acc: 0.73359 Loss: 0.81675 | Val Acc: 0.73323 loss: 0.84688
saving model with acc 0.73323


100%|██████████| 3087/3087 [00:30<00:00, 99.71it/s] 
100%|██████████| 1048/1048 [00:04<00:00, 210.12it/s]


[023/060] Train Acc: 0.73507 Loss: 0.81181 | Val Acc: 0.73310 loss: 0.84535


100%|██████████| 3087/3087 [00:30<00:00, 101.09it/s]
100%|██████████| 1048/1048 [00:05<00:00, 209.07it/s]


[024/060] Train Acc: 0.73576 Loss: 0.80845 | Val Acc: 0.73484 loss: 0.84150
saving model with acc 0.73484


100%|██████████| 3087/3087 [00:30<00:00, 100.84it/s]
100%|██████████| 1048/1048 [00:05<00:00, 197.30it/s]


[025/060] Train Acc: 0.73710 Loss: 0.80366 | Val Acc: 0.73492 loss: 0.83959
saving model with acc 0.73492


100%|██████████| 3087/3087 [00:30<00:00, 100.63it/s]
100%|██████████| 1048/1048 [00:05<00:00, 195.00it/s]


[026/060] Train Acc: 0.73871 Loss: 0.79838 | Val Acc: 0.73568 loss: 0.83935
saving model with acc 0.73568


100%|██████████| 3087/3087 [00:31<00:00, 98.51it/s] 
100%|██████████| 1048/1048 [00:04<00:00, 209.92it/s]


[027/060] Train Acc: 0.74005 Loss: 0.79407 | Val Acc: 0.73447 loss: 0.83952


100%|██████████| 3087/3087 [00:30<00:00, 99.94it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 197.41it/s]


[028/060] Train Acc: 0.74107 Loss: 0.78982 | Val Acc: 0.73656 loss: 0.83565
saving model with acc 0.73656


100%|██████████| 3087/3087 [00:30<00:00, 100.05it/s]
100%|██████████| 1048/1048 [00:05<00:00, 191.06it/s]


[029/060] Train Acc: 0.74171 Loss: 0.78694 | Val Acc: 0.73593 loss: 0.83820


100%|██████████| 3087/3087 [00:30<00:00, 100.42it/s]
100%|██████████| 1048/1048 [00:05<00:00, 203.70it/s]


[030/060] Train Acc: 0.74285 Loss: 0.78324 | Val Acc: 0.73616 loss: 0.83689


100%|██████████| 3087/3087 [00:30<00:00, 101.26it/s]
100%|██████████| 1048/1048 [00:04<00:00, 214.71it/s]


[031/060] Train Acc: 0.74411 Loss: 0.77930 | Val Acc: 0.73753 loss: 0.83629
saving model with acc 0.73753


100%|██████████| 3087/3087 [00:30<00:00, 100.70it/s]
100%|██████████| 1048/1048 [00:05<00:00, 196.86it/s]


[032/060] Train Acc: 0.74519 Loss: 0.77574 | Val Acc: 0.73827 loss: 0.83528
saving model with acc 0.73827


100%|██████████| 3087/3087 [00:30<00:00, 99.67it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 200.71it/s]


[033/060] Train Acc: 0.74646 Loss: 0.77193 | Val Acc: 0.73752 loss: 0.83472


100%|██████████| 3087/3087 [00:31<00:00, 99.43it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 199.06it/s]


[034/060] Train Acc: 0.74679 Loss: 0.76893 | Val Acc: 0.73822 loss: 0.83387


100%|██████████| 3087/3087 [00:30<00:00, 99.78it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 191.37it/s]


[035/060] Train Acc: 0.74742 Loss: 0.76658 | Val Acc: 0.73874 loss: 0.83340
saving model with acc 0.73874


100%|██████████| 3087/3087 [00:31<00:00, 99.55it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 188.24it/s]


[036/060] Train Acc: 0.74815 Loss: 0.76310 | Val Acc: 0.73827 loss: 0.83382


100%|██████████| 3087/3087 [00:30<00:00, 100.43it/s]
100%|██████████| 1048/1048 [00:05<00:00, 201.03it/s]


[037/060] Train Acc: 0.74943 Loss: 0.75986 | Val Acc: 0.73826 loss: 0.83149


100%|██████████| 3087/3087 [00:30<00:00, 101.77it/s]
100%|██████████| 1048/1048 [00:05<00:00, 202.83it/s]


[038/060] Train Acc: 0.75023 Loss: 0.75741 | Val Acc: 0.73975 loss: 0.83035
saving model with acc 0.73975


100%|██████████| 3087/3087 [00:30<00:00, 100.76it/s]
100%|██████████| 1048/1048 [00:05<00:00, 204.51it/s]


[039/060] Train Acc: 0.75043 Loss: 0.75601 | Val Acc: 0.74002 loss: 0.83040
saving model with acc 0.74002


100%|██████████| 3087/3087 [00:31<00:00, 98.87it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 191.52it/s]


[040/060] Train Acc: 0.75178 Loss: 0.75263 | Val Acc: 0.73939 loss: 0.83225


100%|██████████| 3087/3087 [00:30<00:00, 100.00it/s]
100%|██████████| 1048/1048 [00:04<00:00, 216.80it/s]


[041/060] Train Acc: 0.75216 Loss: 0.75001 | Val Acc: 0.73990 loss: 0.83162


100%|██████████| 3087/3087 [00:30<00:00, 99.72it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 198.57it/s]


[042/060] Train Acc: 0.75279 Loss: 0.74784 | Val Acc: 0.74030 loss: 0.82969
saving model with acc 0.74030


100%|██████████| 3087/3087 [00:31<00:00, 99.31it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 195.35it/s]


[043/060] Train Acc: 0.75346 Loss: 0.74585 | Val Acc: 0.74092 loss: 0.82969
saving model with acc 0.74092


100%|██████████| 3087/3087 [00:31<00:00, 98.06it/s] 
100%|██████████| 1048/1048 [00:04<00:00, 216.87it/s]


[044/060] Train Acc: 0.75416 Loss: 0.74315 | Val Acc: 0.73984 loss: 0.82932


100%|██████████| 3087/3087 [00:31<00:00, 98.90it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 190.22it/s]


[045/060] Train Acc: 0.75485 Loss: 0.74093 | Val Acc: 0.74032 loss: 0.83099


100%|██████████| 3087/3087 [00:30<00:00, 99.96it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 193.88it/s]


[046/060] Train Acc: 0.75529 Loss: 0.73952 | Val Acc: 0.74018 loss: 0.82741


100%|██████████| 3087/3087 [00:31<00:00, 99.51it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 197.57it/s]


[047/060] Train Acc: 0.75619 Loss: 0.73650 | Val Acc: 0.74072 loss: 0.82957


100%|██████████| 3087/3087 [00:31<00:00, 98.68it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 192.94it/s]


[048/060] Train Acc: 0.75703 Loss: 0.73440 | Val Acc: 0.74110 loss: 0.82690
saving model with acc 0.74110


100%|██████████| 3087/3087 [00:31<00:00, 99.10it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 197.21it/s]


[049/060] Train Acc: 0.75707 Loss: 0.73215 | Val Acc: 0.74137 loss: 0.82825
saving model with acc 0.74137


100%|██████████| 3087/3087 [00:31<00:00, 99.28it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 203.71it/s]


[050/060] Train Acc: 0.75730 Loss: 0.73088 | Val Acc: 0.74120 loss: 0.82691


100%|██████████| 3087/3087 [00:30<00:00, 101.31it/s]
100%|██████████| 1048/1048 [00:05<00:00, 200.99it/s]


[051/060] Train Acc: 0.75808 Loss: 0.72936 | Val Acc: 0.74162 loss: 0.82553
saving model with acc 0.74162


100%|██████████| 3087/3087 [00:30<00:00, 100.98it/s]
100%|██████████| 1048/1048 [00:05<00:00, 208.13it/s]


[052/060] Train Acc: 0.75870 Loss: 0.72713 | Val Acc: 0.74110 loss: 0.82875


100%|██████████| 3087/3087 [00:31<00:00, 99.20it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 196.74it/s]


[053/060] Train Acc: 0.75942 Loss: 0.72524 | Val Acc: 0.74119 loss: 0.83122


100%|██████████| 3087/3087 [00:30<00:00, 99.89it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 195.76it/s]


[054/060] Train Acc: 0.75975 Loss: 0.72330 | Val Acc: 0.74173 loss: 0.82867
saving model with acc 0.74173


100%|██████████| 3087/3087 [00:30<00:00, 100.59it/s]
100%|██████████| 1048/1048 [00:05<00:00, 183.42it/s]


[055/060] Train Acc: 0.76024 Loss: 0.72163 | Val Acc: 0.74152 loss: 0.82636


100%|██████████| 3087/3087 [00:30<00:00, 99.60it/s] 
100%|██████████| 1048/1048 [00:04<00:00, 216.72it/s]


[056/060] Train Acc: 0.76114 Loss: 0.71965 | Val Acc: 0.74229 loss: 0.82741
saving model with acc 0.74229


100%|██████████| 3087/3087 [00:30<00:00, 101.76it/s]
100%|██████████| 1048/1048 [00:04<00:00, 212.07it/s]


[057/060] Train Acc: 0.76165 Loss: 0.71790 | Val Acc: 0.74211 loss: 0.82349


100%|██████████| 3087/3087 [00:31<00:00, 98.47it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 202.12it/s]


[058/060] Train Acc: 0.76214 Loss: 0.71546 | Val Acc: 0.74256 loss: 0.82449
saving model with acc 0.74256


100%|██████████| 3087/3087 [00:31<00:00, 99.10it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 183.89it/s]


[059/060] Train Acc: 0.76216 Loss: 0.71556 | Val Acc: 0.74227 loss: 0.82523


100%|██████████| 3087/3087 [00:31<00:00, 99.35it/s] 
100%|██████████| 1048/1048 [00:05<00:00, 197.21it/s]

[060/060] Train Acc: 0.76236 Loss: 0.71452 | Val Acc: 0.74229 loss: 0.82427
total train start at: 2023-11-17 15:43:19.310420
concat_nframes: 21 ,num_epoch: 60 ,learning_rate: 0.001 ,Dropout:
total train cost:  37 minutes 21 seconds (total seconds: 2241 s).





In [265]:
del train_set, val_set
del train_loader, val_loader
gc.collect()

NameError: name 'train_set' is not defined

# Testing
Create a testing dataset, and load model from the saved checkpoint.

In [163]:
# load data
test_X = preprocess_data(split='test', feat_dir='./libriphone/feat',
                         phone_path='./libriphone', concat_nframes=concat_nframes)
test_set = LibriDataset(test_X, None)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

[Dataset] - # phone classes: 41, number of utterances for test: 857


857it [00:03, 239.62it/s]

[INFO] test set
torch.Size([527364, 819])





In [164]:
# load model
model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers,
                   hidden_dim=hidden_dim, dropout_p=dropout_p).to(device)
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

Make prediction.

In [165]:
pred = np.array([], dtype=np.int32)

model.eval()
with torch.no_grad():
    for i, batch in enumerate(tqdm(test_loader)):
        features = batch
        features = features.to(device)

        outputs = model(features)

        # get the index of the class with the highest probability
        _, test_pred = torch.max(outputs, 1)
        pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)

100%|██████████| 1031/1031 [00:05<00:00, 193.31it/s]


Write prediction to a CSV file.

After finish running this block, download the file `prediction.csv` from the files section on the left-hand side and submit it to Kaggle.

In [None]:
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(pred):
        f.write('{},{}\n'.format(i, y))