In [2]:
# preparing data
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import os
import random
import pandas as pd
import torch
from tqdm import tqdm
import gc
import numpy as np
import math
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def load_feat(path):
    """Deserialize the object stored at ``path`` with ``torch.load``.

    Args:
        path: Location of the saved file; handed to ``torch.load`` unchanged.

    Returns:
        Whatever object ``torch.load`` reads back from ``path``.
    """
    return torch.load(path)


def shift(x, n):
    """Shift the rows of a 2-D tensor by ``n`` positions, padding with the edge row.

    For ``n > 0`` row ``t`` of the result is ``x[t + n]`` and the tail is
    filled with copies of the last row; for ``n < 0`` row ``t`` is
    ``x[t + n]`` and the head is filled with copies of the first row.

    Args:
        x: Tensor of shape (seq_len, feature_dim).
        n: Signed number of rows to shift by.

    Returns:
        A tensor with the same shape as ``x``. ``x`` itself is returned
        (no copy) when ``n == 0`` or ``x`` has no rows.
    """
    seq_len = x.size(0)
    if n == 0 or seq_len == 0:
        return x

    # Clamp the shift so the output always keeps seq_len rows; without this a
    # shift larger than the sequence would return |n| rows instead.
    n = max(-seq_len, min(n, seq_len))

    if n < 0:
        left = x[0].repeat(-n, 1)
        right = x[:n]
    else:
        left = x[n:]
        right = x[-1].repeat(n, 1)

    return torch.cat((left, right), dim=0)


def concat_feat(x, concat_n):
    """Append the neighbouring frames to every frame of an utterance.

    Row ``t`` of the result is the concatenation of the frames at offsets
    ``-k .. +k`` around ``t`` (``k = concat_n // 2``); positions that fall
    outside the utterance are filled by ``shift`` with the first/last frame.

    Args:
        x: Tensor of shape (seq_len, feature_dim).
        concat_n: Number of frames per output row; must be odd.

    Returns:
        ``x`` itself when ``concat_n < 2``, otherwise a new tensor of shape
        (seq_len, concat_n * feature_dim).
    """
    assert concat_n % 2 == 1  # n must be odd
    if concat_n < 2:
        return x

    half = concat_n // 2
    # One shifted copy per context offset, ordered from -half to +half so the
    # un-shifted frame sits in the middle slot of every output row.
    context = [shift(x, offset) for offset in range(-half, half + 1)]
    return torch.cat(context, dim=1)


def _read_lines(path):
    """Return every line of the text file at ``path``; the file is closed on exit."""
    with open(path) as handle:
        return handle.readlines()


def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, train_val_seed=1337):
    """Load, context-expand and stack the features of one dataset split.

    Args:
        split: One of 'train', 'val' or 'test'.
        feat_dir: Directory containing '<mode>/<utterance>.pt' feature files,
            where mode is 'train' for the train/val splits and 'test' otherwise.
        phone_path: Directory containing 'train_split.txt', 'test_split.txt'
            and 'train_labels.txt'.
        concat_nframes: Number of frames concatenated per sample (see
            ``concat_feat``); must be odd.
        train_ratio: Fraction of the shuffled training utterances assigned to
            'train'; the remainder forms 'val'.
        train_val_seed: Seed for the train/val shuffle.

    Returns:
        ``(X, y)`` for 'train'/'val' and ``X`` alone for 'test'. ``X`` has
        shape (total_frames, 39 * concat_nframes) and ``y`` is a long tensor
        of shape (total_frames,).

    Raises:
        ValueError: If ``split`` is not recognised, or an utterance's label
            count differs from its frame count.
        RuntimeError: If the split holds more frames than the pre-allocated
            buffer (``max_len``).

    Note:
        For 'train'/'val' this reseeds Python's global ``random`` module with
        ``train_val_seed`` so both splits are cut from the same shuffle.
    """
    class_num = 41  # NOTE: pre-computed, should not need change
    if split not in ('train', 'val', 'test'):
        raise ValueError(
            'Invalid \'split\' argument for dataset: PhoneDataset!')
    mode = 'train' if split in ('train', 'val') else 'test'
    has_labels = mode != 'test'

    label_dict = {}
    if has_labels:
        for line in _read_lines(os.path.join(phone_path, f'{mode}_labels.txt')):
            line = line.strip('\n').split(' ')
            label_dict[line[0]] = [int(p) for p in line[1:]]

        # split training and validation data
        usage_list = _read_lines(os.path.join(phone_path, 'train_split.txt'))
        random.seed(train_val_seed)
        random.shuffle(usage_list)
        percent = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:percent] if split == 'train' else usage_list[percent:]
    else:
        usage_list = _read_lines(os.path.join(phone_path, 'test_split.txt'))

    usage_list = [line.strip('\n') for line in usage_list]
    print('[Dataset] - # phone classes: ' + str(class_num) +
          ', number of utterances for ' + split + ': ' + str(len(usage_list)))

    # Pre-allocate one large buffer and fill it in place; each frame is
    # assumed to have 39 features, so a row holds 39 * concat_nframes values.
    max_len = 3000000
    X = torch.empty(max_len, 39 * concat_nframes)
    y = torch.empty(max_len, dtype=torch.long) if has_labels else None

    idx = 0
    for fname in tqdm(usage_list):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        if idx + cur_len > max_len:
            # Fail with a clear message instead of an opaque shape mismatch.
            raise RuntimeError(
                f'{split} split exceeds the pre-allocated buffer of {max_len} frames')

        X[idx: idx + cur_len, :] = concat_feat(feat, concat_nframes)
        if has_labels:
            label = torch.LongTensor(label_dict[fname])
            if len(label) != cur_len:
                # A length-1 label would otherwise broadcast silently.
                raise ValueError(
                    f'{fname}: {len(label)} labels for {cur_len} frames')
            y[idx: idx + cur_len] = label

        idx += cur_len

    # Trim the buffers down to the rows that were actually filled.
    X = X[:idx, :]
    if has_labels:
        y = y[:idx]

    print(f'[INFO] {split} set')
    print(X.shape)
    if has_labels:
        print(y.shape)
        return X, y
    return X


# define datasets


class LibriDataset(Dataset):
    """Map-style dataset over a feature tensor and optional per-row labels.

    Indexing returns ``(feature, label)`` when labels were supplied and the
    bare feature otherwise.
    """

    def __init__(self, X, y=None):
        # X is stored as-is; y (when given) is converted with torch.LongTensor.
        self.data = X
        self.label = None if y is None else torch.LongTensor(y)

    def __getitem__(self, idx):
        sample = self.data[idx]
        if self.label is None:
            return sample
        return sample, self.label[idx]

    def __len__(self):
        return len(self.data)


# define model


class BasicBlock(nn.Module):
    """Linear -> BatchNorm1d -> ReLU -> Dropout, applied in that order.

    Args:
        input_dim: Size of the incoming feature vector.
        output_dim: Size of the produced feature vector.
        pDrop: Dropout probability.
    """

    def __init__(self, input_dim, output_dim, pDrop):
        super().__init__()
        self.block = nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.BatchNorm1d(output_dim),
            nn.ReLU(),
            nn.Dropout(pDrop),
        )

    def forward(self, x):
        return self.block(x)


#class Classifier(nn.Module):
 #   def __init__(self, input_dim, output_dim=41, hidden_layers=1,hidden_dim=256):
  #      super(Classifier, self).__init__()

   #     self.fc = nn.Sequential(
    #        BasicBlock(input_dim, hidden_dim,0.2),
     #       BasicBlock()
      #      nn.Linear(hidden_dim, output_dim)
       # )

    #def forward(self, x):
    #    x = self.fc(x)
     #   return x
class Classifier(nn.Module):
    """Fully connected classifier: stacked hidden blocks plus a linear output layer.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout.

    Args:
        input_dim: Size of each input feature vector.
        output_dim: Number of output classes (logits per sample).
        hidden_layers: Number of hidden blocks; 0 yields a single linear layer.
        hidden_dim: Width of every hidden block.
        ep: Scales the dropout probability, ``p = min(0.1 * ep, 0.5)``.

    Note:
        ``ep``, ``hidden_dim`` and ``hidden_layers`` are kept as attributes for
        reference only. The layers are built once here, so changing these
        attributes afterwards does not alter the network.
    """

    def __init__(self, input_dim, output_dim=41, hidden_layers=7, hidden_dim=1024, ep=1):
        super(Classifier, self).__init__()
        self.ep = ep
        self.hidden_dim = hidden_dim
        self.hidden_layers = hidden_layers

        # The dropout probability is the same for every block: capped at 0.5.
        p = min(0.1 * ep, 0.5)

        layers = []
        in_features = input_dim
        for _ in range(hidden_layers):
            layers.append(nn.Linear(in_features, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p))
            in_features = hidden_dim

        # Use the width actually feeding the output layer, so that
        # hidden_layers=0 connects input_dim directly to output_dim.
        self.fc = nn.Sequential(*layers, nn.Linear(in_features, output_dim))

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (batch, input_dim)."""
        return self.fc(x)
# ---- hyperparameters ----

# data parameters
# number of frames concatenated per sample (2k + 1 = n frames in total);
# tunable, but the value must be odd
concat_nframes = 33
# share of the training utterances used for training; the rest is validation
train_ratio = 0.8

# training parameters
seed = 0                      # random seed
batch_size = 512              # samples per mini-batch
num_epoch = 15                # number of passes over the training set
learning_rate = 1e-4          # optimizer step size
model_path = './model.ckpt'   # where the checkpoint is written

# model parameters
# input width of the model: 39 values per frame times the context size;
# derived, so do not edit it directly
input_dim = 39 * concat_nframes
# hidden_layers = 5           # number of hidden layers (tunable hyperparameter)
hidden_dim = 1024             # width of each hidden layer

# prepare dataset and model

# preprocess data
train_X, train_y = preprocess_data(split='train', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)
val_X, val_y = preprocess_data(split='val', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)

# get dataset
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)

# drop the notebook-level names of the raw tensors (the datasets hold their
# own references) and run a garbage-collection pass
del train_X, train_y, val_X, val_y
gc.collect()

# get dataloader
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# Prefer GPU 2 when the machine exposes at least three GPUs; otherwise fall
# back to the first GPU, or to the CPU when CUDA is unavailable. A hard-coded
# 'cuda:2' would fail on machines with fewer devices.
if torch.cuda.is_available():
    device = 'cuda:2' if torch.cuda.device_count() > 2 else 'cuda:0'
else:
    device = 'cpu'
print(f'DEVICE: {device}')

# fix seed


def same_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and, when available,
    all CUDA devices), and switches cuDNN to its deterministic mode with
    benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python's own RNG is used by this file (utterance shuffling), so it is
    # seeded here together with the numeric libraries.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# fix random seed
same_seeds(seed)

# create model, define a loss function, and optimizer
# hidden_dim is passed explicitly so the value configured with the other
# hyperparameters is the one the network is actually built with.
model = Classifier(input_dim=input_dim, hidden_dim=hidden_dim).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Train for num_epoch epochs; after each epoch evaluate on the validation set
# (when it is non-empty) and checkpoint the model whenever the number of
# correctly classified validation frames improves.
best_acc = 0.0
for epoch in range(num_epoch):
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train()  # set the model to training mode
    # NOTE: the Classifier's `ep`, `hidden_dim` and `hidden_layers` attributes
    # are only consulted while its layers are built in __init__, so they are
    # deliberately left untouched here: changing them per epoch would not
    # alter the network being trained.
    for features, labels in tqdm(train_loader):
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(features)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # get the index of the class with the highest probability
        _, train_pred = torch.max(outputs, 1)
        train_acc += (train_pred == labels).sum().item()
        train_loss += loss.item()

    if len(val_set) == 0:
        # no validation data: report the training metrics only
        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch, train_acc /
            len(train_set), train_loss/len(train_loader)
        ))
        continue

    # validation
    model.eval()  # set the model to evaluation mode
    with torch.no_grad():
        for features, labels in tqdm(val_loader):
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)

            loss = criterion(outputs, labels)

            # get the index of the class with the highest probability
            _, val_pred = torch.max(outputs, 1)
            val_acc += (val_pred == labels).sum().item()
            val_loss += loss.item()

    print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
        epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(
            train_loader), val_acc/len(val_set), val_loss/len(val_loader)
    ))

    # if the model improves, save a checkpoint at this epoch
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), model_path)
        print('saving model with acc {:.3f}'.format(
            best_acc/len(val_set)))

# without validation data there is no "best" epoch, so keep the final weights
if len(val_set) == 0:
    torch.save(model.state_dict(), model_path)
    print('saving model at last epoch')

# the training/validation loaders are no longer needed
del train_loader, val_loader
gc.collect()

# load data
test_X = preprocess_data(split='test', feat_dir='./libriphone/feat',phone_path='./libriphone', concat_nframes=concat_nframes)
test_set = LibriDataset(test_X, None)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# load model
# Rebuild the network with the same configuration used for training and map
# the checkpoint tensors onto the device selected for this session.
model = Classifier(input_dim=input_dim, hidden_dim=hidden_dim).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

# make prediction
batch_preds = []

model.eval()
with torch.no_grad():
    for features in tqdm(test_loader):
        features = features.to(device)

        outputs = model(features)

        # get the index of the class with the highest probability
        _, test_pred = torch.max(outputs, 1)
        batch_preds.append(test_pred.cpu().numpy())

# Join the per-batch results once instead of re-copying the growing array on
# every batch; an empty test set yields an empty prediction array.
pred = np.concatenate(batch_preds, axis=0) if batch_preds else np.array([], dtype=np.int32)

# writing prediction csv: one "Id,Class" row per test frame, in loader order
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(pred):
        f.write('{},{}\n'.format(i, y))

[Dataset] - # phone classes: 41, number of utterances for train: 3428


3428it [00:11, 287.71it/s]


[INFO] train set
torch.Size([2116368, 1287])
torch.Size([2116368])
[Dataset] - # phone classes: 41, number of utterances for val: 858


858it [00:03, 246.04it/s]


[INFO] val set
torch.Size([527790, 1287])
torch.Size([527790])
DEVICE: cuda:2


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:17<00:00, 53.19it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:10<00:00, 102.66it/s]


[001/015] Train Acc: 0.618904 Loss: 1.243167 | Val Acc: 0.686127 loss: 0.996162
saving model with acc 0.686


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.17it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 123.17it/s]


[002/015] Train Acc: 0.696164 Loss: 0.959141 | Val Acc: 0.712261 loss: 0.906985
saving model with acc 0.712


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.47it/s]


[003/015] Train Acc: 0.725127 Loss: 0.859194 | Val Acc: 0.724767 loss: 0.868063
saving model with acc 0.725


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.15it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.87it/s]


[004/015] Train Acc: 0.744932 Loss: 0.790077 | Val Acc: 0.731674 loss: 0.848292
saving model with acc 0.732


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.54it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.91it/s]


[005/015] Train Acc: 0.760422 Loss: 0.736621 | Val Acc: 0.738561 loss: 0.834610
saving model with acc 0.739


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:13<00:00, 56.18it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 123.57it/s]


[006/015] Train Acc: 0.773619 Loss: 0.691774 | Val Acc: 0.742157 loss: 0.824633
saving model with acc 0.742


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:13<00:00, 55.96it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.85it/s]


[007/015] Train Acc: 0.784785 Loss: 0.653115 | Val Acc: 0.745069 loss: 0.821253
saving model with acc 0.745


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.77it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.05it/s]


[008/015] Train Acc: 0.795072 Loss: 0.617695 | Val Acc: 0.746782 loss: 0.824979
saving model with acc 0.747


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.67it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.75it/s]


[009/015] Train Acc: 0.803877 Loss: 0.586994 | Val Acc: 0.747257 loss: 0.834221
saving model with acc 0.747


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:13<00:00, 56.10it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 125.81it/s]


[010/015] Train Acc: 0.811756 Loss: 0.559728 | Val Acc: 0.747769 loss: 0.838709
saving model with acc 0.748


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:15<00:00, 55.09it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.87it/s]


[011/015] Train Acc: 0.819384 Loss: 0.534348 | Val Acc: 0.749306 loss: 0.847187
saving model with acc 0.749


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:13<00:00, 56.30it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 125.17it/s]


[012/015] Train Acc: 0.826327 Loss: 0.511784 | Val Acc: 0.750278 loss: 0.851366
saving model with acc 0.750


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.70it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.46it/s]


[013/015] Train Acc: 0.832493 Loss: 0.490753 | Val Acc: 0.749575 loss: 0.861981


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:13<00:00, 56.07it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 124.53it/s]


[014/015] Train Acc: 0.838301 Loss: 0.471438 | Val Acc: 0.749639 loss: 0.875731


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 4134/4134 [01:14<00:00, 55.27it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1031/1031 [00:08<00:00, 126.24it/s]


[015/015] Train Acc: 0.843604 Loss: 0.454618 | Val Acc: 0.749592 loss: 0.883738
[Dataset] - # phone classes: 41, number of utterances for test: 1078


1078it [00:03, 282.70it/s]


[INFO] test set
torch.Size([646268, 1287])


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1263/1263 [00:07<00:00, 179.40it/s]


In [3]:
# Shell escape: print the current GPU status table (driver/CUDA version and
# per-device memory usage and utilization).
!nvidia-smi

Thu Mar 10 15:11:18 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3080    Off  | 00000000:86:00.0 Off |                  N/A |
| 30%   27C    P8    22W / 320W |      0MiB / 10018MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 3080    Off  | 00000000:AF:00.0 Off |                  N/A |
| 30%   27C    P8    21W / 320W |      0MiB / 10018MiB |      0%      Default |
|       