In [17]:
import numpy as np
import torch
import random
import os
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pack_sequence, pad_packed_sequence
import gc

In [2]:
from torch.utils.data import Dataset
import torch.nn as nn
#from torch.optim.lr_scheduler import LambdaLR
#import random
from torch.utils.data import DataLoader

import numpy as np
#import math
import os
from tqdm import tqdm
import datetime
import gc

# # Accelerate parts
# from accelerate import Accelerator, notebook_launcher # main interface, distributed launcher
# from accelerate.utils import set_seed # reproducability across devices

In [20]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

## Process Data

In [3]:
class LibriDataset(Dataset):
    def __init__(self, X, y=None):
        self.data = X
        if y is not None:
            # self.label = torch.LongTensor(y)
            self.label = y
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)

    def totalSeqLen(self):
        x_seq_len_list = [s.shape[0] for s in self.data]
        return sum(x_seq_len_list)

In [4]:
def load_feat(path):
    import torch
    feat = torch.load(path)
    return feat

def preprocess_data(split, feat_dir, phone_path, train_ratio=0.8, random_seed=1213):

    if split == 'train' or split == 'val':
        mode = 'train'
    elif split == 'test':
        mode = 'test'
    else:
        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')

    label_dict = {}
    if mode == 'train':
        for line in open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines():
            line = line.strip('\n').split(' ')
            label_dict[line[0]] = [int(p) for p in line[1:]]
        
        # split training and validation data
        usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
        train_len = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:train_len] if split == 'train' else usage_list[train_len:]
    elif mode == 'test':
        usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()

    usage_list = [line.strip('\n') for line in usage_list]

    x_tensor_list = []
    if mode == 'train':
        y_tensor_list = []

    idx = 0
    for i, fname in enumerate(usage_list):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        if mode == 'train':
            label = label_dict[fname]

        x_tensor_list.append(feat)
        if mode == 'train':
            y_tensor_list.append(label)
          

    X = x_tensor_list
    if mode == 'train':
        y = y_tensor_list

    if mode == 'train':
        return X, y
    else:
        return X

def collate_fn(data):
    return data

## Model

In [5]:
class BasicBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(BasicBlock, self).__init__()

        self.block = nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.ReLU(),
        )

    def forward(self, x):
        x = self.block(x)
        return x


In [26]:
class LstmClassifier(nn.Module):
    def __init__(self, input_dim, output_dim=41, hidden_layers=4, hidden_dim=256, batch_size = 8):
        super(LstmClassifier, self).__init__()
        self.input_dim = input_dim
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.hidden_layers = hidden_layers
        self.fc =  nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout (dropout)
        )
        self.layer_norm = nn.LayerNorm(hidden_dim * 2 )
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, hidden_layers, dropout=dropout, bidirectional = True, batch_first = True)
        self.bc =  nn.Sequential(
            nn.Dropout (dropout),
            nn.Linear(hidden_dim *2, output_dim)
        )


    def forward(self, x, seq_lenght_list):
        h0 = torch.randn(self.hidden_layers*2, x.shape[0], self.hidden_dim).to('cuda')
        c0 = torch.randn(self.hidden_layers*2, x.shape[0], self.hidden_dim).to('cuda')
        x = self.fc(x)
        x_pad =pack_padded_sequence(x, batch_first=True, lengths=seq_lenght_list).to('cuda')
        out, (hn, cn) = self.lstm(x_pad,(h0,c0))
        out = pad_packed_sequence(out,batch_first=True)
        out = self.layer_norm(out[0])
        out = self.bc(out)
        return out


## Training

In [29]:
# BiLstm + wandb log
concat_nframes = 1              # the number of frames to concat with, n must be odd (total 2k+1 = n frames)
train_ratio = 0.75               # the ratio of data used for training, the rest will be used for validation

# training parameters
seed = 1213                        # random seed
batch_size = 8# batch size
num_epoch = 30                   # the number of training epoch
learning_rate = 2e-3         # learning rate
model_path = './model.ckpt'     # the path where the checkpoint will be saved

# model parameters
input_dim = 39 * concat_nframes # the input dim of the model, you should not change the value
hidden_layers = 7               # the number of hidden layers
hidden_dim = 256              # the hidden dim
dropout = 0.35
weight_decay = 0.05


feat_dir = './data/libriphone/feat/'
phone_path = './data/libriphone/'

In [30]:
# preprocess data
train_X, train_y = preprocess_data(split='train', feat_dir=feat_dir, phone_path=phone_path, train_ratio=train_ratio, random_seed=seed)
val_X, val_y = preprocess_data(split='val', feat_dir=feat_dir, phone_path=phone_path,  train_ratio=train_ratio, random_seed=seed)

# get dataset
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)
# remove raw feature to save memory
del train_X, train_y, val_X, val_y
gc.collect()

295

In [31]:
# get dataloader
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
model = LstmClassifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim)

#if(os.path.exists(model_path)):       
#    model.load_state_dict(torch.load(model_path))
criterion = nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)
# 定义一个自定义函数来计算学习率
#def lr_lambda(epoch):
#    if epoch < 10:
#        return epoch / 10
#    else:
#        return 0.5 * (1 + math.cos(math.pi * (epoch - 10) / 20))
#scheduler = LambdaLR(optimizer, lr_lambda)

In [32]:
best_acc = 0
for epoch in range(40):
    print(optimizer.state_dict()['param_groups'][0]['lr'])
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train() # set the model to training mode
    for i, batch in enumerate(tqdm(train_loader)):
        batch.sort(key= lambda batch: len(batch[0]), reverse=True) 
        x_seq_list = [dataItem[0] for dataItem in batch] 
        y_seq_list = [dataItem[1] for dataItem in batch]

        x_seq_len_list = [s.shape[0] for s in x_seq_list]
        x_pad_seq = pad_sequence(x_seq_list, batch_first=True) 

        optimizer.zero_grad() 
        outputs = model(x_pad_seq, x_seq_len_list, device = device) 
        result = torch.cat([outputs[i][:x_seq_len_list[i]] for i in range(outputs.size(0))], dim=0)
        y_seq_tensor = torch.cat(y_seq_list, dim=0)
        # print(f"reslut shape {result.shape} , y_seq_tensor shape {y_seq_tensor.shape}")
        loss = criterion(result, y_seq_tensor)
            
        #======================================================================
        #attention here! 
        loss.backward() 
        #======================================================================


        _, train_pred = torch.max(result, 1) # get the index of the class with the highest probability
        train_acc_batch = (train_pred.detach() == y_seq_tensor.detach()).sum().item()
        train_acc += train_acc_batch
        train_loss += loss.item()
        if i % 100 == 0:
            print(f'Train Acc: {train_acc_batch/y_seq_tensor.shape[0]} Loss: {loss.item()}')
    # validation
    model.eval() # set the model to evaluation mode
    with torch.no_grad():
        for i, batch in enumerate(tqdm(val_loader)):
            
            batch.sort(key= lambda batch: len(batch[0]), reverse=True) 
            x_seq_list = [dataItem[0] for dataItem in batch] 
            y_seq_list = [dataItem[1] for dataItem in batch]

            x_seq_len_list = [s.shape[0] for s in x_seq_list]
            x_pad_seq = pad_sequence(x_seq_list, batch_first=True)

            outputs = model(x_pad_seq, x_seq_len_list, device = accelerator.device)

            result = torch.cat([outputs[i][:x_seq_len_list[i]] for i in range(outputs.size(0))], dim=0)
            y_seq_tensor = torch.cat(y_seq_list, dim=0)

            loss = criterion(result, y_seq_tensor) 

            _, val_pred = torch.max(result, 1) 

            val_acc += (val_pred.cpu() == y_seq_tensor.cpu()).sum().item() # get the index of the class with the highest probability
            val_loss += loss.item()
        print(f'[{epoch+1:03d}/{num_epoch:03d}] Train Acc: {train_acc/train_set.totalSeqLen():3.5f} Loss: {train_loss/len(train_loader):3.5f} | Val Acc: {val_acc/val_set.totalSeqLen():3.5f} loss: {val_loss/len(val_loader):3.5f}')
    
    #======================================================================
    #print logs and save ckpt  
    model_path="model.ckpt"
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"epoch【{epoch}】@{nowtime} --> val_acc= {100 * val_acc:.2f}%")
    #======================================================================

0.002


  0%|          | 0/322 [00:00<?, ?it/s]


TypeError: forward() got an unexpected keyword argument 'device'