In [1]:
import numpy as np
import os
from torch.utils.data import Dataset
import librosa
import torch
import matplotlib.pyplot as plt
from torch import nn
import tqdm

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"Using {device}")

Using cuda


In [2]:
def pad_random(x: np.ndarray, max_len: int = 64000):
    """Return a random window of exactly ``max_len`` samples taken from ``x``.

    Signals shorter than ``max_len`` are first repeated end-to-end (via
    ``np.tile``) until they are strictly longer than ``max_len``, so the
    window also starts at a random phase of the repeated signal. A signal
    whose length already equals ``max_len`` is returned unchanged.

    Args:
        x: Signal to crop or extend; its length is read from ``x.shape[0]``.
        max_len: Number of samples in the returned window.

    Returns:
        A slice of ``x`` (or of its tiled copy) with ``max_len`` samples.

    Raises:
        ValueError: If ``x`` is empty, since there is nothing to repeat.
    """
    x_len = x.shape[0]
    if x_len == 0:
        raise ValueError("pad_random() received an empty signal")

    if x_len < max_len:
        # floor(max_len / x_len) + 1 repeats always exceeds max_len.
        num_repeats = max_len // x_len + 1
        x = np.tile(x, num_repeats)
        x_len = x.shape[0]

    # `randint` excludes its upper bound, so add 1 to make the last valid
    # start offset (x_len - max_len) reachable.
    stt = np.random.randint(x_len - max_len + 1)
    return x[stt:stt + max_len]

class SVDD2024(Dataset):
    """
    Dataset class for the SVDD 2024 dataset.

    Audio is read from ``<base_dir>/<partition>_set`` and the list of samples
    from ``<base_dir>/<partition>.txt``. For the ``train`` and ``dev``
    partitions each metadata line is split on single spaces: the third field
    is the file name without the ``.flac`` extension, and the last field is
    the class (``bonafide`` or anything else, which is treated as spoof).

    For the ``test`` partition the metadata file is optional. When it is
    missing, every ``*.flac`` file found under the partition directory is
    used, stored as an extension-less path relative to that directory.

    Each item is ``(features, target, file_name)``:
      * ``features``: chroma-CQT features with a leading axis of size 1 added.
      * ``target``: float32 one-hot vector, ``[1, 0]`` for spoof and
        ``[0, 1]`` for bonafide (always ``[1, 0]`` for the test partition).
      * ``file_name``: the extension-less name, used for the score file.
    """

    # Sampling rate used both for loading audio and for the chroma features.
    SAMPLE_RATE = 16000

    def __init__(self, base_dir, partition="train", max_len=64000):
        assert partition in ["train", "dev", "test"], "Invalid partition. Must be one of ['train', 'dev', 'test']"
        self.partition = partition
        self.base_dir = os.path.join(base_dir, partition + "_set")
        self.max_len = max_len
        try:
            with open(os.path.join(base_dir, f"{partition}.txt"), "r") as f:
                # Skip blank lines so they are not counted as samples.
                self.file_list = [line for line in f if line.strip()]
        except FileNotFoundError:
            if partition != "test":
                raise FileNotFoundError(f"File {partition}.txt not found in {base_dir}")
            # No metadata for the test set: collect every *.flac file instead.
            # __getitem__ appends ".flac" itself, so store the names without
            # the extension, relative to the partition directory so that
            # files in sub-directories can still be located.
            self.file_list = []
            for root, _, files in os.walk(self.base_dir):
                for file in files:
                    stem, extension = os.path.splitext(file)
                    if extension == ".flac":
                        self.file_list.append(
                            os.path.relpath(os.path.join(root, stem), self.base_dir)
                        )
            # os.walk order is platform dependent; sort for reproducibility.
            self.file_list.sort()

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        if self.partition == "test":
            file_name = self.file_list[index].strip()
            label = 0 # dummy label. Not used for test set.
        else:
            fields = self.file_list[index].split(" ")
            file_name = fields[2].strip()
            label = 1 if fields[-1].strip() == "bonafide" else 0
        try:
            x, _ = librosa.load(
                os.path.join(self.base_dir, file_name + ".flac"),
                sr=self.SAMPLE_RATE,
                mono=True,
            )
            x = pad_random(x, self.max_len)
            x = librosa.util.normalize(x)
            x = librosa.feature.chroma_cqt(y=x, sr=self.SAMPLE_RATE)
            features = torch.unsqueeze(torch.from_numpy(x), dim=0)
            one_hot = [1, 0] if label == 0 else [0, 1]
            # file_name is used for generating the score file for submission
            return features, torch.tensor(one_hot).type(torch.float32), file_name
        except Exception as e:
            # Best effort: report the failing file and return None. Note that
            # PyTorch's default collate function cannot batch a None item.
            print(f"Error loading {file_name}: {e}")
            return None

In [3]:
# Build the two splits used below; the "dev" partition is the held-out set.
train_ds = SVDD2024(base_dir='./temp/ds/', partition='train')
test_ds = SVDD2024(base_dir='./temp/ds/', partition='dev')

In [4]:
# Number of samples in the train split, then in the held-out split.
print(len(train_ds), len(test_ds), sep="\n")

84404
43625


In [5]:
# One batch size for both loaders.
BATCH_SIZE = 16

# Shuffle the training split so batches are not drawn in metadata-file order
# every epoch (DataLoader does not shuffle by default). The held-out split
# keeps its order so evaluation is repeatable.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE)

In [7]:
# Shape of the feature tensor of the first training item; the model's first
# linear layer is sized from it.
first_features = train_ds[0][0]
first_features.shape

torch.Size([1, 12, 126])

In [8]:
class MLP(nn.Module):
    """Fully connected classifier over flattened input features.

    The input is flattened (all axes except the first) and passed through
    four ReLU-activated hidden layers of width 1024, 256, 128 and 64. The
    final layer returns raw, unactivated logits: ``nn.CrossEntropyLoss``
    applies log-softmax itself, and a ReLU on the output would clamp every
    logit to be non-negative.

    Args:
        in_features: Number of values per sample after flattening. The
            default matches the 12 x 126 feature maps used in this notebook.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=12*126, num_classes=2):
        super().__init__()
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.l1 = nn.Linear(in_features=in_features, out_features=1024, bias=True)
        self.l2 = nn.Linear(in_features=1024, out_features=256, bias=True)
        self.l3 = nn.Linear(in_features=256, out_features=128, bias=True)
        self.l4 = nn.Linear(in_features=128, out_features=64, bias=True)
        self.l5 = nn.Linear(in_features=64, out_features=num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for input ``x``."""
        x = self.flatten(x)
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = self.relu(hidden(x))
        # No activation on the output layer: the loss expects raw logits.
        return self.l5(x)

model = MLP()

In [9]:
# Smoke-test the forward pass on one random input of shape (1, 12, 126),
# then move the model onto the selected device.
dummy_input = torch.rand(1, 12, 126)
model(dummy_input)
model = model.to(device)

In [10]:
def accuracy_fn(logits, true):
    """Return the fraction of rows in ``logits`` whose top class equals ``true``.

    ``logits`` is indexed as (batch, class): softmax and argmax are both taken
    over dimension 1. ``true`` holds the expected class index for each row.
    The result is a Python float: correct predictions / ``len(logits)``.
    """
    probabilities = torch.softmax(logits, dim=1)
    predicted = torch.argmax(probabilities, dim=1).squeeze()
    num_correct = predicted.eq(true).sum().item()
    return num_correct / len(logits)

def _run_epoch(model, loader, loss_fn, progress_bar, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_accuracy, mean_loss)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and the pass runs under
    ``torch.inference_mode()``. Both metrics are the plain mean of the
    per-batch values. ``loader`` must yield ``(x, y, name)`` triples where
    ``y`` is a one-hot target.
    """
    training = optimizer is not None
    model.train(training)

    batch_accuracies = []
    batch_losses = []
    grad_mode = torch.enable_grad() if training else torch.inference_mode()
    with grad_mode:
        for x, y, _ in progress_bar(loader):
            x = x.to(device)
            # CrossEntropyLoss takes class-probability targets as floats.
            y = y.to(device=device, dtype=torch.float32)

            logits = model(x)
            loss = loss_fn(logits, y)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Targets are one-hot, so argmax recovers the class index.
            batch_accuracies.append(accuracy_fn(logits, torch.argmax(y, dim=1)))
            batch_losses.append(loss.item())

    if not batch_losses:
        raise ValueError("The data loader produced no batches")
    return (
        sum(batch_accuracies) / len(batch_accuracies),
        sum(batch_losses) / len(batch_losses),
    )


def train_model(model, epochs, train_batches=None, val_batches=None):
    """Train ``model`` with Adam and cross-entropy, evaluating after each epoch.

    Args:
        model: Module to optimise; it must already be on the notebook-level
            ``device``, because every batch is moved there.
        epochs: Number of passes over the training data.
        train_batches: Loader of ``(x, one_hot_y, name)`` batches used for
            training. Defaults to the notebook-level ``train_loader``.
        val_batches: Loader used for evaluation. Defaults to the
            notebook-level ``test_loader`` (the notebook defines no
            ``val_loader``).

    Prints the mean train/validation accuracy and loss after every epoch.
    """
    # `import tqdm` alone does not load the notebook submodule; tqdm.auto
    # picks the notebook widget when available and the text bar otherwise.
    from tqdm.auto import tqdm as progress_bar

    if train_batches is None:
        train_batches = train_loader
    if val_batches is None:
        val_batches = test_loader

    optimizer = torch.optim.Adam(params=model.parameters())
    loss_fn = nn.CrossEntropyLoss()

    for i in progress_bar(range(epochs)):
        print("\nTraining:")
        net_train_accuracy, net_train_loss = _run_epoch(
            model, train_batches, loss_fn, progress_bar, optimizer
        )

        #Evaluation
        print("Testing:")
        net_val_accuracy, net_val_loss = _run_epoch(
            model, val_batches, loss_fn, progress_bar
        )

        print(f"\nEpoch {i+1}:\n\nTrain Accuracy: {net_train_accuracy}\n\nTrain Loss: {net_train_loss}\n\nVal Accuracy: {net_val_accuracy}\n\nVal Loss: {net_val_loss}")



In [11]:
# Train for ten epochs.
train_model(model, epochs=10)

  0%|          | 0/10 [00:00<?, ?it/s]


Training:


  0%|          | 0/5276 [00:00<?, ?it/s]

KeyboardInterrupt: 

# 