# Import libraries

In [None]:
! pip install transformers
! pip install datasets
! pip install Sentencepiece
! pip install torchmetrics
! pip install evaluate
! pip install audiomentations
! pip install huggingface_hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; force_remount refreshes an
# already-mounted drive instead of reusing the existing mount.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
# Print the version of the git CLI installed on this runtime.
!git version

git version 2.17.1


In [None]:
!git config --global user.email “pamelyking@gmail.com”
!git config --global user.name “pamely”

In [None]:
!git clone https://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@github.com/pamely/idl-project.git 

/bin/bash: /content/drive/MyDrive/F22/IDL-Project/idl-project/data/BabyChillantoDB: Is a directory


In [None]:
# List the current working directory to confirm the clone is present.
!ls

checkpoint  idl-project  Ubenwa  Ubenwa-Pam  Ubenwa-Pam-lstm


In [None]:
# Make the cloned repository the working directory for the rest of the notebook.
%cd idl-project/

In [None]:
import torch, numpy as np
import torch.nn as nn
from torch.nn import *
import torch.optim as optim
import torchaudio
import os, gc
# from torchmetrics.classification import BinaryHingeLoss
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, MultiStepLR

# Transformer-Based Models:
import librosa
#Importing Pytorch
import torch
#Importing Wav2Vec

# quality-of-life packages
from tqdm import tqdm # so that data-loading process can be visualized
from torchsummary import summary # summary of the model

import random

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Device: ", device)

Device:  cuda


# Config:

In [None]:
# Notebook-wide experiment settings.
# "batch-size" is read when the DataLoaders are built and "seed" is passed to
# torch.manual_seed before the datasets are created.
# NOTE(review): the remaining keys are not read by the LSTM cells of this
# notebook (which use train_config instead) -- presumably they belong to other
# models/optimizers; confirm before relying on them.
config = {
    "epochs": 50,
    "batch-size": 50,
    "lr": 0.01,
    "momentum": 0.9,
    "weight-decay": 0,
    "scheduler-step-size": [15],
    "scheduler-gamma": 0.1,
    "seed": 11785,
    "label-smoothing": 0
}

# Data-Preprocessing

In [None]:
class MyPipeline(torch.nn.Module):
    """Resample a waveform to ``new_freq`` and compute MFCCs of the result.

    Args:
        orig_freq: Source rate (Hz) for which a resampler is built up front.
        new_freq: Target rate (Hz) of the resampled audio; also the sample rate
            handed to the MFCC transform.
    """

    def __init__(self, orig_freq, new_freq=8000):

        super().__init__()
        self.orig_freq = orig_freq
        self.new_freq = new_freq

        # win_length, hop_length based on paper and intuition from these posts:
        # https://groups.google.com/g/librosa/c/xeodGZVDE1s
        # melkwargs={"win_length": 240, "hop_length": 80, "f_min": 20, "f_max": 4000, "n_mels": 40}
        self.mfcc = torchaudio.transforms.MFCC(sample_rate=new_freq)
        self.resample = torchaudio.transforms.Resample(new_freq=new_freq, orig_freq=orig_freq)

    def forward(self, waveform: torch.Tensor) -> tuple:
        """Return ``(audio, mfcc)`` for one clip.

        The number of samples (``waveform.shape[1]``) is used as the clip's
        source rate, so the resampled clip always spans ``new_freq`` samples.
        NOTE(review): this is only a faithful resampling for clips that are
        exactly one second long -- confirm that holds for every input file.

        Args:
            waveform: Tensor indexed as (channel, sample).

        Returns:
            audio: NumPy array with the first channel of the resampled clip.
            mfcc: MFCC tensor computed from the resampled clip (all channels).
        """
        num_samples = waveform.shape[1]
        if num_samples != self.orig_freq:
            # Clip does not match the prebuilt resampler: build a one-off
            # resampler whose source rate is the clip's own sample count.
            resampler = torchaudio.transforms.Resample(new_freq=self.new_freq, orig_freq=num_samples)
        else:
            resampler = self.resample
        resampled = resampler(waveform)

        # Convert to NumPy once and reuse it for both the check and the result.
        resampled_audio_list = np.array(resampled)[0]
        assert len(resampled_audio_list) != 1, "resampled clip collapsed to a single sample"

        mfcc = self.mfcc(resampled)
        return resampled_audio_list, mfcc


# TODO: the train/val split slices the os.listdir() result in listing order, without shuffling, so the partitions are not random samples of the data.
class BabyChillanto(torch.utils.data.Dataset):
    """In-memory cry dataset yielding ``(input, label)`` pairs.

    Every file of the requested partition is loaded, resampled and converted to
    MFCCs once, inside ``__init__``. Labels: 1 = Control, 0 = Asphyxia.

    Args:
        control_path: Directory whose files are loaded with label 1.
        disease_path: Directory whose files are loaded with label 0.
        partition: "train" takes the first 60% of each directory listing;
            "val" and "test" are synonyms and both take the remaining 40%.
        audio_input: If True, items are the resampled waveforms (NumPy arrays);
            otherwise they are the zero-padded MFCC tensors.

    Raises:
        NameError: If ``partition`` is not "train", "val" or "test".

    NOTE(review): files are split in ``os.listdir`` order without shuffling, so
    the partitions depend on the filesystem's listing order.
    """

    # Fraction of each class that goes to "train" (paper uses 60-20-20, but
    # "val" and "test" currently share the remaining 40%).
    TRAIN_FRACTION = 0.6
    # Clips whose absolute amplitude never exceeds this are treated as silent.
    SILENCE_THRESHOLD = 10e-6

    def __init__(self, control_path, disease_path, partition="train", audio_input=False):
        self.mfcc, self.label = [], []
        self.audio_files = [] # for the transformer model

        control_names = os.listdir(control_path)
        disease_names = os.listdir(disease_path)
        control_names = self._partition_names(control_names, partition)
        disease_names = self._partition_names(disease_names, partition)

        pipeline = MyPipeline(orig_freq=24000, new_freq=16000) # sampling for transformer defaults to 16000 sampling rate

        # Note: 1 = Control, 0 = Asphyxia
        print("Loading Control")
        self._load_class(control_path, control_names, 1, pipeline)
        print("Loading Asphyxia")
        self._load_class(disease_path, disease_names, 0, pipeline)
        assert len(self.mfcc) == len(self.label)

        self._pad_mfccs()

        if (audio_input):
            self.length = len(self.audio_files)
            self.inputs = self.audio_files
        else:
            self.length = len(self.mfcc)
            self.inputs = self.mfcc

    @classmethod
    def _partition_names(cls, names, partition):
        """Return the slice of ``names`` that belongs to ``partition``."""
        train_end = int(len(names) * cls.TRAIN_FRACTION)
        if partition == "train":
            return names[:train_end]
        if partition in ("val", "test"):
            # Each class is sliced against its own length; the disease list was
            # previously bounded by the length of the control list.
            return names[train_end:]
        raise NameError("Unknown partition")

    def _load_class(self, directory, names, label, pipeline):
        """Load every file in ``names`` and append its features with ``label``."""
        for name in tqdm(names):
            waveform, _ = torchaudio.load(os.path.join(directory, name))

            # remove waveforms of all zeros (the magnitude is compared, so a
            # clip made only of negative samples is not mistaken for silence)
            if bool((waveform.abs() <= self.SILENCE_THRESHOLD).all()):
                continue

            audio_file, mfcc = pipeline(waveform)
            self.mfcc.append(mfcc)
            self.audio_files.append(audio_file)
            self.label.append(label)

    def _pad_mfccs(self):
        """Zero-pad every MFCC on its last axis to the widest one in the set."""
        max_width = max((mfcc.shape[2] for mfcc in self.mfcc), default=0)
        for i, mfcc in enumerate(self.mfcc):
            missing = max_width - mfcc.shape[2]
            if missing:
                self.mfcc[i] = nn.functional.pad(mfcc, (0, missing), value=0.0)

    def __len__(self):
        return self.length

    def __getitem__(self, ind):
        """Return ``(input, label)`` for index ``ind``."""
        sample = self.inputs[ind]
        # Only MFCC tensors get the squeeze; the raw-audio items are 1-D NumPy
        # arrays, for which ``squeeze(1)`` would raise an axis error.
        if torch.is_tensor(sample):
            sample = sample.squeeze(1)
        label = self.label[ind]

        return sample, label

    # def collate_fn(self, batch):
    #   data = [item[0] for item in batch]
    #   target = [item[1] for item in batch]
    #   target = torch.LongTensor(target)
    #   return [data, target]

# Load the Datasets + Check for Transformer Parameters:

## Loading the Datasets

In [None]:
# (I changed the filepath s.t. it's easier to just use the dataset locally)
# asphyxia_filepath = "/content/drive/MyDrive/F22/IDL-Project/idl-project/Bootstrapped/tanh/1s_asphyxia"
# normal_filepath = "/content/drive/MyDrive/F22/IDL-Project/idl-project/Bootstrapped/tanh/1s_normal"

# torch.manual_seed(config["seed"])


# train_data = BabyChillanto(normal_filepath, asphyxia_filepath, partition="train")
# val_data = BabyChillanto(normal_filepath, asphyxia_filepath, partition="val")

# train_data_audio = BabyChillanto(normal_filepath, asphyxia_filepath, partition="train", audio_input = True)
# val_data_audio = BabyChillanto(normal_filepath, asphyxia_filepath, partition="val", audio_input = True)

# # potentially todo: increase the number of files processed
# train_loader = torch.utils.data.DataLoader(train_data, num_workers= 1,
#                                            batch_size= config["batch-size"], pin_memory= True,
#                                            shuffle= True)


# val_loader = torch.utils.data.DataLoader(val_data, num_workers= 1,
#                                          batch_size= config["batch-size"], pin_memory= True,
#                                          shuffle= False)

# train_loader_audio = torch.utils.data.DataLoader(train_data_audio, num_workers= 1,
#                                            batch_size= config["batch-size"], pin_memory= True,
#                                            shuffle= True)


# val_loader_audio = torch.utils.data.DataLoader(val_data_audio, num_workers= 1,
#                                          batch_size= config["batch-size"], pin_memory= True,
#                                          shuffle= False)


# Per-class clip directories (the `reverb/1s_*` variant of BabyChillantoDB).
asphyxia_filepath = "/content/drive/MyDrive/F22/IDL-Project/idl-project/data/BabyChillantoDB/reverb/1s_asphyxia"
normal_filepath = "/content/drive/MyDrive/F22/IDL-Project/idl-project/data/BabyChillantoDB/reverb/1s_normal"

torch.manual_seed(config["seed"])


def _build_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that uses the notebook-wide batch size."""
    # potentially todo: increase the number of files processed
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=config["batch-size"],
        shuffle=shuffle,
        num_workers=1,
        pin_memory=True,
    )


# MFCC-based datasets (control directory first, asphyxia directory second).
train_data = BabyChillanto(normal_filepath, asphyxia_filepath, partition="train")
val_data = BabyChillanto(normal_filepath, asphyxia_filepath, partition="val")

# Same partitions, but serving the resampled waveforms instead of MFCCs.
train_data_audio = BabyChillanto(normal_filepath, asphyxia_filepath, partition="train", audio_input=True)
val_data_audio = BabyChillanto(normal_filepath, asphyxia_filepath, partition="val", audio_input=True)

# Only the training loaders shuffle.
train_loader = _build_loader(train_data, shuffle=True)
val_loader = _build_loader(val_data, shuffle=False)
train_loader_audio = _build_loader(train_data_audio, shuffle=True)
val_loader_audio = _build_loader(val_data_audio, shuffle=False)




Loading Control


100%|██████████| 608/608 [00:09<00:00, 65.77it/s] 


Loading Asphyxia


100%|██████████| 408/408 [01:15<00:00,  5.41it/s]


Loading Control


100%|██████████| 406/406 [00:01<00:00, 272.90it/s]


Loading Asphyxia


100%|██████████| 272/272 [00:50<00:00,  5.43it/s]


Loading Control


100%|██████████| 608/608 [00:02<00:00, 245.81it/s]


Loading Asphyxia


100%|██████████| 408/408 [00:02<00:00, 176.22it/s]


Loading Control


100%|██████████| 406/406 [00:01<00:00, 270.44it/s]


Loading Asphyxia


100%|██████████| 272/272 [00:01<00:00, 170.05it/s]


#### LSTM

In [None]:
# Pull a single batch to inspect the tensor shapes; `x` is kept for the
# forward-pass check further down.
for x, label in train_loader:
    print(x.shape, label.shape)
    break

RuntimeError: ignored

In [None]:
OUT_SIZE = 2 

class LSTM(nn.Module):
    """Conv1d embedding -> bidirectional LSTM -> MLP classifier.

    Args:
        input_size: Size of the last axis of the tensor given to ``forward``;
            it becomes the channel axis of the first convolution.
        output_channel: Channels produced by the convolutional embedding and
            consumed by the LSTM.
        n_layers: Number of stacked LSTM layers.
        hidden_size: Hidden units per LSTM direction.
        num_classes: Number of output logits.
        seq_len: Size of axis 1 of the tensor given to ``forward``. The
            convolutions use 'same' padding, so this is also the number of LSTM
            steps, and it sizes the first classifier layer. The default (40)
            with ``hidden_size=256`` gives the 20480 inputs used before.
    """

    def __init__(self, input_size = 81 , output_channel = 256, n_layers=3, hidden_size=256, num_classes = 2, seq_len = 40):

        super().__init__()

        # The last convolution emits `output_channel` channels so that it always
        # matches the LSTM input size (it used to be fixed at 512, which only
        # worked when output_channel=512 was passed explicitly).
        self.embedding = nn.Sequential(
           nn.Conv1d(input_size, 128, 5, padding='same'),
           nn.BatchNorm1d(128),
           nn.GELU(),
           nn.Dropout(0.0),

           nn.Conv1d(128, 256, 5, padding='same'),
           nn.BatchNorm1d(256),
           nn.GELU(),
           nn.Dropout(0.2),

           nn.Conv1d(256, output_channel, 5, padding='same'),
           nn.BatchNorm1d(output_channel),
           nn.GELU(),
           nn.Dropout(0.5)
          )
        self.hidden_size = hidden_size
        self.seq_len = seq_len

        self.lstm = nn.LSTM(input_size = output_channel,
                            hidden_size = hidden_size,
                            num_layers = n_layers,
                            batch_first=True,
                            bidirectional=True,
                            # inter-layer dropout is meaningless (and warns) with one layer
                            dropout=0.2 if n_layers > 1 else 0.0
                            )

        # Every LSTM step is flattened into the classifier input:
        # seq_len steps x 2 directions x hidden_size features.
        flat_features = seq_len * 2 * hidden_size
        self.classification = nn.Sequential(
            nn.Linear(flat_features, 2048),
            nn.GELU(),
            nn.Dropout(0.2),

            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        # Conv1d wants (batch, channels, length): input_size acts as channels.
        x = x.permute(0,2,1)

        x = self.embedding(x)

        # Back to (batch, length, channels) for the batch_first LSTM.
        x = x.permute(0,2,1)

        lstm_out, _ = self.lstm(x)

        out = self.classification(lstm_out.reshape((x.size(0), -1)))

        return  out


In [None]:
# Release cached, unused GPU memory before allocating a new model.
torch.cuda.empty_cache()

# 512 embedding channels feeding a 3-layer bidirectional LSTM (256 units per direction).
model = LSTM(
    output_channel=512,
    n_layers=3,
    hidden_size=256,
)
model = model.to(device)

In [None]:
# Smoke test of the forward pass on `x`, the batch left in the namespace by the
# batch-inspection cell above. If that cell did not run successfully, `x` does
# not exist and this cell raises NameError.
out = model(x.squeeze(1).to(device))

# Displayed as the cell output: the size of the logits tensor.
out.size()

NameError: ignored

In [None]:
# Settings for the LSTM training run: "lr" is the AdamW learning rate and
# "epochs" the number of passes made by the training loop.
# NOTE(review): "beam_width" is not read by any cell shown here -- presumably a
# leftover from another project; confirm and remove.
train_config = {
    "beam_width" : 2,
    "lr" : 1e-3,
    "epochs" : 30
    }

In [None]:
# Loss over the two class logits.
criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCELoss() #Defining Loss function 

# AdamW with a small weight decay; the learning rate comes from train_config.
optimizer = optim.AdamW(
    model.parameters(),
    lr=train_config['lr'],
    weight_decay=1e-5,
)

# Lowers the learning rate when the monitored metric stops improving.
# mode='max' means the metric passed to scheduler.step() must be one where
# larger is better (e.g. accuracy), not a loss.
scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=2)

# Gradient scaler for mixed precision; passed to the train/eval helpers.
scaler = torch.cuda.amp.GradScaler()

In [None]:
def train(train_loader, model, optimizer, criterion, scaler):
    """Run one training epoch.

    Args:
        train_loader: Iterable of ``(mfccs, labels)`` batches.
        model: Network to optimise; called on ``mfccs.squeeze(1)``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        scaler: Unused; kept so existing call sites keep working.

    Returns:
        (acc, total_loss): accuracy in percent over the samples actually seen,
        and the loss averaged over batches.
    """
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    total_loss = 0.0
    num_correct = 0
    num_seen = 0  # samples processed so far; the last batch may be smaller

    model.train()

    for i, (mfccs, labels) in enumerate(train_loader):

        optimizer.zero_grad() # Zero gradients

        mfccs, labels = mfccs.to(device), labels.to(device)

        outputs = model(mfccs.squeeze(1))

        loss = criterion(outputs, labels)

        # A fresh graph is built for every batch, so it does not need to be retained.
        loss.backward()
        optimizer.step()

        preds = torch.argmax(outputs, axis=1)
        num_correct += int((preds == labels).sum())
        num_seen += int(labels.size(0))
        total_loss += float(loss.item())

        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / num_seen),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

        batch_bar.update() # Update tqdm bar

    batch_bar.close() # You need this to close the tqdm bar

    # max(..., 1) keeps an empty loader from dividing by zero.
    acc = 100 * num_correct / max(num_seen, 1)
    total_loss = float(total_loss / max(len(train_loader), 1))

    return acc, total_loss

In [None]:
def eval(train_loader, model, optimizer, criterion, scaler):
    """Evaluate ``model`` on a loader without modifying it.

    The model is put in evaluation mode, gradients are disabled and the
    optimizer is never stepped; the model's previous train/eval mode is
    restored before returning.

    NOTE: the name shadows Python's built-in ``eval``; it is kept because the
    training loop calls it by this name.

    Args:
        train_loader: Iterable of ``(mfccs, labels)`` batches to evaluate on
            (the parameter name is historical; any loader can be passed).
        model: Network to evaluate; called on ``mfccs.squeeze(1)``.
        optimizer: Only read, to show the current learning rate in the bar.
        criterion: Loss called as ``criterion(outputs, labels)``.
        scaler: Unused; kept so existing call sites keep working.

    Returns:
        (acc, total_loss): accuracy in percent over the samples actually seen,
        and the loss averaged over batches.
    """
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Val')
    total_loss = 0.0
    num_correct = 0
    num_seen = 0  # samples processed so far; the last batch may be smaller

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for i, (mfccs, labels) in enumerate(train_loader):

                mfccs, labels = mfccs.to(device), labels.to(device)

                outputs = model(mfccs.squeeze(1))
                loss = criterion(outputs, labels)

                preds = torch.argmax(outputs, axis=1)
                num_correct += int((preds == labels).sum())
                num_seen += int(labels.size(0))
                total_loss += float(loss.item())

                # tqdm lets you add some details so you can monitor progress.
                batch_bar.set_postfix(
                    acc="{:.04f}%".format(100 * num_correct / num_seen),
                    loss="{:.04f}".format(float(total_loss / (i + 1))),
                    num_correct=num_correct,
                    lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

                batch_bar.update() # Update tqdm bar
    finally:
        batch_bar.close() # You need this to close the tqdm bar
        model.train(was_training)

    # max(..., 1) keeps an empty loader from dividing by zero.
    acc = 100 * num_correct / max(num_seen, 1)
    total_loss = float(total_loss / max(len(train_loader), 1))

    return acc, total_loss

In [None]:
torch.cuda.empty_cache()
gc.collect()

# Best validation accuracy seen so far; a checkpoint is written only when an
# epoch matches or beats it.
ref_acc = 0

for epoch in range(train_config["epochs"]):

    model.train()

    print("\nEpoch {}/{}".format(epoch+1, train_config["epochs"]))

    train_acc, train_loss = train(train_loader, model, optimizer, criterion, scaler)

    # Validate on the held-out loader, not on the training data.
    accuracy, val_loss = eval(val_loader, model, optimizer, criterion, scaler)

    # The scheduler is created with mode='max', so it has to be stepped with a
    # metric that should increase (accuracy) rather than with the loss.
    scheduler.step(accuracy)

    lr = optimizer.state_dict()['param_groups'][0]['lr']

    print("\tTrain Loss: {:.4f}".format(train_loss))

    print("\tValidation Loss: {:.4f}".format(val_loss))

    print("\tValidation accuracy: {:.4f}".format(accuracy))

    print("\tLearning Rate: {:.6f}".format(lr))

    if accuracy >= ref_acc:
        ref_acc = accuracy
        torch.save({'model_state_dict':model.state_dict(),
                    'optimizer_state_dict':optimizer.state_dict(),
                    'epoch': epoch},'./checkpoint')



## Evaluation Metric (UAR)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_auc_score

def test(test_loader, model, epoch):
    """Print accuracy, UAR, AUROC, specificity and sensitivity for a loader.

    Label 1 is treated as the positive class and label 0 as the negative one.
    NOTE(review): the dataset encodes Control as 1, so "sensitivity" here is
    the recall on control clips -- confirm this is the intended polarity.

    Args:
        test_loader: Iterable of ``(data, labels)`` batches.
        model: Network to evaluate; called on ``data.squeeze(1)``.
        epoch: Only used as a label in the printed summary.

    Returns:
        None. The metrics are printed.
    """
    model.eval()
    num_correct = 0
    num_seen = 0  # samples processed so far; the last batch may be smaller

    tp = tn = fp = fn = 0

    batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, position=0, leave=False, desc='Val', ncols=5)

    y = []
    y_hat = []

    with torch.no_grad():
        for i, (data, labels) in enumerate(test_loader):

            data = data.to(device)
            labels = labels.to(device)

            outputs = model(data.squeeze(1))
            predictions = torch.argmax(outputs, axis=1)

            # Confusion-matrix counts for the whole batch at once.
            is_pos, is_neg = labels == 1, labels == 0
            said_pos, said_neg = predictions == 1, predictions == 0
            tp += int((is_pos & said_pos).sum())
            fn += int((is_pos & said_neg).sum())
            fp += int((is_neg & said_pos).sum())
            tn += int((is_neg & said_neg).sum())

            num_correct += int((predictions == labels).sum())
            num_seen += int(labels.size(0))

            batch_bar.set_postfix(
                acc="{:.04f}%".format(100 * num_correct / num_seen),
                num_correct=num_correct)

            batch_bar.update()

            y.append(labels.cpu().numpy())
            y_hat.append(predictions.cpu().numpy())
    batch_bar.close()

    # Recall of each class; NaN when a class is absent from the loader.
    sensitivity = tp / (tp + fn) if (tp + fn) else float("nan")
    specificity = tn / (tn + fp) if (tn + fp) else float("nan")
    uar = 0.5 * sensitivity + 0.5 * specificity

    # roc_auc_score needs both classes to be present in the ground truth.
    if (tp + fn) and (tn + fp):
        auroc = roc_auc_score(np.concatenate(y), np.concatenate(y_hat))
    else:
        auroc = float("nan")
    acc = 100 * num_correct / max(num_seen, 1)

    print(f"Test Epoch: {epoch}\tAccuracy: {acc}% UAR: {uar} AUROC: {auroc}")
    print("Specificity: ", specificity)
    print("Sensitivity: ", sensitivity)

In [None]:
# Report metrics on the validation loader; the 30 is only the epoch label shown
# in the printed summary.
test(val_loader, model, 30)

## Evaluation Function

## Run Here For Evaluation

In [None]:
# evaluate(model_res18, train_loader, val_loader)
# evaluate(model_transformer, train_loader_audio, val_loader_audio)