# Convolutional Neural Net for Binary Classification using Spectrograms

### Some Setup

We want to be able to run the notebook both locally and in Google Colab

In [1]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

print("In Colab: {}".format(IN_COLAB))

if IN_COLAB:
  # !pip install torchmetrics
  # !pip install kornia
  # !pip install torchvision
  google.colab.drive.mount('/content/drive')
  %cd /content/drive/My Drive/Go-Viral-Project/notebooks



In Colab: True
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/10WA3loQgJ-TGlCX6cLycEGelfZJH9i3G/Go-Viral-Project/notebooks


Imports

In [2]:
import sys; sys.path.append('..')
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from kornia.losses.focal import BinaryFocalLossWithLogits
from torch.utils.data import default_collate
from torch.utils.data import random_split
from torchmetrics.classification import ConfusionMatrix
from torchvision import transforms
from tqdm import tqdm

from src.RNN_utils.cross_val import crossValidate
from src.RNN_utils.cross_val import plotCV
from src.RNN_utils.dataset import SoundDS
from src.RNN_utils.trainer import trainer

Setting paths

In [3]:
# Every data location hangs off one root directory relative to the notebook.
DATA_ROOT = '../data'
AUDIO_PATH = f'{DATA_ROOT}/audio'
TENSOR_PATH = f'{DATA_ROOT}/specs'
METADATA_PATH = f'{DATA_ROOT}/metadata.csv'

Setting seed for reproducibility

In [4]:
# Seed PyTorch's global random number generator so that random operations in
# later cells are repeatable when the notebook is run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x7aa43415cd30>

Setting device

In [5]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

Using device: cuda


### Cleaning up Dataset

Creating binary target using the threshold of $5e5$.

In [15]:
# Load the per-song feature table and derive the binary target:
# 1 when a song has more than 5e5 videos, 0 otherwise.
df = pd.read_csv('../data/audio_features.csv')
is_viral = df["number_of_videos"].gt(5e5)
df["viral"] = is_viral.astype("int32")

Removing songs that don't have spectrograms (just in case)

In [18]:
# Keep only the songs whose spectrogram tensor ('<id>.pt') exists on disk.
# A boolean mask filters the frame in one pass, instead of calling df.drop()
# (which copies the whole frame) once for every missing file.
data_path = '../data/specs/'

has_spec = df['id'].map(
    lambda song_id: os.path.exists(os.path.join(data_path, song_id + '.pt'))
).astype(bool)

files_not_found = int((~has_spec).sum())
df = df[has_spec]

print(f"Number of files not found: {files_not_found}")

Number of files not found: 0


### Preprocessing

In [19]:

def preprocess_mbnet(X):
    """Prepare a spectrogram tensor as a 3-channel input for MobileNet.

    The tensor is concatenated with itself three times along dim 0, resized to
    224 x 2206 and then normalized per channel.

    NOTE(review): assumes X is a single-channel (1, H, W) tensor so that the
    concatenation yields three channels -- confirm against SoundDS. The
    mean/std values look like the usual ImageNet statistics.
    """
    three_channel = torch.cat([X, X, X], dim=0)
    resize = transforms.Resize((224, 2206))
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return normalize(resize(three_channel))

In [20]:

myds = SoundDS(df, '../data/specs/', preprocess_mbnet)

# Random split of 80:20 between training and validation.
num_items = len(myds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
# A dedicated, seeded generator keeps the split identical every time this cell
# runs. With the global RNG, re-running the cell would move samples between
# the training and validation sets.
split_generator = torch.Generator().manual_seed(SEED)
train_ds, val_ds = random_split(myds, [num_train, num_val], generator=split_generator)


def to_gpu(samples):
    """Collate a list of samples into a batch and move each element to `device`."""
    return [t.to(device) for t in default_collate(samples)]


# Create training and validation data loaders
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=16, collate_fn=to_gpu, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=16, collate_fn=to_gpu, shuffle=False)

In [21]:
# Pull one training batch and report the shape of its input tensor.
first_batch = next(iter(train_dl))
b_size, channels, hight, width = first_batch[0].shape
num_batches = len(train_dl)
print(
    f'num batches: {num_batches}\n'
    f'batch size: {b_size}\n'
    f'channels: {channels}\n'
    f'hight: {hight} \n'
    f'width: {width}'
)



KeyError: ignored

## MobileNet V2:

### Overfitting the model:

Loading the model

In [None]:
# Pretrained MobileNetV2 whose classifier is replaced by a 2-class linear head.
# The head returns raw logits: nn.CrossEntropyLoss applies log-softmax itself,
# so a Softmax layer here would normalize the scores twice and flatten the
# gradients. Apply torch.softmax to the output if probabilities are needed.
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(1280, 2),
)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
# Multiply the learning rate by 0.1 every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

EPOCHS = 20
print(model)

In [None]:
# Count only the parameters that have requires_grad set.
trainable_sizes = [w.numel() for w in model.parameters() if w.requires_grad]
num_trainable = sum(trainable_sizes)
print(f'Number of trainable parameters: {num_trainable}')

In [None]:
# Take a single batch from the training loader.
X, y = next(iter(train_dl))

In [None]:
# Repeatedly fit the single batch (X, y) to check that the model can overfit it.
for epoch in range(EPOCHS):
    optimizer.zero_grad()
    y_prob = model(X)
    loss = criterion(y_prob, y)
    loss.backward()
    optimizer.step()
    scheduler.step()
    loss = loss.item()
    # Normalize by this batch's own size rather than the `b_size` global left
    # over from an earlier cell, which belongs to a different batch.
    acc = (y_prob.argmax(dim=1) == y).sum().item() / y.size(0)
    print(f'Epoch #{epoch}: Loss - {loss}, Accuracy - {acc}')

### Training the model:

In [None]:
# Fresh pretrained MobileNetV2 with a 2-class linear head for the real training
# run. The head returns raw logits because nn.CrossEntropyLoss applies
# log-softmax internally; an extra Softmax layer would normalize twice.
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(1280, 2),
)

model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Multiply the learning rate by 0.2 every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)

In [None]:
# `trainer` and `crossValidate` are already imported in the imports cell at the
# top of the notebook, so they are not re-imported here.
train_model = trainer(model, criterion, optimizer, scheduler, device)

In [None]:
# Fit on the training loader for 20 epochs.
# NOTE(review): the meaning of the trailing `True` flag is defined by
# trainer.train and is not visible in this notebook.
n_epochs = 20
results = train_model.train(train_dl, n_epochs, True)

### Cross Validation with Cross-Entropy Loss

Initializing lists to hold results and configs

In [None]:
# One entry per evaluated hyper-parameter configuration, appended in parallel.
configs, results = [], []

Loading model

In [None]:
# Hyper-parameters for this cross-validation run.
config = {'lr': 1e-3, 'weight_decay': 1e-4, 'step_size': 10, 'gamma': 0.1}

# Pretrained MobileNetV2 with a 2-class linear head. The head returns raw
# logits because nn.CrossEntropyLoss applies log-softmax internally; an extra
# Softmax layer would normalize the scores twice.
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier = nn.Sequential(
    nn.Linear(1280, 2),
)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config['step_size'], gamma=config['gamma'])

train_model = trainer(model, criterion, optimizer, scheduler, device)

Initializing CV object

In [None]:
# 3-fold cross-validation over the training split, in batches of 16.
cv_obj = crossValidate(
    train_ds=train_ds,
    device=device,
    folds=3,
    batch_size=16,
)

Running CV

In [None]:
# Run cross-validation for 20 epochs and record the outcome next to its config.
cv_result = cv_obj.runCV(train_model, epochs=20)
results.append(cv_result)
configs.append(config)

In [None]:
# Plot the collected cross-validation results; `plotCV` comes from the imports
# cell at the top of the notebook.
plotCV(results, configs, title='Cross Validation for CNN MobileNetV2 with Cross-Entropy Loss')

### Cross Validation with Focal Loss

Very similar to previous cells

In [None]:
# Start from empty lists so the focal-loss runs are recorded separately.
configs, results = [], []

In [None]:

# Hyper-parameters for the focal-loss cross-validation run.
config = {
    'lr': 5e-5,
    'weight_decay': 2e-5,
    'step_size': 3,
    'gamma': 0.2,
    'dropout': 0.5,
    'gamma_fl': 2,
    'weight': torch.tensor([1.0, 1.0], device=device),
}
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier = nn.Sequential(
    # Element-wise dropout over the 1280 pooled features. nn.Dropout1d would
    # read the 2-D (batch, 1280) input as (channels, length) and zero out whole
    # samples instead of individual features.
    nn.Dropout(p=config['dropout']),
    nn.Linear(1280, 2),
    # NOTE(review): BinaryFocalLossWithLogits is named for logit inputs, yet
    # the head still ends in Softmax. Left unchanged because the trainer and
    # its target format are not visible here -- confirm this is intended.
    nn.Softmax(dim=1)
)

model.to(device)
criterion = BinaryFocalLossWithLogits(alpha=1.0, gamma=config['gamma_fl'], reduction='mean', weight=config['weight'])
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config['step_size'], gamma=config['gamma'])
train_model = trainer(model, criterion, optimizer, scheduler, device)

In [None]:
# 3-fold cross-validation over the training split, in batches of 16.
cv_obj = crossValidate(
    train_ds=train_ds,
    device=device,
    folds=3,
    batch_size=16,
)

In [None]:
# Run cross-validation for 10 epochs and record the outcome next to its config.
cv_result = cv_obj.runCV(train_model, epochs=10)
results.append(cv_result)
configs.append(config)

In [None]:
# Plot the focal-loss cross-validation results; `plotCV` comes from the imports
# cell at the top of the notebook.
plotCV(results, configs, title='CV for MobileNet V2 with Focal Loss')

### Full Training with Focal Loss

In [None]:
# Hyper-parameters for the final focal-loss training run.
config = {
    'lr': 5e-5,
    'weight_decay': 2e-5,
    'step_size': 5,
    'gamma': 0.2,
    'dropout': 0.5,
    'gamma_fl': 2,
    'weight': torch.tensor([1.0, 1.0], device=device),
}
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
model.classifier = nn.Sequential(
    # Element-wise dropout over the 1280 pooled features. nn.Dropout1d would
    # read the 2-D (batch, 1280) input as (channels, length) and zero out whole
    # samples instead of individual features.
    nn.Dropout(p=config['dropout']),
    nn.Linear(1280, 2),
    # NOTE(review): BinaryFocalLossWithLogits is named for logit inputs, yet
    # the head still ends in Softmax. Left unchanged because the trainer and
    # its target format are not visible here -- confirm this is intended.
    nn.Softmax(dim=1)
)

model.to(device)
criterion = BinaryFocalLossWithLogits(alpha=1.0, gamma=config['gamma_fl'], reduction='mean', weight=config['weight'])
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config['step_size'], gamma=config['gamma'])
train_model = trainer(model, criterion, optimizer, scheduler, device)

In [None]:
# Build the trainer for the focal-loss model and fit it on the training loader
# for 20 epochs.
train_model = trainer(model, criterion, optimizer, scheduler, device)
n_epochs = 20
results = train_model.train(train_dl, n_epochs, True)

In [None]:
# Plot the training results returned by the previous cell (20 epochs).
# NOTE(review): `plotTrainRes` is not imported or defined anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel. It
# presumably lives in one of the src.RNN_utils modules -- confirm and import it.
plotTrainRes(results,20)

In [None]:
# Binary confusion matrix from column 1 of `pred` against the targets `y`.
# NOTE(review): `pred` is only assigned in the validation-inference cell further
# down, so this cell fails (or uses stale values) on a top-to-bottom run.
# It looks like a leftover from out-of-order execution -- confirm before keeping.
metric = ConfusionMatrix(task='binary', num_classes=2)
metric.update(pred[:,1], y)
fig_, ax_ = metric.plot()

In [None]:
# Binary confusion matrix from column 1 of `pred` against the targets `y`.
# NOTE(review): this repeats the cell directly above, and `pred` is only
# assigned in the validation-inference cell further down -- confirm whether
# this cell is still needed.
metric = ConfusionMatrix(task='binary', num_classes=2)
metric.update(pred[:,1], y)
fig_, ax_ = metric.plot()

In [None]:
# Run the trained model over the whole validation loader with gradients
# disabled, collecting its outputs and the matching targets batch by batch.
train_model.model.eval()
prob_list, target_list = [], []
with torch.no_grad():
    for X, y in val_dl:
        prob_list.append(train_model.model(X))
        target_list.append(y)

# Concatenate along the batch dimension and move everything to the CPU.
cpu = torch.device('cpu')
pred = torch.cat(prob_list, dim=0).to(cpu)
y = torch.cat(target_list, dim=0).to(cpu)

In [None]:
# Confusion matrix for the validation outputs, using column 1 of `pred` as the
# score for the positive class.
metric = ConfusionMatrix(task='binary', num_classes=2)
positive_scores = pred[:, 1]
metric.update(positive_scores, y)
fig_, ax_ = metric.plot()