In [1]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so files
# stored on Drive can be read and written through ordinary paths.
from google.colab import drive 
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
from tqdm import tqdm
import librosa.feature
from PIL import Image
import os
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from IPython.display import clear_output
from sklearn.metrics import roc_auc_score

In [3]:
def spec2image(spec, eps=1e-6):
    """Convert a spectrogram array into an 8-bit grayscale image.

    The input is standardised (zero mean, unit variance, with ``eps`` guarding
    against a zero standard deviation) and then min-max scaled to ``[0, 255]``.
    Values are truncated, not rounded, by the final cast to ``uint8``.

    Args:
        spec: Array-like of numeric values (any shape).
        eps: Small constant added to the standard deviation before dividing.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as ``spec``.
        A constant input (zero dynamic range) yields an all-zero image.
    """
    spec = np.asarray(spec)
    spec_norm = (spec - spec.mean()) / (spec.std() + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    value_range = spec_max - spec_min

    # A constant spectrogram has no dynamic range; dividing by it would be
    # 0/0 -> NaN, and casting NaN to uint8 is undefined. Return black instead.
    if value_range == 0:
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    return spec_scaled.astype(np.uint8)

In [11]:
class CLFData(torch.utils.data.Dataset):
    """Binary classification dataset of spectrograms stored as ``.npy`` files.

    ``path_data`` must contain two sub-directories:

    * ``noisy`` - samples labelled ``0``
    * ``clear`` - samples labelled ``1``

    Each file is loaded, transposed, converted to an 8-bit image with
    ``spec2image`` and then zero-padded or randomly cropped along the second
    axis to ``crop_width`` columns.

    Args:
        path_data: Root directory of the split (trailing slash optional).
        crop_width: Number of columns every returned sample has.
        n_bins: Number of rows of the zero-padded canvas used for samples
            that are not wider than ``crop_width``.
    """

    def __init__(self, path_data, crop_width=200, n_bins=80):
        self.path_data = path_data
        self.crop_width = crop_width
        self.n_bins = n_bins

        # os.path.join keeps the paths valid whether or not path_data ends in '/'.
        noisy_dir = os.path.join(path_data, 'noisy')
        clear_dir = os.path.join(path_data, 'clear')

        # Sorted so that index -> file mapping is stable across runs.
        self.class1 = sorted(os.listdir(noisy_dir))
        self.class2 = sorted(os.listdir(clear_dir))
        self.labels0 = np.zeros(len(self.class1))
        self.labels1 = np.ones(len(self.class2))
        self.all_paths_class1 = np.array(
            [os.path.join(noisy_dir, name) for name in self.class1], dtype=str)
        self.all_paths_class2 = np.array(
            [os.path.join(clear_dir, name) for name in self.class2], dtype=str)
        self.all_paths = np.hstack((self.all_paths_class1, self.all_paths_class2))
        self.labels = np.hstack((self.labels0, self.labels1))

    def __len__(self):
        """Return the total number of files across both classes."""
        return len(self.all_paths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is a float32 tensor with a
            leading channel axis of size 1 and ``label`` is a float32 tensor
            of shape ``(1,)``.
        """
        image = spec2image(np.load(self.all_paths[idx]).T)
        n_frames = image.shape[1]

        if n_frames <= self.crop_width:
            # Short sample: place it in the top-left corner of a zero canvas.
            image_crop = np.zeros((self.n_bins, self.crop_width), dtype=np.float32)
            image_crop[:image.shape[0], :n_frames] = image
        else:
            # randint's upper bound is exclusive; +1 lets the final window
            # (ending exactly at the last column) be sampled too.
            start = np.random.randint(0, n_frames - self.crop_width + 1)
            image_crop = image[:, start:start + self.crop_width]

        # Inputs are data, not parameters: they must not require gradients,
        # otherwise batching builds a needless autograd graph.
        image_crop = torch.as_tensor(image_crop, dtype=torch.float32).unsqueeze(0)
        label = torch.tensor([self.labels[idx]], dtype=torch.float32)
        return image_crop, label

In [5]:
# Instantiate torchvision's ResNet-18; no weights argument is passed here.
model = models.resnet18()

In [6]:
# Build from a fresh backbone instead of mutating and rebinding `model`:
# re-running this cell then always yields the same architecture rather than
# attaching conv1/fc to the Sequential wrapper and nesting another Sigmoid.
backbone = models.resnet18()

# Replace the stem so the network accepts 1-channel input (kernel, stride and
# padding as given below).
backbone.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Single-output head; the input width is read from the layer being replaced.
backbone.fc = nn.Linear(in_features=backbone.fc.in_features, out_features=1, bias=True)

# Sigmoid turns the single output into a probability in [0, 1].
model = nn.Sequential(
    backbone,
    nn.Sigmoid()
)

In [7]:
# Print the module tree to inspect the layer configuration.
print(model)

Sequential(
  (0): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [12]:
# Training split: reshuffled every epoch, served in mini-batches of 256.
train_data = CLFData('/content/gdrive/My Drive/train/')
train_loader = torch.utils.data.DataLoader(train_data, batch_size=256, shuffle=True)

# Validation split: the whole set is served as a single batch. Shuffling is
# disabled because sample order has no effect on the validation metric and
# only adds run-to-run nondeterminism.
val_data = CLFData('/content/gdrive/My Drive/validate/')
val_loader = torch.utils.data.DataLoader(val_data, batch_size=len(val_data), shuffle=False)

In [None]:
# --- Training configuration --------------------------------------------------
num_epochs = 100
save_every = 5   # write a checkpoint every N optimisation steps
eval_every = 1   # validate and redraw the plots every N optimisation steps
checkpoint_path = '/content/gdrive/My Drive/resnet_model'

# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model first, then build the optimizer, so the optimizer is created
# over the parameters that will actually be trained on `device`.
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters())

# The model emits probabilities, so plain BCE is the matching loss. Created
# once here rather than on every step.
criterion = nn.BCELoss()

train_loss = []  # per-step training loss
roc = []         # validation ROC-AUC, one entry per evaluation
i = 0            # global optimisation-step counter


def evaluate_roc_auc(net, loader):
    """Return ROC-AUC of ``net`` over every batch yielded by ``loader``.

    The network is switched to eval mode for the duration of the pass so that
    BatchNorm uses (and does not update) its running statistics, and its
    previous mode is restored afterwards.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    was_training = net.training
    net.eval()
    scores, targets = [], []
    try:
        with torch.no_grad():
            for x, y in loader:
                scores.append(net(x.to(device)).cpu().numpy().ravel())
                targets.append(y.cpu().numpy().ravel())
    finally:
        net.train(was_training)
    if not scores:
        raise ValueError('validation loader produced no batches')
    return roc_auc_score(np.concatenate(targets), np.concatenate(scores))


for epoch in range(num_epochs):
    for X_batch, y_batch in tqdm(train_loader):
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        predict = model(X_batch)
        loss = criterion(predict, y_batch)
        loss.backward()
        optimizer.step()

        error = loss.item()
        train_loss.append(error)

        do_eval = i % eval_every == 0
        if do_eval:
            val_auc = evaluate_roc_auc(model, val_loader)

        if i % save_every == 0:
            # NOTE(review): this pickles the whole module object; saving
            # model.state_dict() is the more portable format. Left unchanged
            # so anything that already loads this file keeps working.
            torch.save(model, checkpoint_path)
        i += 1

        if not do_eval:
            continue

        clear_output(True)
        print('Epoch: ', epoch)
        roc.append(val_auc)

        fig, (ax_loss, ax_roc) = plt.subplots(1, 2, figsize=(10, 5))
        ax_loss.semilogy(train_loss)
        ax_loss.set_title('Train loss')
        ax_loss.grid()
        ax_roc.plot(roc)
        ax_roc.set_title('validation ROC-AUC')
        ax_roc.grid()
        plt.show()
        # Release the figure explicitly; one is created per evaluation.
        plt.close(fig)


  0%|          | 0/94 [00:00<?, ?it/s]