In [0]:
import numpy as np
import librosa

from torch.autograd import Variable
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from sklearn.model_selection import train_test_split


# Interface colab notebook with dataset in google drive

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
#Check google drive directory 
!ls "/content/drive/My Drive/Colab Notebooks/"

 NN-MFCCs.ipynb   NonProg  'Progressive Rock Songs'


# Get mp3 file names

In [49]:
from google.colab import drive

import glob

filePaths_NonProg = glob.glob("/content/drive/My Drive/Colab Notebooks/NonProg/***.mp3")
filePaths_Prog = glob.glob("/content/drive/My Drive/Colab Notebooks/Progressive Rock Songs/***.mp3")
print(len(filePaths_NonProg))
print(len(filePaths_Prog))
numberNonProg = len(filePaths_NonProg)
numberProg = len(filePaths_Prog)
#for i in range(len(filePaths_Prog)):
#  print(filePaths_Prog[i])

302
73


# Extract Features - MFCCs and Covariance of MFCCs

In [0]:

numOfMFCCS = 20

def getMFCCS(x, sr, nMFCCs):
    stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
    meanMFCCS = np.mean(mfccs, axis=1)

    cov = np.cov(mfccs)
    upperCovIndicies = np.triu_indices(nMFCCs)
    upperCov = cov[upperCovIndicies]
    return meanMFCCS, upperCov, mfccs

def normalizeInputs(x):
    mean = x.mean()
    std = x.std()
    z = (x-mean)/std
    return z

Grab features from files, store into matrix

In [0]:
nonProgInputs = np.zeros((numberNonProg, inputSize))
for i, file in enumerate(filePaths_NonProg):
  print(i)
  x, sr = librosa.load(file, sr=None, mono=True, duration = 40)
  meanMFCC, covMFCC, mfccs = getMFCCS(x, sr, numOfMFCCS)
  meanMFCCNormalized = normalizeInputs(meanMFCC)
  covMFCCNormalized = normalizeInputs(covMFCC)
  
  inputs = np.concatenate((meanMFCCNormalized, covMFCCNormalized))
  inputs = np.expand_dims(inputs,axis=1).T

  nonProgInputs[i,:] = inputs
  

In [60]:
progInputs = np.zeros((numberProg, inputSize))
for i, file in enumerate(filePaths_Prog):
  print(i)
  x, sr = librosa.load(file, sr=None, mono=True, duration = 40)
  meanMFCC, covMFCC, mfccs = getMFCCS(x, sr, numOfMFCCS)
  meanMFCCNormalized = normalizeInputs(meanMFCC)
  covMFCCNormalized = normalizeInputs(covMFCC)
  
  inputs = np.concatenate((meanMFCCNormalized, covMFCCNormalized))
  inputs = np.expand_dims(inputs,axis=1).T

  progInputs[i,:] = inputs




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


In [63]:
#Combine prog with non prog inputs
allInputs = np.concatenate((progInputs, nonProgInputs))
allInputs.shape

(375, 230)

# Create data labels

In [79]:
#Create labels 
yProg = np.ones((numberProg,1))
yNonProg = -np.ones((numberNonProg,1))
y = np.concatenate((yProg, yNonProg))

(375, 1)

# Split dataset into training set and test set

In [0]:
#splitting the training set
testSize = .2
randStateSplit = 42
inputTrain, inputTest, labelTrain, labelTest = train_test_split(allInputs, y, test_size=testSize, random_state = randStateSplit, shuffle = True)

# Convert our dataset into a pseudo-pytorch data loader format to feed into the Neural Network (can be improved)

In [0]:

numTrainingSamples = inputTrain.shape[0]
input_size = inputTrain.shape[1]

batch_size = 25
numOfTrainingBatches = 12

inputTrainPD = pd.DataFrame(np.concatenate((labelTrain, inputTrain),axis = 1))

train_loader = []
labels = []
for g, temp_df in inputTrainPD.groupby(np.arange(numTrainingSamples) // batch_size):
  tempLabels = np.asarray(temp_df.iloc[:,0])
  tempLabels = torch.LongTensor(tempLabels)
  tempLabelsSqueezed = torch.squeeze(tempLabels)
  labels.append(tempLabelsSqueezed)

  tempInputs = temp_df.iloc[:,1:].as_matrix() #convert to np array ... as_matrix returns np array
  torchSamples = []
  for i in range(batch_size):
    #import pdb; pdb.set_trace()
    tempTorchSample = torch.FloatTensor(tempInputs[i])
    torchSamples.append(torch.t(tempTorchSample.view(input_size,1)))
  #   import pdb; pdb.set_trace()
  stackedInputTensors = torch.stack(torchSamples)    

  train_loader.append(stackedInputTensors.view(batch_size,input_size))

# Neural Network Architecture

In [0]:
inputSize = 230
hidden_size1 = 200
hidden_size2 = 50
num_classes = 1
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2) 
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out

net = NeuralNet(input_size, hidden_size1, hidden_size2, num_classes).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  



# Train model

In [137]:
num_epochs = 100
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i in range(numOfTrainingBatches):  
        # Move tensors to the configured device
        input_x = Variable(train_loader[i])
        label_x = Variable(labels[i])
        
        # Forward pass
        outputs = net(input_x.float())
        outputs = torch.squeeze(outputs)
        
        #import pdb; pdb.set_trace()
        loss = criterion(outputs, label_x.float())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        #if (i+1) % 100 == 0:
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/100], Step [1/12], Loss: 0.8378
Epoch [1/100], Step [2/12], Loss: 0.7810
Epoch [1/100], Step [3/12], Loss: 0.7442
Epoch [1/100], Step [4/12], Loss: 0.8049
Epoch [1/100], Step [5/12], Loss: 0.6984
Epoch [1/100], Step [6/12], Loss: 0.7404
Epoch [1/100], Step [7/12], Loss: 0.8758
Epoch [1/100], Step [8/12], Loss: 0.8065
Epoch [1/100], Step [9/12], Loss: 0.8285
Epoch [1/100], Step [10/12], Loss: 0.8316
Epoch [1/100], Step [11/12], Loss: 0.8194
Epoch [1/100], Step [12/12], Loss: 0.7107
Epoch [2/100], Step [1/12], Loss: 0.8378
Epoch [2/100], Step [2/12], Loss: 0.7810
Epoch [2/100], Step [3/12], Loss: 0.7442
Epoch [2/100], Step [4/12], Loss: 0.8049
Epoch [2/100], Step [5/12], Loss: 0.6984
Epoch [2/100], Step [6/12], Loss: 0.7404
Epoch [2/100], Step [7/12], Loss: 0.8758
Epoch [2/100], Step [8/12], Loss: 0.8065
Epoch [2/100], Step [9/12], Loss: 0.8285
Epoch [2/100], Step [10/12], Loss: 0.8316
Epoch [2/100], Step [11/12], Loss: 0.8194
Epoch [2/100], Step [12/12], Loss: 0.7107
Epoch [3/1

# Test model

In [0]:

# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
#with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.reshape(-1, 28*28).to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))



# Save the NN weights

In [0]:
# # Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')