In [3]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils import data

# Select the GPU for this run, but only when that device actually exists, so
# the notebook also runs on CPU-only machines or machines with fewer GPUs.
GPU_INDEX = 2
if torch.cuda.is_available() and torch.cuda.device_count() > GPU_INDEX:
    torch.cuda.set_device(GPU_INDEX)

# Seed every random number generator imported in this cell so that a
# "Restart & Run All" reproduces the same results.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x7f5baef5a150>

# Get Data and generate spectrograms

In [114]:
# Load the song table stored under the 'song_df' key of the HDF5 file.
song_df = pd.read_hdf('song_df.h5', key='song_df')

In [109]:
# Render one small, axis-free log-spectrogram image per recording and save it
# as ./spectrograms/<Name>.png.
plt.ioff()  # do not open interactive figure windows while batch-rendering
os.makedirs("./spectrograms", exist_ok=True)

for row in song_df.itertuples(index=False):
    fig, ax = plt.subplots(1, 1, figsize=(0.375*2, 0.375*2), frameon=False, facecolor='black')
    try:
        ax.get_yaxis().set_visible(False)
        ax.get_xaxis().set_visible(False)
        ax.set_facecolor("black")

        frequencies, times, spectrogram = signal.spectrogram(row.Data, 16000)

        # Exact zeros (e.g. a silent pause in the recording) would become -inf
        # under the log. Clamp them to the smallest positive value of this
        # spectrogram; spectrograms without zeros are left unchanged.
        positive_values = spectrogram[spectrogram > 0]
        floor = positive_values.min() if positive_values.size else np.finfo(float).tiny
        ax.pcolormesh(np.log(np.maximum(spectrogram, floor)))
        ax.axis('off')

        filename = row.Name + ".png"
        # Save only the axes area (in inches) so the image has no border.
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        fig.savefig("./spectrograms/"+filename, bbox_inches=extent)
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)

# Taking the log of the spectrogram caused an error because the spectrogram of sing034_3 contains zeros, due to the pause in the recording.

  # Remove the CWD from sys.path while we load stuff.


In [115]:
# Show the row(s) of the recording named 'sing034_3'.
song_df.loc[song_df['Name'].eq('sing034_3')]

Unnamed: 0,Name,Data,Freq,Type,MFCC
373,sing034_3,"[-19464192, -16252928, -5177344, 14483456, 465...",16000,0,"[[38.812406955625654, -6.280405806858493, -13...."


# Updating the data repository

In [78]:
import cv2
import copy

In [124]:
# Read every saved spectrogram image back as a grayscale array scaled by 1/255,
# keeping only the first 47 columns of each image.
spectro_list = []

for name in song_df['Name']:
    filename = "spectrograms/" + name + ".png"
    image_bgr = cv2.imread(filename)
    if image_bgr is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError("could not read spectrogram image: " + filename)
    # cv2.imread returns channels in BGR order, so convert with BGR2GRAY.
    # NOTE(review): the previous RGB2GRAY swapped the red and blue weights, so
    # images loaded with this cell differ slightly from earlier runs.
    image_gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    # .copy() detaches the cropped slice from the full-size image buffer.
    spectro_list.append((image_gray / 255)[:, :47].copy())

In [125]:
# Store each image array in a new 'Spectrogram' column, aligned on the existing index.
song_df['Spectrogram'] = pd.Series(data=spectro_list, index=song_df.index)

In [126]:
# Write the table back to the HDF5 file it was loaded from, under the same key.
song_df.to_hdf(path_or_buf='song_df.h5', key='song_df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->['Name', 'Data', 'MFCC', 'Spectrogram']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


# Generating X and y for training

In [120]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [127]:
# Reload the table and pull out the model inputs (spectrogram images) and the
# integer class labels.
song_df = pd.read_hdf('song_df.h5', 'song_df')
X = song_df.Spectrogram.values
y = song_df.Type.values

# One-hot encode the labels as a dense array. The keyword that requests dense
# output was renamed from `sparse` to `sparse_output` in scikit-learn 1.2 and
# the old name was later removed, so try the new name first.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# The encoder expects a 2-D input with one column per feature.
y = onehot_encoder.fit_transform(y.reshape(-1, 1))
print(y)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [129]:
# Stack the per-recording images (an object array of 2-D arrays) into a single
# float64 array of shape (n_samples, 47, 47). The sample count is taken from
# the data instead of being hard-coded.
X_new = np.stack([np.asarray(image, dtype=np.float64) for image in X])
assert X_new.shape[1:] == (47, 47), "every spectrogram image must be 47x47"
print(X_new.shape)
X = X_new

(1440, 47, 47)


In [130]:
# Hold out 10% of the samples as the test set; the seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [131]:
# Carve 15% of the remaining training samples off as the validation set.
# This rebinds X_train / y_train, so re-running the cell shrinks them again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.15,
    random_state=42,
)

In [132]:
# One line per split (train, validation, test): features shape, then labels shape.
print(
    *(
        "{} {}".format(features.shape, labels.shape)
        for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
    ),
    sep="\n",
)

(1101, 47, 47) (1101, 3)
(195, 47, 47) (195, 3)
(144, 47, 47) (144, 3)


In [133]:
# Saving the generated datasets (np.save appends the ".npy" extension).

# Create the target directory first so the cell also works on a fresh checkout.
os.makedirs("./numpy_ds", exist_ok=True)

datasets_to_save = {
    # Training set
    "./numpy_ds/x_train_conv": X_train,
    "./numpy_ds/y_train_conv": y_train,
    # Validation set
    "./numpy_ds/x_val_conv": X_val,
    "./numpy_ds/y_val_conv": y_val,
    # Test set
    "./numpy_ds/x_test_conv": X_test,
    "./numpy_ds/y_test_conv": y_test,
}
for target_path, array in datasets_to_save.items():
    np.save(target_path, array)

## Loading New dataset

In [134]:
# Read the saved splits back from disk: features first, then labels, each in
# train / validation / test order.
X_train, X_val, X_test = map(np.load, (
    "./numpy_ds/x_train_conv.npy",
    "./numpy_ds/x_val_conv.npy",
    "./numpy_ds/x_test_conv.npy",
))

y_train, y_val, y_test = map(np.load, (
    "./numpy_ds/y_train_conv.npy",
    "./numpy_ds/y_val_conv.npy",
    "./numpy_ds/y_test_conv.npy",
))

# Model the Convnet

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils import data

# Select the GPU for the model section, but only when that device actually
# exists, so the cell does not fail on CPU-only or smaller machines.
# NOTE(review): the first cell of the notebook selects device 2 while this one
# selects device 3 - confirm which one is intended.
GPU_INDEX = 3
if torch.cuda.is_available() and torch.cuda.device_count() > GPU_INDEX:
    torch.cuda.set_device(GPU_INDEX)
torch.manual_seed(0)

In [None]:
class SpectroCNN(nn.Module):
    """Convolutional network: three conv stages followed by six linear layers.

    Each stage applies two 3x3 convolutions (the second one dilated) with Tanh
    activations and a 2x2 max-pool. The flattened feature map then passes
    through ``linear_1`` .. ``linear_4`` (ELU activations), is concatenated
    with the caller-supplied ``channel`` vector, and is reduced by
    ``linear_5`` and ``linear_6`` to ``num_outputs`` values.

    Args:
        dilate: Dilation of the second convolution of every stage. The original
            code read an undefined name ``dilate``; it is now a parameter.
            NOTE(review): the default of 1 (no dilation) is an assumption -
            confirm the intended value.
        flat_features: Number of values in the flattened output of the last
            pooling layer, i.e. the input width of ``linear_1``. The default
            115200 is the original hard-coded value. It depends on the input
            size and on ``dilate``; a 47x47 input with ``dilate=1`` flattens
            to 128 * 2 * 2 = 512.
        channel_features: Number of values in the ``channel`` tensor given to
            ``forward``. They are appended to the 32 features before
            ``linear_5``, so its input width is ``32 + channel_features``. The
            default 0 keeps the original width of 32.
        num_outputs: Width of the final layer. The default 2 is the original
            hard-coded value. NOTE(review): the labels in this notebook are
            one-hot encoded into 3 columns, so 3 may be intended - confirm.
    """

    def __init__(self, dilate=1, flat_features=115200, channel_features=0, num_outputs=2):
        # The original call named a different class (LocatorCNN) here, which
        # made it impossible to construct a SpectroCNN.
        super(SpectroCNN, self).__init__()

        # Stage 1: 1 -> 16 -> 32 feature maps, batch-normalised after the dilated conv.
        self.conv_layer1 = nn.Conv2d(1, 16, kernel_size=3, stride=1)
        self.conv_layer2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, dilation=dilate)
        self.batchnorm1 = nn.BatchNorm2d(32)

        # Stage 2: 32 -> 64 -> 64 feature maps.
        self.conv_layer3 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        self.conv_layer4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, dilation=dilate)

        # Stage 3: 64 -> 128 -> 128 feature maps.
        self.conv_layer5 = nn.Conv2d(64, 128, kernel_size=3, stride=1)
        self.conv_layer6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, dilation=dilate)

        # Parameter-free layers, shared by all stages.
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.drop2 = nn.Dropout2d(p=0.1)
        self.drop1 = nn.Dropout(p=0.05)
        self.activation_layer_2d = nn.Tanh()
        self.activation_layer = nn.ELU()

        # Linear head.
        self.linear_1 = nn.Linear(flat_features, 2048)
        self.linear_2 = nn.Linear(2048, 512)
        self.linear_3 = nn.Linear(512, 128)
        self.linear_4 = nn.Linear(128, 32)
        self.linear_5 = nn.Linear(32 + channel_features, 8)
        self.linear_6 = nn.Linear(8, num_outputs)

    def forward(self, x, dim, channel):
        """Run the network on a single sample.

        Args:
            x: Input tensor; it is reshaped to ``(1, 1, dim, -1)``, i.e. one
                single-channel image with ``dim`` rows.
            dim: Number of rows of the input image.
            channel: Extra features, flattened to 1-D and concatenated with the
                32 features produced by ``linear_4``. Must hold
                ``channel_features`` values (an empty tensor for the default).

        Returns:
            A 1-D tensor with ``num_outputs`` values (no final activation).
        """
        x = x.view((1, 1, dim, -1))
        channel = channel.view(-1)

        # Stage 1
        out = self.conv_layer1(x)
        out = self.activation_layer_2d(out)
        out = self.conv_layer2(out)
        out = self.batchnorm1(out)
        out = self.activation_layer_2d(out)
        out = self.maxpool(out)
        out = self.drop2(out)

        # Stage 2
        out = self.conv_layer3(out)
        out = self.activation_layer_2d(out)
        out = self.conv_layer4(out)
        out = self.activation_layer_2d(out)
        out = self.maxpool(out)
        out = self.drop2(out)

        # Stage 3 (no dropout after the last pooling)
        out = self.conv_layer5(out)
        out = self.activation_layer_2d(out)
        out = self.conv_layer6(out)
        out = self.activation_layer_2d(out)
        out = self.maxpool(out)

        # Flatten everything into one vector; this is why only a single sample
        # can be processed per call.
        out = out.view(-1)

        out = self.linear_1(out)
        out = self.activation_layer(out)
        out = self.linear_2(out)
        out = self.activation_layer(out)
        out = self.drop1(out)
        out = self.linear_3(out)
        out = self.activation_layer(out)
        out = self.linear_4(out)
        out = self.activation_layer(out)
        # Append the extra per-sample features before the last two layers.
        out = torch.cat((out, channel))
        out = self.linear_5(out)
        out = self.activation_layer(out)
        out = self.linear_6(out)

        return out