In [1007]:
# Clone the repository
!git clone https://github.com/miladmozafari/SpykeTorch

fatal: destination path 'SpykeTorch' already exists and is not an empty directory.


In [1008]:
!git clone https://github.com/Jakobovski/free-spoken-digit-dataset.git

fatal: destination path 'free-spoken-digit-dataset' already exists and is not an empty directory.


In [1009]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import wave
import pylab
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.nn.parameter import Parameter
from scipy import signal
from scipy.io import wavfile
from pathlib import Path
from torchvision import transforms
from SpykeTorch.SpykeTorch import snn
from SpykeTorch.SpykeTorch import functional as sf
from SpykeTorch.SpykeTorch import visualization as vis
from SpykeTorch.SpykeTorch import utils
import glob
import librosa
import librosa.display
import random
from tqdm import tqdm_notebook as tqdm

In [1019]:
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

In [1010]:
!pip install natsort



In [1011]:
from natsort import natsorted

In [1013]:
# Folder that is listed and read by the scanning loop; it lives inside the
# cloned free-spoken-digit-dataset repository.
INPUT_DIR = './free-spoken-digit-dataset/recordings'

# Base folder under which the 'audio-images' directory is created.
OUTPUT_DIR = './'

In [1014]:
# Directory path that a later cell creates if it does not already exist.
fsdd_dir = './audio_fsdd/'

In [1015]:
# Make sure the 'audio-images' folder exists under OUTPUT_DIR.
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(os.path.join(OUTPUT_DIR, 'audio-images'), exist_ok=True)

In [1016]:
# Make sure the fsdd_dir folder exists.
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(fsdd_dir, exist_ok=True)

In [1017]:
# Column-oriented container for recording metadata: four parallel lists that
# are appended to together and later handed to pandas.
audio_dict = {
    "index": [],
    "label": [],
    "file_name": [],
    "file_path": [],
}

In [1018]:
# Display the container to confirm its structure (all four lists are still empty here).
audio_dict

{'index': [], 'label': [], 'file_name': [], 'file_path': []}

In [1020]:
# Length cut-off used by the scanning loop: a recording is kept only when the
# second dimension of its loaded waveform tensor is smaller than this value.
max_length = 10000

In [1021]:
# Scan the recordings folder and keep the metadata of every clip whose
# waveform is shorter than max_length.

# Start from empty columns so that re-running this cell does not append
# every recording a second time.
for column in audio_dict.values():
    column.clear()

for index, file_name in enumerate(natsorted(os.listdir(INPUT_DIR))):
    # Ignore stray non-audio entries (e.g. hidden files). `index` still counts
    # positions in the full sorted listing, exactly as before.
    if not file_name.lower().endswith('.wav'):
        continue

    # The first character of the file name is used as the label.
    label = Path(file_name).stem[0]
    file_path = os.path.join(INPUT_DIR, file_name)

    # Named `waveform` rather than `signal` so that the `signal` module
    # imported from scipy at the top of the notebook is not overwritten.
    waveform, sr = torchaudio.load(file_path)

    if waveform.shape[1] < max_length:
        audio_dict["index"].append(index)
        audio_dict["label"].append(label)
        audio_dict["file_name"].append(file_name)
        audio_dict["file_path"].append(file_path)


In [1022]:
# Turn the collected columns into a DataFrame (one row per kept recording)
# and display it.
df = pd.DataFrame(audio_dict)
df

Unnamed: 0,index,label,file_name,file_path
0,0,0,0_george_0.wav,./free-spoken-digit-dataset/recordings/0_georg...
1,1,0,0_george_1.wav,./free-spoken-digit-dataset/recordings/0_georg...
2,2,0,0_george_2.wav,./free-spoken-digit-dataset/recordings/0_georg...
3,3,0,0_george_3.wav,./free-spoken-digit-dataset/recordings/0_georg...
4,4,0,0_george_4.wav,./free-spoken-digit-dataset/recordings/0_georg...
...,...,...,...,...
2990,2995,9,9_yweweler_45.wav,./free-spoken-digit-dataset/recordings/9_ywewe...
2991,2996,9,9_yweweler_46.wav,./free-spoken-digit-dataset/recordings/9_ywewe...
2992,2997,9,9_yweweler_47.wav,./free-spoken-digit-dataset/recordings/9_ywewe...
2993,2998,9,9_yweweler_48.wav,./free-spoken-digit-dataset/recordings/9_ywewe...


In [1023]:
from torch.utils.data import Dataset

In [1024]:
class FSDDDataset(Dataset):
    """Dataset backed by an annotation table of audio files.

    Each item is a pair ``(signal, label)``: ``signal`` is whatever
    ``torchaudio.load`` returns for the path stored in the row, and ``label``
    is the value stored in the same row. Rows are addressed positionally:
    column 1 holds the label and column 3 holds the file path.
    """

    def __init__(self, df_annotations):
        """Keep a reference to the annotation table (indexed with ``.iloc``)."""
        self.annotations = df_annotations

    def __len__(self):
        """Number of rows in the annotation table."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the audio file of row ``index`` and return it with its label."""
        path, label = (
            self._get_audio_sample_path(index),
            self._get_audio_sample_label(index),
        )
        return torchaudio.load(path), label

    def _get_audio_sample_path(self, index):
        """File path of row ``index`` (fourth column of the table)."""
        return self.annotations.iloc[index, 3]

    def _get_audio_sample_label(self, index):
        """Label of row ``index`` (second column of the table)."""
        label = self.annotations.iloc[index, 1]
        return label

In [1027]:
class S1C1Transform:
    """Turn a loaded audio clip into a spike tensor built from its MFCCs.

    The clip's MFCC matrix is computed with librosa, brought to a fixed
    number of frames (zero-padded or cut), and passed through SpykeTorch's
    ``utils.Intensity2Latency``.

    Args:
        timesteps: value handed to ``utils.Intensity2Latency``.
        n_mfcc: number of MFCC coefficients requested from librosa.
        max_pad_len: number of frames every MFCC matrix is padded or cut to.
    """

    def __init__(self, timesteps = 15, n_mfcc = 30, max_pad_len = 125):
        self.temporal_transform = utils.Intensity2Latency(timesteps)
        self.n_mfcc = n_mfcc
        self.max_pad_len = max_pad_len

    @staticmethod
    def _fit_frames(features, n_frames):
        """Zero-pad or truncate ``features`` along axis 1 to ``n_frames`` columns."""
        missing = n_frames - features.shape[1]
        if missing < 0:
            # np.pad rejects negative widths, so longer inputs are cut instead.
            return np.ascontiguousarray(features[:, :n_frames])
        return np.pad(features, pad_width=((0, 0), (0, missing)), mode='constant')

    def __call__(self, signal):
        """Encode one clip.

        Args:
            signal: pair ``(waveform, sample_rate)``; the waveform is a tensor
                and the sample rate is either a plain number or a
                one-element tensor (as produced by a DataLoader).

        Returns:
            Byte tensor obtained from ``sign()`` of the temporal transform's output.
        """
        waveform, sample_rate = signal[0], signal[1]
        # Non in-place squeeze: the caller's tensor keeps its shape.
        audio = waveform.squeeze().numpy()
        # int() accepts both a Python number and a one-element tensor.
        sr = int(sample_rate)
        # 20 ms analysis window with a 10 ms hop.
        audio = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=self.n_mfcc, n_fft=int(0.02*sr),hop_length=int(0.01*sr))
        audio = self._fit_frames(audio, self.max_pad_len)
        audio = torch.from_numpy(audio)
        # Add two leading singleton axes before the temporal transform.
        audio = audio.unsqueeze(0).unsqueeze(0)
        temporal_audio = self.temporal_transform(audio)
        return temporal_audio.sign().byte()

In [1028]:
# Transform instance built with the constructor's default timesteps value (15).
s1c1 = S1C1Transform()

In [1029]:
# Dataset over the annotation DataFrame `df`.
dataset = FSDDDataset(df)

In [1030]:
# Split the dataset indices into 75% training / 25% testing.
# A dedicated, seeded generator makes the shuffle (and therefore the split)
# reproducible across kernel restarts without touching the global `random` state.
SPLIT_SEED = 42
indices = list(range(len(dataset)))
random.Random(SPLIT_SEED).shuffle(indices)
split_point = int(0.75*len(indices))
train_indices = indices[:split_point]
test_indices = indices[split_point:]
print("Size of the training set:", len(train_indices))
print("Size of the  testing set:", len(test_indices))

Size of the training set: 2246
Size of the  testing set: 749


In [1031]:
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler

# Wrap the dataset in SpykeTorch's CacheDataset and build one loader per split.
# The wrapper gets its own name: rebinding `dataset = CacheDataset(dataset)`
# would stack another cache layer around it every time this cell is re-run.
cached_dataset = utils.CacheDataset(dataset)
train_loader = DataLoader(cached_dataset, sampler=SubsetRandomSampler(train_indices))
test_loader = DataLoader(cached_dataset, sampler=SubsetRandomSampler(test_indices))

In [1185]:
class FSDDSpykeTorch(nn.Module):
    """Single-layer spiking network with reward/punishment STDP updates.

    The network owns one SpykeTorch convolution (``conv3``) with 600 output
    feature maps. ``decision_map`` assigns each feature map to one of ten
    classes (60 consecutive maps per class); the class of the winning feature
    map is the network's decision.
    """

    def __init__(self):
        super().__init__()
        self.conv3 = snn.Convolution(1, 600, 9, 0.8, 0.05)
        # Threshold passed to sf.fire for conv3.
        self.conv3_t = 22

        # Rule applied on reward() ...
        self.stdp3 = snn.STDP(self.conv3, (0.03, -0.03), False, 0.2, 0.8)
        # ... and the sign-flipped rule applied on punish().
        self.anti_stdp3 = snn.STDP(self.conv3, (-0.001, 0.001), False, 0.2, 0.8)

        # Feature-map index -> class: 0 repeated 60 times, then 1 repeated 60 times, ...
        self.decision_map = [label for label in range(10) for _ in range(60)]

        # Tensors of the latest training-mode forward pass, consumed by
        # stdp(), reward() and punish().
        self.ctx = {"input_spikes":None, "potentials":None, "output_spikes":None, "winners":None}
        self.spk_cnt1 = 0
        self.spk_cnt2 = 0

    def forward(self, input, max_layer):
        """Return the decided class for ``input``, or -1 when there is no winner.

        In training mode the padded input, potentials, spikes and winners are
        stored in ``self.ctx`` for a later learning step. ``max_layer`` is
        accepted for interface compatibility and is not used.
        """
        input = sf.pad(input.float(), (2,2,2,2), 0)
        pot = self.conv3(input)
        spk, pot = sf.fire(pot, self.conv3_t, True)
        winners = sf.get_k_winners(pot, 1, 0, spk)
        if self.training:
            self.ctx["input_spikes"] = input
            self.ctx["potentials"] = pot
            self.ctx["output_spikes"] = spk
            self.ctx["winners"] = winners
        if len(winners) != 0:
            # The first entry of the single winner is used as the feature-map index.
            return self.decision_map[winners[0][0]]
        return -1

    def stdp(self, layer_idx):
        """Apply the plain STDP rule of layer ``layer_idx`` to the stored context.

        Only layer 3 exists in this network. The previous implementation
        dispatched layer 1 to a non-existent ``self.stdp1`` and therefore
        raised AttributeError.

        Raises:
            ValueError: if ``layer_idx`` is not 3.
        """
        if layer_idx != 3:
            raise ValueError(
                "FSDDSpykeTorch only has layer 3 (conv3); got layer_idx={!r}".format(layer_idx)
            )
        self.stdp3(self.ctx["input_spikes"], self.ctx["potentials"], self.ctx["output_spikes"], self.ctx["winners"])

    def update_learning_rates(self, stdp_ap, stdp_an, anti_stdp_ap, anti_stdp_an):
        """Set new learning rates on both STDP rules.

        Note that the anti-STDP rule receives ``(anti_stdp_an, anti_stdp_ap)``,
        i.e. in swapped order relative to the parameter list; this is kept
        exactly as in the original code.
        """
        self.stdp3.update_all_learning_rate(stdp_ap, stdp_an)
        self.anti_stdp3.update_all_learning_rate(anti_stdp_an, anti_stdp_ap)

    def reward(self):
        """Apply ``stdp3`` to the context of the last training-mode forward pass."""
        self.stdp3(self.ctx["input_spikes"], self.ctx["potentials"], self.ctx["output_spikes"], self.ctx["winners"])

    def punish(self):
        """Apply ``anti_stdp3`` to the context of the last training-mode forward pass."""
        self.anti_stdp3(self.ctx["input_spikes"], self.ctx["potentials"], self.ctx["output_spikes"], self.ctx["winners"])

In [1186]:
def train_rl(network, data, target):
    """Run one reward/punish learning pass over ``data``.

    The network is put in training mode and queried once per sample. A
    decision equal to the target triggers ``network.reward()``, a different
    decision triggers ``network.punish()``, and a decision of -1 counts as
    silence with no update.

    Returns:
        numpy array ``[correct, wrong, silence]`` divided by ``len(data)``.
    """
    network.train()
    n_correct = n_wrong = n_silent = 0
    for idx in range(len(data)):
        sample, expected = data[idx], target[idx]
        if use_cuda:
            sample = sample.cuda()
        decision = network(sample, 3)
        if decision != -1:
            if int(decision) == int(expected):
                n_correct += 1
                network.reward()
            else:
                n_wrong += 1
                network.punish()
        else:
            n_silent += 1
    return np.array([n_correct, n_wrong, n_silent]) / len(data)

In [1187]:
def test(network, data, target):
    network.eval()
    perf = np.array([0,0,0]) # correct, wrong, silence
    for i in range(len(data)):
        data_in = data[i]
        target_in = target[i]
        if use_cuda:
            data_in = data_in.cuda()
        d = network(data_in, 3)
        if d != -1:
            if int(d) == int(target_in):
                perf[0]+=1
            else:
                perf[1]+=1
        else:
            perf[2]+=1
    return perf/len(data)

In [1188]:
# Use the GPU only when one is actually available; a hard-coded True makes
# every later `.cuda()` call fail on CPU-only machines.
use_cuda = torch.cuda.is_available()

In [1189]:
# Instantiate the network and move it to the GPU when CUDA use is enabled.
net = FSDDSpykeTorch()
if use_cuda:
    net.cuda()

In [1193]:
# Best results seen so far, each stored as [correct, wrong, silence, epoch].
best_train = np.zeros(4)
best_test = np.zeros(4)

In [1194]:
# if os.path.isfile("/content/saved.net"):
#     net.load_state_dict(torch.load("/content/saved.net"))

In [1202]:
# Train the network for 10 epochs with reward/punishment feedback and
# evaluate on the test split after every epoch.
print("Training the third layer")
for epoch in tqdm(range(10)):
    print("Epoch #:", epoch)
    # Per-epoch accumulators, ordered [correct, wrong, silence].
    perf_train = np.array([0.0,0.0,0.0])
    perf_test = np.array([0.0,0.0,0.0])
    for data,targets in train_loader:
        # Encode the loaded clip, then add a leading axis in place so that
        # train_rl can index it as data[i] alongside targets[i].
        data_transform = s1c1(data)
        data_transform.unsqueeze_(0)
        perf_train_batch = train_rl(net, data_transform, targets)
        perf_train += perf_train_batch
    # Summed per-batch rates, printed before averaging over the batches.
    print("perf train: ", perf_train)
    perf_train /= len(train_loader)
    # `<=` means a tie also replaces the stored best (and its epoch).
    if best_train[0] <= perf_train[0]:
        best_train = np.append(perf_train, epoch)
    print("Current Train:", perf_train)
    print("   Best Train:", best_train)

    # Evaluation pass: same encoding, no learning.
    for data,targets in test_loader:
        data_transform = s1c1(data)
        data_transform.unsqueeze_(0)
        perf_test += test(net, data_transform, targets)
    perf_test /= len(test_loader)
    # Save the weights whenever the test accuracy matches or beats the best so far.
    if best_test[0] <= perf_test[0]:
        best_test = np.append(perf_test, epoch)
        torch.save(net.state_dict(), "saved.net")
    print("Current Test:", perf_test)
#     print("    Best Test:", best_test)

Training the third layer


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch #: 0
perf train:  [1348.  898.    0.]
Current Train: [0.60017809 0.39982191 0.        ]
   Best Train: [ 0.60552093  0.39447907  0.         18.        ]
Current Test: [0.36048064 0.63951936 0.        ]
Epoch #: 1
perf train:  [1343.  903.    0.]
Current Train: [0.59795191 0.40204809 0.        ]
   Best Train: [ 0.60552093  0.39447907  0.         18.        ]
Current Test: [0.35914553 0.64085447 0.        ]
Epoch #: 2
perf train:  [1347.  899.    0.]
Current Train: [0.59973286 0.40026714 0.        ]
   Best Train: [ 0.60552093  0.39447907  0.         18.        ]
Current Test: [0.36048064 0.63951936 0.        ]
Epoch #: 3
perf train:  [1327.  919.    0.]
Current Train: [0.59082814 0.40917186 0.        ]
   Best Train: [ 0.60552093  0.39447907  0.         18.        ]
Current Test: [0.35380507 0.64619493 0.        ]
Epoch #: 4
perf train:  [1342.  904.    0.]
Current Train: [0.59750668 0.40249332 0.        ]
   Best Train: [ 0.60552093  0.39447907  0.         18.        ]
Current T

In [1204]:
# Report the best results recorded by the training loop
# (each array is [correct, wrong, silence, epoch]).
print("best train: ", best_train)
print("best test: ", best_test)

best train:  [ 0.60552093  0.39447907  0.         18.        ]
best test:  [ 0.36849132  0.63150868  0.         19.        ]


In [1198]:
# Save the network's current weights.
# NOTE(review): this writes to the same "saved.net" path the training loop uses
# for its best-test checkpoint, so running this cell after training replaces
# that checkpoint with the current weights — confirm this is intended.
torch.save(net.state_dict(), "saved.net")

In [1200]:
# net.update_learning_rates(0.0003, -0.0003, -0.0001, 0.0001)