In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from local.torch_basics import *
from local.test import *
from local.basics import *
from local.data.all import *
from local.vision.core import *
from local.notebook.showdoc import show_doc
from local.audio.core import *
from local.audio.augment import *
from local.vision.learner import *
from local.vision.models.xresnet import *
from local.metrics import *
from local.callback.schedule import *
import torchaudio
from fastprogress import progress_bar as pb
import time
from sklearn.linear_model import RidgeClassifierCV


In [None]:
from numba import njit, prange
import numpy as np

@njit
def generate_kernels(input_length, num_kernels, candidate_lengths=np.array((7, 9, 11)), stride=5):
    """Draw the parameters of `num_kernels` random 1-D convolution kernels.

    Parameters
    ----------
    input_length : int
        Length of the series the kernels will be applied to; it bounds the
        largest dilation that can be drawn.
    num_kernels : int
        Number of kernels to generate.
    candidate_lengths : 1-D integer array
        Kernel lengths to choose from.
    stride : int
        Stride stored for every kernel.

    Returns
    -------
    (weights, lengths, biases, dilations, paddings, strides)
        `weights` has shape (num_kernels, candidate_lengths.max()); only the
        first `lengths[i]` entries of `weights[i]` are meaningful.
    """
    # initialise kernel parameters
    strides = np.ones(num_kernels, dtype = np.int32) * stride
    weights = np.zeros((num_kernels, candidate_lengths.max())) # see note
    lengths = np.zeros(num_kernels, dtype = np.int32) # see note
    biases = np.zeros(num_kernels)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)
    # note: only the first *lengths[i]* values of *weights[i]* are used
    for i in range(num_kernels):
        length = np.random.choice(candidate_lengths)
        _weights = np.random.normal(0, 1, length)
        bias = np.random.uniform(-1, 1)
        # Largest dilation for which the dilated kernel still fits in the input.
        # Clamped to 1 so log2 never receives 0 when the input is shorter than
        # the kernel.
        max_dilation = max((input_length - 1) // (length - 1), 1)
        # Truncate to an integer *before* deriving the padding: `dilations` is
        # an int32 array, so the padding must be based on the value that is
        # actually stored and later applied, not on the fractional draw.
        dilation = np.int32(2 ** np.random.uniform(0, np.log2(max_dilation)))
        padding = ((length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        # mean-centre the weights of this kernel
        weights[i, :length] = _weights - _weights.mean()
        lengths[i], biases[i], dilations[i], paddings[i] = length, bias, dilation, padding
    return weights, lengths, biases, dilations, paddings, strides

In [None]:
@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding, stride):
    """Apply one dilated, strided kernel to a 1-D series and pool the output.

    Parameters
    ----------
    X : 1-D float array
        Input series.
    weights : 1-D float array
        Kernel weights; the first `length` entries are used.
    length, dilation, padding, stride : int
        Kernel length, dilation, zero padding added on each side, and step
        between consecutive window start positions.
    bias : float
        Added to every convolution output.

    Returns
    -------
    (ppv, max)
        Proportion of convolution outputs that are > 0, and the largest output.
        `(0.0, 0.0)` is returned when the dilated kernel does not fit in the
        (padded) input at all.
    """
    # zero padding
    if padding > 0:
        _input_length = len(X)
        _X = np.zeros(_input_length + (2 * padding))
        _X[padding:(padding + _input_length)] = X
        X = _X
    input_length = len(X)
    # Number of start positions at which the whole dilated kernel fits.
    num_positions = input_length - ((length - 1) * dilation)
    if num_positions <= 0:
        # Nothing to convolve: avoid a 0/0 ppv and a -inf maximum.
        return 0.0, 0.0
    # Ceiling division: start positions 0, stride, 2*stride, ... < num_positions.
    # Floor division would drop the last window and yield 0 outputs whenever
    # fewer than `stride` positions fit.
    output_length = (num_positions + stride - 1) // stride

    _ppv = 0 # "proportion of positive values"
    _max = -np.inf
    for i in range(output_length):
        _sum = bias
        for j in range(length):
            _sum += weights[j] * X[i*stride + (j * dilation)]
        if _sum > 0:
            _ppv += 1
        if _sum > _max:
            _max = _sum
    return _ppv / output_length, _max

In [None]:
@njit(parallel = True, fastmath = True)
def apply_kernels(X, kernels):
    """Transform every series in `X` with every kernel in `kernels`.

    `kernels` is the 6-tuple (weights, lengths, biases, dilations, paddings,
    strides). The result has one row per series and two consecutive columns
    per kernel, holding the pair returned by `apply_kernel` for that kernel.
    """
    weights, lengths, biases, dilations, paddings, strides = kernels
    n_series = len(X)
    n_kernels = len(weights)
    # 2 features per kernel
    features = np.zeros((n_series, 2 * n_kernels))
    # rows are independent of each other, so the outer loop is the prange one
    for row in prange(n_series):
        for k in range(n_kernels):
            k_len = lengths[k]
            col = 2 * k
            features[row, col:(col + 2)] = apply_kernel(
                X[row], weights[k][:k_len], k_len, biases[k], dilations[k], paddings[k], strides[k])
    return features

In [None]:
# Location of the 250-speaker dataset inside the configured data directory.
p250speakers = Config()['data_path'] / '250_speakers'
# Presumably downloads and extracts the SPEAKERS250 archive: the archive file is
# named '<p250speakers>.tar' and `dest` is the dataset directory itself.
untar_data(URLs.SPEAKERS250, fname=str(p250speakers)+'.tar', dest=p250speakers)

PosixPath('/home/jupyter/.fastai/data/250_speakers/250-speakers')

In [None]:
# Build a getter (recursive, no folder filter) and apply it to the dataset root
# to collect the audio file paths.
# NOTE(review): `x` is a very generic name for the getter — consider a more
# descriptive one if nothing else relies on it.
x = AudioGetter("", recurse=True, folders=None)
files_250  = x(p250speakers)

In [None]:
files_250

(#44655) [/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/_KymcHdEW0U/00034.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/2HgpwyiMUEE/00002.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/11xxoaj4aEA/00001.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/gC8wHtwhnZw/00036.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/gC8wHtwhnZw/00038.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/gC8wHtwhnZw/00037.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/DwK2JNH10zE/00013.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/DwK2JNH10zE/00012.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/DwK2JNH10zE/00011.wav,/home/jupyter/.fastai/data/250_speakers/250-speakers/id09028/4f8IoTgW8z0/00005.wav...]

In [None]:
# Opener over the collected file list; it is called later with an integer index (`oa(i)`).
oa = OpenAudio(files_250)

In [None]:
# Clip length; CropSignal receives 1000*CLIP_LENGTH (presumably seconds converted
# to milliseconds — TODO confirm against CropSignal).
# A 2 s clip length caused a memory error with 9 GB of RAM; resampling to 8k
# would be another way to reduce memory use.
CLIP_LENGTH = 1

In [None]:
# Speaker label = third-from-last '/'-separated path component with its first
# three characters dropped, e.g. '.../id09028/_KymcHdEW0U/00034.wav' -> '9028'.
labeler = lambda x: str(x).split('/')[-3][3:]
sigs, labels = [],[]
# NOTE(review): 1000*CLIP_LENGTH is presumably a duration in milliseconds — confirm.
cropper = CropSignal(1000*CLIP_LENGTH, pad_mode='repeat')
remove_silence = RemoveSilence()
# For every file: open it by index, remove silence, crop, then keep the `.sig`
# attribute of the result together with the label derived from the file path.
for i in pb(range(len(files_250))):
    sigs.append(cropper(remove_silence(oa(i))).sig)
    labels.append(labeler(files_250[i]))

In [None]:
len(sigs), len(labels)

(44655, 44655)

In [None]:
# Random 80/20 train/validation split over the clip indices.
# n_samples must equal len(sigs); the assertion in the following cell checks it.
n_samples = 44655
train_size = int(n_samples*.8)
# Draw one permutation and cut it in two. This gives disjoint index sets without
# testing every index for membership in a tensor, which rescans the whole
# tensor for each index.
shuffled_idxs = torch.randperm(n_samples)
train_idxs = shuffled_idxs[:train_size]
# Plain Python ints in ascending order, as before.
valid_idxs = sorted(shuffled_idxs[train_size:].tolist())

In [None]:
assert len(train_idxs) + len(valid_idxs) == len(sigs)

In [None]:
# Materialise the split: each selected signal is squeezed on dim 0 and converted
# to a NumPy array; labels are picked with the same indices so x and y stay aligned.
x_train = [sigs[idx].squeeze(0).numpy() for idx in train_idxs]
y_train = [labels[idx] for idx in train_idxs]
x_valid = [sigs[idx].squeeze(0).numpy() for idx in valid_idxs]
y_valid = [labels[idx] for idx in valid_idxs]

In [None]:
list(map(len, (x_train, y_train, x_valid, y_valid)))

[35724, 35724, 8931, 8931]

In [None]:
# Stack the per-clip arrays along a new first axis (one row per clip) and cast to float64.
np_x_train = np.stack(x_train).astype(np.float64)
np_x_valid = np.stack(x_valid).astype(np.float64)
# Display both shapes as a sanity check.
np_x_train.shape, np_x_valid.shape

((35724, 16000), (8931, 16000))

In [None]:
max(y_train), min(y_train)

('9272', '8860')

In [None]:
# Sorted list of the distinct labels seen in the training split; a label's
# position in this list is used as its integer class id.
cleaned_labels = sorted(set(y_train))

In [None]:
len(cleaned_labels)

250

In [None]:
# Map a label to its position in `cleaned_labels` (looked up at call time).
# list.index raises ValueError for a label that is not in the list.
o2i_f = lambda x: cleaned_labels.index(x)

In [None]:
# Encode the string labels of both splits as integer class ids.
np_y_train = np.array([o2i_f(label) for label in y_train])
np_y_valid = np.array([o2i_f(label) for label in y_valid])

In [None]:
np_y_train

array([ 51, 176, 201, ...,  97,  50, 190])

In [None]:
len(np.unique(np_y_train)), len(np.unique(np_y_valid)),np_y_train.min(), np_y_train.max(), np_y_valid.min(), np_y_valid.max()

(250, 250, 0, 249, 0, 249)

In [None]:
np_x_train.shape, np_y_train.shape, np_x_valid.shape, np_y_valid.shape

((35724, 16000), (35724,), (8931, 16000), (8931,))

In [None]:
np_x_train.mean()

2.5251292232008987e-05

### Normalize the training data

In [None]:
np_x_train.shape

(35724, 32000)

In [None]:
# Standardise every row independently (axis=1): subtract the row mean and divide
# by the row standard deviation. The 1e-8 keeps the division finite for rows with
# zero standard deviation. The same per-row scaling is applied to the validation set.
np_x_train = (np_x_train - np_x_train.mean(axis = 1, keepdims = True)) / (np_x_train.std(axis = 1, keepdims = True) + 1e-8)
np_x_valid = (np_x_valid - np_x_valid.mean(axis = 1, keepdims = True)) / (np_x_valid.std(axis = 1, keepdims = True) + 1e-8)

In [None]:
np_x_train.mean(), np_x_train.std()

(2.199686084508134e-20, 0.9999997129530986)

In [None]:
np_x_train.dtype

dtype('float64')

In [None]:
# Module-level kernel settings.
# NOTE(review): the timing_test* calls visible in this notebook pass their own
# np.array((7,9,11)) and stride=5, so these two values do not appear to be used
# by them — confirm whether stride = 3 is intentional.
candidate_lengths = np.array((7,9,11))
stride = 3

In [None]:
def timing_test(runs, candidate_lengths, stride, num_kernels, seq_length):
    """Run `runs` independent random-kernel experiments and time them.

    Each run draws fresh kernels, transforms the module-level `np_x_train` /
    `np_x_valid` with them, fits a RidgeClassifierCV on `np_y_train` and scores
    it on `np_y_valid`. The timer starts after the kernels are generated, so
    each time covers transform + fit + score.

    Returns
    -------
    (times, scores) : two lists with one entry per run.
    """
    times, scores = [],[]
    for i in range(runs):
        kernels = generate_kernels(seq_length, num_kernels, candidate_lengths, stride)
        start = time.time()
        x_train_tfm = apply_kernels(np_x_train, kernels)
        # Guard against the occasional -inf/NaN feature coming out of the jitted
        # kernels: replace every non-finite value with 0 so the classifier does
        # not receive infinities. Applied identically to train and valid.
        x_train_tfm[~np.isfinite(x_train_tfm)] = 0
        x_valid_tfm = apply_kernels(np_x_valid, kernels)
        x_valid_tfm[~np.isfinite(x_valid_tfm)] = 0
        # NOTE(review): `normalize=` was removed from RidgeClassifierCV in newer
        # scikit-learn releases — confirm against the installed version.
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7), normalize=True)
        classifier.fit(x_train_tfm, np_y_train)
        score = classifier.score(x_valid_tfm, np_y_valid)
        t = time.time()-start
        scores.append(score)
        times.append(t)
        print("Finished Run", i+1, "Score:", round(score, 3), "Time:", round(t,3))
    return times, scores

In [None]:
timing_test(2, np.array((7,9,11)), stride=5, num_kernels=200, seq_length=16000)

Finished Run 1 Score: 0.222 Time: 73.033
Finished Run 2 Score: 0.224 Time: 76.247


([73.03297019004822, 76.24688529968262],
 [0.22169969768223044, 0.22360317993505766])

In [None]:
timing_test(2, np.array((7,9,11)), stride=5, num_kernels=1000, seq_length=16000)

Finished Run 1 Score: 0.279 Time: 388.645
Finished Run 2 Score: 0.282 Time: 433.431


([388.64545917510986, 433.43074345588684],
 [0.2788041652670474, 0.28182734296271417])

## Looking at predictive power of a single kernel

In [None]:
times, scores = timing_test(100, np.array((7,9,11)), stride=5, num_kernels=1, seq_length=16000)

Finished Run 1 Score: 0.027 Time: 2.797
Finished Run 2 Score: 0.019 Time: 2.657
Finished Run 3 Score: 0.022 Time: 2.698
Finished Run 4 Score: 0.019 Time: 2.957
Finished Run 5 Score: 0.018 Time: 2.673
Finished Run 6 Score: 0.019 Time: 2.89
Finished Run 7 Score: 0.029 Time: 2.978
Finished Run 8 Score: 0.019 Time: 3.02
Finished Run 9 Score: 0.019 Time: 2.878
Finished Run 10 Score: 0.017 Time: 2.688
Finished Run 11 Score: 0.018 Time: 2.976
Finished Run 12 Score: 0.028 Time: 2.798
Finished Run 13 Score: 0.02 Time: 2.847
Finished Run 14 Score: 0.023 Time: 2.861
Finished Run 15 Score: 0.018 Time: 2.741
Finished Run 16 Score: 0.02 Time: 2.685
Finished Run 17 Score: 0.019 Time: 2.907
Finished Run 18 Score: 0.019 Time: 2.848
Finished Run 19 Score: 0.019 Time: 2.695
Finished Run 20 Score: 0.023 Time: 2.976
Finished Run 21 Score: 0.015 Time: 2.639
Finished Run 22 Score: 0.018 Time: 2.702
Finished Run 23 Score: 0.017 Time: 2.715
Finished Run 24 Score: 0.018 Time: 2.969
Finished Run 25 Score: 0.018 

In [None]:
sum(scores)/len(scores), min(scores), max(scores)

(0.020801701937073123, 0.012652558504086888, 0.029336020602396148)

In [None]:
times, scores = timing_test(10, np.array((7,9,11)), stride=5, num_kernels=2, seq_length=16000)

Finished Run 1 Score: 0.027 Time: 3.198
Finished Run 2 Score: 0.021 Time: 3.093
Finished Run 3 Score: 0.024 Time: 3.022
Finished Run 4 Score: 0.021 Time: 2.995
Finished Run 5 Score: 0.02 Time: 3.41
Finished Run 6 Score: 0.027 Time: 3.291
Finished Run 7 Score: 0.023 Time: 3.315
Finished Run 8 Score: 0.032 Time: 3.213
Finished Run 9 Score: 0.019 Time: 2.963
Finished Run 10 Score: 0.027 Time: 2.992


In [None]:
sum(scores)/len(scores), min(scores), max(scores)

(0.02417422461090583, 0.019370731161124175, 0.03235919829806293)

In [None]:
def timing_test_only_ppv(runs, candidate_lengths, stride, num_kernels, seq_length):
    """Like `timing_test`, but the classifier only sees the ppv features.

    Assumes `apply_kernels` lays out two features per kernel as (ppv, max), so
    the ppv values are the even-numbered columns. Selecting them with a step
    of 2 works for any `num_kernels`, not only for exactly two kernels.
    NOTE(review): if an `apply_kernels` variant with a different number of
    features per kernel is active, the step must be adjusted.

    Returns
    -------
    (times, scores) : two lists with one entry per run.
    """
    times, scores = [],[]
    for i in range(runs):
        kernels = generate_kernels(seq_length, num_kernels, candidate_lengths, stride)
        start = time.time()
        # keep columns 0, 2, 4, ... (one ppv value per kernel)
        x_train_tfm = apply_kernels(np_x_train, kernels)[:, 0::2]
        x_valid_tfm = apply_kernels(np_x_valid, kernels)[:, 0::2]
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7), normalize=True)
        classifier.fit(x_train_tfm, np_y_train)
        score = classifier.score(x_valid_tfm, np_y_valid)
        t = time.time()-start
        scores.append(score)
        times.append(t)
        print("Finished Run", i+1, "Score:", round(score, 3), "Time:", round(t,3))
    return times, scores

In [None]:
times, scores = timing_test_only_ppv(10, np.array((7,9,11)), stride=5, num_kernels=2, seq_length=16000)

Finished Run 1 Score: 0.019 Time: 3.114
Finished Run 2 Score: 0.024 Time: 3.296
Finished Run 3 Score: 0.02 Time: 3.095
Finished Run 4 Score: 0.019 Time: 3.154
Finished Run 5 Score: 0.02 Time: 2.984
Finished Run 6 Score: 0.02 Time: 3.081
Finished Run 7 Score: 0.024 Time: 3.201
Finished Run 8 Score: 0.022 Time: 3.265
Finished Run 9 Score: 0.023 Time: 3.07
Finished Run 10 Score: 0.017 Time: 3.147


In [None]:
sum(scores)/len(scores), min(scores), max(scores)

(0.02069197178367484, 0.017467248908296942, 0.02373754338819841)

In [None]:
def timing_test_only_maxpool(runs, candidate_lengths, stride, num_kernels, seq_length):
    """Like `timing_test`, but the classifier only sees the max features.

    Assumes `apply_kernels` lays out two features per kernel as (ppv, max), so
    the max values are the odd-numbered columns. Selecting them with a step of
    2 works for any `num_kernels`, not only for exactly two kernels.
    NOTE(review): if an `apply_kernels` variant with a different number of
    features per kernel is active, the step must be adjusted.

    Returns
    -------
    (times, scores) : two lists with one entry per run.
    """
    times, scores = [],[]
    for i in range(runs):
        kernels = generate_kernels(seq_length, num_kernels, candidate_lengths, stride)
        start = time.time()
        # keep columns 1, 3, 5, ... (one max value per kernel)
        x_train_tfm = apply_kernels(np_x_train, kernels)[:, 1::2]
        x_valid_tfm = apply_kernels(np_x_valid, kernels)[:, 1::2]
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7), normalize=True)
        classifier.fit(x_train_tfm, np_y_train)
        score = classifier.score(x_valid_tfm, np_y_valid)
        t = time.time()-start
        scores.append(score)
        times.append(t)
        print("Finished Run", i+1, "Score:", round(score, 3), "Time:", round(t,3))
    return times, scores

In [None]:
times, scores = timing_test_only_maxpool(10, np.array((7,9,11)), stride=5, num_kernels=2, seq_length=16000)

Finished Run 1 Score: 0.018 Time: 3.357
Finished Run 2 Score: 0.013 Time: 2.778
Finished Run 3 Score: 0.018 Time: 3.305
Finished Run 4 Score: 0.02 Time: 3.296
Finished Run 5 Score: 0.022 Time: 2.832
Finished Run 6 Score: 0.02 Time: 2.956
Finished Run 7 Score: 0.018 Time: 3.311
Finished Run 8 Score: 0.021 Time: 3.344
Finished Run 9 Score: 0.018 Time: 3.363
Finished Run 10 Score: 0.017 Time: 3.157


In [None]:
sum(scores)/len(scores), min(scores), max(scores)

(0.018474974806852538, 0.013436345314074572, 0.021722091591087223)

### PPV appears to be more predictive than maxpool. What if we invent a bunch of other features to process the output of the random convolutions? Time-wise these should be free.

In [None]:
@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding, stride):
    """Apply one dilated, strided kernel to a 1-D series and pool the output.

    Parameters
    ----------
    X : 1-D float array
        Input series.
    weights : 1-D float array
        Kernel weights; the first `length` entries are used.
    length, dilation, padding, stride : int
        Kernel length, dilation, zero padding added on each side, and step
        between consecutive window start positions.
    bias : float
        Added to every convolution output.

    Returns
    -------
    (ppv, max, mean)
        Proportion of convolution outputs that are > 0, the largest output,
        and the average output (global average pooling). `(0.0, 0.0, 0.0)` is
        returned when the dilated kernel does not fit in the (padded) input.
    """
    # zero padding
    if padding > 0:
        _input_length = len(X)
        _X = np.zeros(_input_length + (2 * padding))
        _X[padding:(padding + _input_length)] = X
        X = _X
    input_length = len(X)
    # Number of start positions at which the whole dilated kernel fits.
    num_positions = input_length - ((length - 1) * dilation)
    if num_positions <= 0:
        # Nothing to convolve: avoid 0/0 features and a -inf maximum.
        return 0.0, 0.0, 0.0
    # Ceiling division: start positions 0, stride, 2*stride, ... < num_positions.
    output_length = (num_positions + stride - 1) // stride

    _ppv = 0 # "proportion of positive values"
    _max = -np.inf
    _total = 0.0 # running sum of all outputs; must live outside the loop
    for i in range(output_length):
        # Scalar dot product of the kernel with the dilated window starting at
        # i*stride; the last tap sits at offset (length - 1) * dilation.
        _sum = bias
        for j in range(length):
            _sum += weights[j] * X[i*stride + (j * dilation)]
        if _sum > 0:
            _ppv += 1
        if _sum > _max:
            _max = _sum
        _total += _sum
    # ppv, max pooling and mean / global average pooling
    return _ppv / output_length, _max, _total / output_length

@njit(parallel = True, fastmath = True)
def apply_kernels(X, kernels):
    """Transform every series in `X` with every kernel in `kernels`.

    `kernels` is the 6-tuple (weights, lengths, biases, dilations, paddings,
    strides). The result has one row per series and three consecutive columns
    per kernel, holding the triple returned by `apply_kernel` for that kernel.
    """
    weights, lengths, biases, dilations, paddings, strides = kernels
    n_series = len(X)
    n_kernels = len(weights)
    # 3 features per kernel
    features = np.zeros((n_series, 3 * n_kernels))
    # rows are independent of each other, so the outer loop is the prange one
    for row in prange(n_series):
        for k in range(n_kernels):
            k_len = lengths[k]
            col = 3 * k
            features[row, col:(col + 3)] = apply_kernel(
                X[row], weights[k][:k_len], k_len, biases[k], dilations[k], paddings[k], strides[k])
    return features