In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from local.torch_basics import *
from local.test import *
from local.basics import *
from local.data.all import *
from local.vision.core import *
from local.notebook.showdoc import show_doc
from local.audio.core import *
from local.audio.augment import *
from local.vision.learner import *
from local.vision.models.xresnet import *
from local.metrics import *
from local.callback.schedule import *
import torchaudio
from fastprogress import progress_bar as pb
from numba import njit, prange
import time
from sklearn.linear_model import RidgeClassifierCV

In [None]:
# Folder holding the ST-AEDS-20180100_1-OS recordings, resolved under the configured data path.
p10speakers = Config()['data_path'] / 'ST-AEDS-20180100_1-OS'

In [None]:
# Build a getter with recursive search enabled and no folder filter, then collect the files
# found under the dataset folder.
# NOTE(review): the meaning of the empty first argument is not visible here — confirm against AudioGetter.
x = AudioGetter("", recurse=True, folders=None)
files_10  = x(p10speakers)

In [None]:
files_10

(#3842) [/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0004_us_f0004_00446.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/m0002_us_m0002_00128.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0003_us_f0003_00279.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0001_us_f0001_00168.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0005_us_f0005_00286.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/m0005_us_m0005_00282.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0005_us_f0005_00432.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0005_us_f0005_00054.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/m0004_us_m0004_00110.wav,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/m0003_us_m0003_00180.wav...]

In [None]:
# Wrap the file list; further down it is called as `oa(i)` with a positional index.
# NOTE(review): presumably this loads the i-th file as an audio item — confirm in local.audio.core.
oa = OpenAudio(files_10)

In [None]:
# Target clip length; it is multiplied by 1000 before being handed to CropSignal
# (presumably seconds converted to milliseconds — TODO confirm CropSignal's unit).
CLIP_LENGTH = 2

In [None]:
def labeler(x):
    "Return the first five characters of the last '/'-separated component of `x` (e.g. 'f0004')."
    return str(x).split('/')[-1][:5]

sigs, labels = [],[]
cropper = CropSignal(1000*CLIP_LENGTH, pad_mode='repeat')
remove_silence = RemoveSilence()
# For every file: remove silence first, then crop, and keep the resulting signal next to its label.
for i in pb(range(len(files_10))):
    trimmed = remove_silence(oa(i))
    sigs.append(cropper(trimmed).sig)
    labels.append(labeler(files_10[i]))

In [None]:
len(sigs), len(labels)

(3842, 3842)

In [None]:
# Random 80/20 train/validation split over however many clips were actually loaded
# (derived from `sigs` instead of a hard-coded count).
n_items = len(sigs)
train_size = int(n_items*.8)
train_idxs = torch.randperm(n_items)[:train_size]
# A set of plain ints gives O(1) membership tests; `i not in <tensor>` compares against
# the whole tensor for every candidate index.
train_idx_set = set(train_idxs.tolist())
valid_idxs = [i for i in range(n_items) if i not in train_idx_set]

In [None]:
# Sanity check: the sizes of the two index collections add up to the number of clips.
assert len(train_idxs) + len(valid_idxs) == len(sigs)

In [None]:
# Materialise the split: signals are converted with `.numpy()`, labels are taken as stored in `labels`.
x_train = [sigs[idx].numpy() for idx in train_idxs]
y_train = [labels[idx] for idx in train_idxs]
x_valid = [sigs[idx].numpy() for idx in valid_idxs]
y_valid = [labels[idx] for idx in valid_idxs]

In [None]:
list(map(len, (x_train, y_train, x_valid, y_valid)))

[3073, 3073, 769, 769]

In [None]:
# Stack the per-clip arrays into a single array per split and cast to float64.
np_x_train = np.stack(x_train).astype(np.float64)
np_x_valid = np.stack(x_valid).astype(np.float64)
# Last expression: display both shapes.
np_x_train.shape, np_x_valid.shape

((3073, 1, 32000), (769, 1, 32000))

In [None]:
def o2i_f(x):
    """Turn a label string into an integer id: (last character as a digit) - 1, plus 5 when the label starts with 'm'."""
    offset = 5 if x[0] == 'm' else 0
    digit = int(x[-1])
    return offset + digit - 1

In [None]:
# Encode the string labels of each split as integer ids using o2i_f.
np_y_train = np.array([o2i_f(label) for label in y_train])
np_y_valid = np.array([o2i_f(label) for label in y_valid])

In [None]:
np_y_train

array([1, 7, 9, ..., 7, 8, 4])

In [None]:
np_x_train.shape, np_y_train.shape, np_x_valid.shape, np_y_valid.shape

((3073, 1, 32000), (3073,), (769, 1, 32000), (769,))

In [None]:
np_x_train.mean(), np_x_train.std()

(-4.5069507694461624e-05, 0.028227341577238208)

### Normalize the training and validation data (each clip by its own mean and standard deviation)

In [None]:
# Standardise every clip independently: subtract its own mean and divide by its own standard
# deviation, both computed along the last axis (axis 2). The 1e-8 term keeps the division
# defined when a clip has zero standard deviation. Each split only uses its own statistics.
np_x_train = (np_x_train - np_x_train.mean(axis = 2, keepdims = True)) / (np_x_train.std(axis = 2, keepdims = True) + 1e-8)
np_x_valid = (np_x_valid - np_x_valid.mean(axis = 2, keepdims = True)) / (np_x_valid.std(axis = 2, keepdims = True) + 1e-8)

In [None]:
np_x_train[0].max()

4.4730514172839015

In [None]:
np_x_train.mean(), np_x_train.std()

(-3.7392802814417085e-20, 0.9999995557310202)

In [None]:
np_x_train.dtype

dtype('float64')

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# ROCKET casts its input to float32 in `forward`, so build the tensors in float32 directly:
# this halves the memory held on the device and avoids a fresh cast on every forward call.
X_train_tensor = torch.tensor(np_x_train, dtype=torch.float32, device=device)
X_valid_tensor = torch.tensor(np_x_valid, dtype=torch.float32, device=device)
# Last expression: display the training tensor's shape.
X_train_tensor.shape

torch.Size([3073, 1, 32000])

In [None]:
# Unpack the second and third dimensions of the 3d training tensor (channel count and
# sequence length); the first dimension (number of samples) is discarded.
_, features, seq_len = X_train_tensor.shape

In [None]:
features, seq_len

(1, 32000)

In [None]:
class ROCKET(nn.Module):
    '''
    ROCKET is a Pytorch implementation of the ROCKET methods generate_kernels and apply_kernels that can be used
    with univariate and multivariate time series.

    Input: a 3d torch tensor of shape (batch, c_in, seq_len); it is cast to torch.float32 in `forward`.
    When used with univariate TS stored as 2d, make sure you transform the 2d to 3d by adding unsqueeze(1).
    c_in: number of channels in (features). For univariate c_in is 1.
    seq_len: sequence length (is the last dimension of the input)
    n_kernels: number of random convolution kernels to generate; their weights and biases are frozen.
    kss: candidate kernel sizes; only sizes strictly smaller than seq_len are kept.
    stride: stride applied by every convolution.

    Output of `forward`: a (batch, 2 * n_kernels) float tensor. For kernel i, column 2*i holds the maximum
    activation and column 2*i + 1 the proportion of positive activations (ppv).

    Raises ValueError when kernels are requested but no candidate kernel size is smaller than seq_len.
    '''
    def __init__(self, c_in, seq_len, n_kernels=10000, kss=[7, 9, 11], stride=7):
        super().__init__()
        kss = [ks for ks in kss if ks < seq_len]
        if n_kernels > 0 and not kss:
            raise ValueError(f"no kernel size in kss is smaller than seq_len={seq_len}")
        convs = nn.ModuleList()
        for _ in range(n_kernels):
            # The order of the random draws is kept as-is so seeded runs stay reproducible.
            ks = int(np.random.choice(kss))
            # Dilation is sampled on a log scale; the largest value keeps the dilated kernel within seq_len.
            dilation = 2**np.random.uniform(0, np.log2((seq_len - 1) // (ks - 1)))
            # Padding is switched on for roughly half of the kernels.
            padding = int((ks - 1) * dilation // 2) if np.random.randint(2) == 1 else 0
            # Zero-mean Gaussian weights and a bias drawn uniformly from [-1, 1).
            weight = torch.normal(0, 1, (1, c_in, ks))
            weight -= weight.mean()
            bias = 2 * (torch.rand(1) - .5)
            layer = nn.Conv1d(c_in, 1, ks, stride=stride, padding=2 * padding, dilation=int(dilation), bias=True)
            # Replace the default initialisation with the random, non-trainable kernel.
            layer.weight = torch.nn.Parameter(weight, requires_grad=False)
            layer.bias = torch.nn.Parameter(bias, requires_grad=False)
            convs.append(layer)
        self.convs = convs
        self.n_kernels = n_kernels
        self.kss = kss

    def forward(self, x):
        # The cast does not depend on the kernel, so do it once instead of once per kernel.
        x = x.float()
        feats = []
        for conv in self.convs:
            out = conv(x)
            _max = out.max(dim=-1).values
            _ppv = torch.gt(out, 0).sum(dim=-1).float() / out.shape[-1]
            feats.append(_max)
            feats.append(_ppv)
        if not feats:
            # No kernels: return an empty feature matrix rather than failing on an unset variable.
            return x.new_empty((x.shape[0], 0))
        # A single concatenation avoids re-copying the growing output for every kernel.
        return torch.cat(feats, dim=-1)

In [None]:
# Instantiate the random-kernel feature extractor with 1000 kernels and stride 5, then move it to the selected device.
model = ROCKET(features, seq_len, n_kernels=1000, kss=[7, 9, 11], stride=5).to(device)

In [None]:
%%time
# Transform both splits with the fixed kernels, fit a cross-validated ridge classifier on the
# training features, and display its score on the validation features (last expression).
X_train_tfm = model(X_train_tensor)
X_valid_tfm = model(X_valid_tensor)
# Candidate regularisation strengths: 7 values log-spaced from 1e-3 to 1e3.
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7), normalize=True)
# The features are moved to the CPU before being handed to scikit-learn.
classifier.fit(X_train_tfm.cpu(), y_train)
classifier.score(X_valid_tfm.cpu(), y_valid)

CPU times: user 1.21 s, sys: 3.78 s, total: 4.99 s
Wall time: 1min 6s


In [None]:
from sklearn.linear_model import RidgeClassifierCV


0.988296488946684

In [None]:
def timing_test_torch(runs, candidate_lengths, stride, num_kernels, seq_length):
    """Repeatedly build a ROCKET model, transform the data, fit a ridge classifier, and time each run.

    Reads the notebook-level `features`, `X_train_tensor`, `X_valid_tensor`, `y_train`, `y_valid`
    and `device`.

    runs: number of independent repetitions (fresh random kernels each time).
    candidate_lengths: candidate kernel sizes passed to ROCKET as `kss`.
    stride: convolution stride passed to ROCKET.
    num_kernels: number of kernels passed to ROCKET as `n_kernels`.
    seq_length: sequence length passed to ROCKET as `seq_len`.

    Returns `(times, scores)`: per-run elapsed seconds (model creation, both transforms, fit and
    scoring) and per-run validation scores. One summary line is printed per run.
    """
    times, scores = [],[]
    for run in range(runs):
        # perf_counter is monotonic, so the measurement is unaffected by system clock adjustments.
        start = time.perf_counter()
        # Use the caller-supplied sequence length instead of silently reading the global `seq_len`.
        model = ROCKET(features, seq_length, n_kernels=num_kernels, kss=candidate_lengths, stride=stride).to(device)
        X_train_tfm = model(X_train_tensor)
        X_valid_tfm = model(X_valid_tensor)
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7), normalize=True)
        classifier.fit(X_train_tfm.cpu(), y_train)
        score = classifier.score(X_valid_tfm.cpu(), y_valid)
        t = time.perf_counter()-start
        scores.append(score)
        times.append(t)
        print("Finished Run", run+1, "Score:", round(score, 3), "Time:", round(t,3))
    return times, scores

In [None]:
# Ten repeated runs with 1000 kernels and stride 3; the returned (times, scores) pair is displayed as the cell output.
timing_test_torch(10, np.array((7,9,11)), stride=3, num_kernels=1000, seq_length=32000)

Finished Run 1 Score: 0.993 Time: 104.952
Finished Run 2 Score: 0.993 Time: 100.857
Finished Run 3 Score: 0.99 Time: 100.857
Finished Run 4 Score: 0.993 Time: 98.237
Finished Run 5 Score: 0.993 Time: 90.696
Finished Run 6 Score: 0.996 Time: 89.319
Finished Run 7 Score: 0.993 Time: 86.361
Finished Run 8 Score: 0.993 Time: 86.523
Finished Run 9 Score: 0.993 Time: 82.374
Finished Run 10 Score: 0.991 Time: 83.192


([104.95241570472717,
  100.85698008537292,
  100.85703492164612,
  98.23719668388367,
  90.69618582725525,
  89.31893134117126,
  86.3606321811676,
  86.52349829673767,
  82.37362384796143,
  83.1916491985321],
 [0.9934980494148244,
  0.9934980494148244,
  0.9895968790637191,
  0.9934980494148244,
  0.9934980494148244,
  0.9960988296488946,
  0.9934980494148244,
  0.9934980494148244,
  0.9934980494148244,
  0.9908972691807543])

In [None]:
# Only the last expression of a cell is displayed, so show both devices together as a tuple.
X_train_tensor.device, X_valid_tensor.device

device(type='cuda', index=0)