In [None]:
import os
import pandas as pd
from os.path import join
from scipy.fftpack import fft
from scipy.io import wavfile
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd

In [None]:
# Root folder of the training clips. Later cells treat every entry directly
# under it as one label and look for that label's .wav files inside it.
train_audio_path = '../input/train/audio/'
labels = os.listdir(train_audio_path)
# Drop the first entry of the listing.
# NOTE(review): os.listdir() returns entries in arbitrary order, so which entry
# this removes depends on the filesystem - confirm which one is meant to be
# excluded and remove it by name instead.
del labels[0]

In [None]:
# Map every label to the names of the .wav files found in its folder,
# printing the label and its clip count as we go.
samples = {}
for label in labels:
    label_dir = join(train_audio_path, label)
    wav_names = [name for name in os.listdir(label_dir) if name.endswith('.wav')]
    samples[label] = wav_names
    print('{}\n{}'.format(label, len(wav_names)))

In [None]:
def custom_fft(y, fs):
    """Compute the one-sided amplitude spectrum of a 1-D signal.

    Parameters
    ----------
    y : ndarray
        Signal samples; its first dimension is the number of samples ``N``.
    fs : int or float
        Sampling rate of ``y`` in samples per second.

    Returns
    -------
    xf : ndarray
        Frequencies of the first ``N // 2`` FFT bins, i.e. ``k * fs / N`` for
        ``k = 0 .. N // 2 - 1``.
    vals : ndarray
        ``2 / N * |FFT(y)[k]|`` for the same bins.
    """
    N = y.shape[0]
    half = N // 2
    yf = fft(y)
    # Bin k of an N-point FFT sits at k * fs / N, so the kept bins stop one
    # step short of the Nyquist frequency. Building the axis with a linspace
    # that includes fs / 2 as its endpoint would stretch every frequency.
    xf = np.arange(half) * (float(fs) / N)
    # Every kept bin, including the DC bin, is scaled by the same 2 / N factor.
    vals = 2.0 / N * np.abs(yf[:half])
    return xf, vals

In [None]:
# Only clips that are exactly CLIP_LENGTH samples at SAMPLE_RATE are used, so
# every spectrum appended to X has the same length.
SAMPLE_RATE = 16000
CLIP_LENGTH = 16000

# X collects one amplitude spectrum per accepted clip; y collects the matching
# label name at the same position.
X = []
y = []
for label, samp in samples.items():
    # The background-noise folder is not a spoken-word class.
    if label == '_background_noise_':
        continue
    for wav in samp:
        rate, row = wavfile.read(join(train_audio_path, label, wav))
        # Skip clips with a different sampling rate or length.
        if rate != SAMPLE_RATE or len(row) != CLIP_LENGTH:
            continue
        # Only the magnitudes are used as features; the frequency axis is
        # identical for every accepted clip and is discarded.
        vals = custom_fft(row, rate)[1]
        X.append(vals)
        y.append(label)
    # Progress marker: printed once all clips of this label are processed.
    print(label)

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the 60/40 split, and every score computed from it, is the same
# on each Restart & Run All.
SPLIT_SEED = 42

# Keep only the clips whose label is still present in `labels`. The filter is
# evaluated once so features and targets cannot get out of step.
known_labels = set(labels)
kept = [(features, word) for features, word in zip(X, y) if word in known_labels]
Xf = np.array([features for features, _ in kept])
yf = np.array([word for _, word in kept])

Xtrain, Xtest, ytrain, ytest = train_test_split(
    Xf, yf, test_size=0.4, random_state=SPLIT_SEED)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC

# Number of principal components requested from every PCA step.
components = 101

# Stand-alone instances of the three stages (scaling, PCA, linear SVM).
# The per-label training loop builds its own instances inside each Pipeline.
scaler, pca, svm = StandardScaler(), PCA(n_components=components), LinearSVC()

In [None]:
import time
from sklearn.pipeline import Pipeline

# One-vs-rest training: every spoken-word label gets its own binary
# scaler -> PCA -> linear SVM pipeline, stored in `pipe` under the label name.
pipe = {}
for label in labels:
    # No classifier is trained for the background-noise folder.
    if label != '_background_noise_':
        print(label)
        start = time.time()

        # Binary targets: 1 for clips of the current label, 0 for all others.
        y_transformed = [int(yy == label) for yy in ytrain]
        y_transformed_test = np.asarray(ytest == label, dtype=int)

        steps = [
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=components)),
            ('svm', LinearSVC()),
        ]
        pipe[label] = Pipeline(steps)
        pipe[label].fit(Xtrain, y_transformed)

        # Report the fit time, then the training and held-out scores.
        print(time.time() - start)
        print(pipe[label].score(Xtrain, y_transformed))
        print(pipe[label].score(Xtest, y_transformed_test))

In [None]:
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

In [None]:
class Ensemble(BaseEstimator, ClassifierMixin):
    """Combine per-label binary classifiers into one multi-class classifier.

    Each label has its own binary classifier whose ``predict`` says, per
    sample, whether the sample belongs to that label (non-zero) or not (0).
    The ensemble multiplies those votes by per-label weights and answers with
    the label that has the largest weighted vote.

    Parameters
    ----------
    clfs : mapping
        Maps each entry of ``labels`` to a classifier exposing ``predict``.
    weights : sequence of numbers
        One weight per label, in the same order as ``labels``.
    labels : sequence
        Labels the ensemble can answer with; a prediction is ``labels[i]``.
    """

    def __init__(self, clfs, weights, labels):
        self.clfs = clfs
        # Keep the constructor argument under its own name as well, so the
        # inherited get_params() / clone() / repr can find it.
        self.weights = weights
        self.w = np.array(weights)
        self.labels = labels

    def R(self, x):
        """Return the index of the winning label for one sample.

        Parameters
        ----------
        x : sequence of numbers
            One vote per label, aligned with ``self.labels``.

        Returns
        -------
        int
            Index of the largest weighted vote. When all votes sum to zero,
            the index of the smallest weight is returned instead.
        """
        votes = np.asarray(x)
        if votes.sum() == 0:
            return np.argmin(self.w)
        return np.argmax(self.w * votes)

    def fit(self, x, y):
        """Do nothing and return ``self``.

        The ensemble only uses the classifiers passed to the constructor, so
        there is nothing to learn here; ``x`` and ``y`` are ignored.
        """
        return self

    def predict(self, x):
        """Predict one label per sample in ``x``.

        Parameters
        ----------
        x : array-like
            Samples, passed unchanged to every per-label classifier.

        Returns
        -------
        ndarray
            Predicted label for each sample, taken from ``self.labels``.
        """
        # One vote vector per label, each with one entry per sample.
        per_label = [self.clfs[label].predict(x) for label in self.labels]
        # zip(*per_label) regroups the votes per sample: one vote per label,
        # in self.labels order, for any number of labels.
        winners = [self.R(votes) for votes in zip(*per_label)]
        return np.array([self.labels[index] for index in winners])


In [None]:
# Labels that have a trained pipeline in `pipe`: everything except the
# background-noise folder. A filtered copy is built so `labels` itself is left
# untouched and this cell can be re-run safely.
labels_check = [label for label in labels if label != '_background_noise_']
# Equal weights: every per-label classifier gets the same say in the vote.
clas = Ensemble(pipe, np.ones(len(labels_check)), labels_check)

In [None]:
# Show the ensemble's predicted label for every clip in the held-out split.
clas.predict(Xtest)

In [None]:
# Score the ensemble's multi-class predictions on the held-out split against
# the true labels (uses the score() inherited from ClassifierMixin).
clas.score(Xtest, ytest)