In [1]:
import numpy as np
import pandas as pd

from scipy.signal import resample, stft
from sys import getsizeof
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from scipy.fftpack import fft

In [2]:
%%time
X_train_resamp_data = np.load('./X_train_resamp_data.npy')
X_test_resamp_data  = np.load('./X_test_resamp_data.npy')
y_test_resamp       = np.load('./y_test_resamp.npy')
y_train_resamp      = np.load('./y_train_resamp.npy')

CPU times: user 3.09 ms, sys: 795 ms, total: 798 ms
Wall time: 1.9 s


In [3]:
X_train_resamp_data.shape, X_test_resamp_data.shape, y_test_resamp.shape, y_train_resamp.shape

((840, 800000), (210, 800000), (210,), (840,))

In [4]:
n_fft = 256
X_train_fft = fft(X_train_resamp_data, n_fft)
X_test_fft  = fft(X_test_resamp_data, n_fft) 


  x = x[index]


In [5]:
X_train_fft.shape

(840, 256)

In [74]:
params1 = {
    'min_samples_split': [2,3,5,7,10,13]
}

In [75]:
gs1 = GridSearchCV(RandomForestClassifier(n_estimators=100), n_jobs = 6, verbose = 2,
                  param_grid = params1, 
                  return_train_score = True,
                  cv = 5)

In [94]:
gs1.fit(np.abs(X_train_fft), y_train_resamp)
gs1.best_params_

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  30 out of  30 | elapsed:    4.9s finished


{'min_samples_split': 3}

In [95]:
gs1.score(np.abs(X_train_fft), y_train_resamp)

1.0

In [96]:
gs2.score(np.abs(X_test_fft), y_test_resamp)

0.8

In [68]:
X_train_stft = np.array([Zxx for _, _, Zxx in 
                map(lambda x: stft(x, nperseg=128, noverlap=16) , X_train_resamp_data)])

In [69]:
X_test_stft = np.array([Zxx for _, _, Zxx in 
                map(lambda x: stft(x, nperseg=128, noverlap=16) , X_test_resamp_data)])

In [70]:
X_test_stft.shape

(210, 65, 7144)

In [71]:
X_test_stft = X_test_stft.reshape(210, -1)

In [72]:
X_train_stft = X_train_stft.reshape(840, -1)

In [56]:
params2 = {
    'min_samples_split': [2,3,5,7,10,13]
}

In [57]:
gs2 = GridSearchCV(ExtraTreesClassifier(n_estimators=100), n_jobs = 6, verbose = 2,
                  param_grid = params2, 
                  return_train_score = True,
                  cv = 5)

In [59]:
gs2.fit(np.abs(X_train_fft), y_train_resamp)
gs2.best_params_

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  30 out of  30 | elapsed:    4.1s finished


{'min_samples_split': 13}

In [60]:
gs2.score(np.abs(X_train_fft), y_train_resamp)

1.0

In [61]:
gs2.score(np.abs(X_test_fft), y_test_resamp)

0.8

In [62]:
Zxx_train.shape

(840, 513, 1564)

In [63]:
Zxx_train = Zxx_train.reshape(840, -1)

In [66]:
Zxx_test.shape

(210, 513, 1564)

In [67]:
Zxx_test = Zxx_test.reshape(210, -1)

In [68]:
type(Zxx_test[5,5])

numpy.complex64

In [80]:
np.arange(1,100,10)

array([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [81]:
params3 = {
    'min_samples_split' : np.arange(2,150,10)
                              }

In [82]:
gs3 = RandomizedSearchCV(ExtraTreesClassifier(n_estimators=100), n_jobs = 6, verbose = 2,
                  param_distributions = params3, 
                  return_train_score = True,
                  cv = 5)

In [83]:
gs3.fit(np.abs(X_train_fft), y_train_resamp)
gs3.best_params_

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:    2.3s
[Parallel(n_jobs=6)]: Done  50 out of  50 | elapsed:    2.9s finished


{'min_samples_split': 12}

In [84]:
gs3.score(np.abs(X_train_fft), y_train_resamp)

1.0

In [85]:
gs3.score(np.abs(X_test_fft), y_test_resamp)

0.7952380952380952

In [None]:
gs1.fit(np.abs(X_train_stft), y_train_resamp)
print(gs1.best_params_)
print(gs1.score(np.abs(X_train_stft), y_train_resamp))
print(gs1.score(np.abs(X_test_stft), y_test_resamp))

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
