In [1]:
import pickle as pk
import pandas as pd 
import numpy as np 
from scipy.signal import butter, sosfilt, sosfreqz

In [2]:
path = '/content/drive/My Drive/Colab Notebooks/DeepLearningAssignment/Data/'
data = open('Data_Raw_signals.pkl','rb')
data_raw = pk.load(data) 
test = open('Test_Raw_signals_no_labels.pkl','rb')
test_raw = pk.load(test)

In [3]:
data = data_raw[0]
labels = data_raw[1]
test_data = test_raw[0]

In [4]:
data = np.reshape(data,(15375,3000,2))
test_data = np.reshape(test_data,(1754,3000,2))

In [5]:
data_whole = np.concatenate((data,test_data),axis=0)

# Calculate Absolute Band Power

In [6]:
def bandpower(data, sf, band, window_sec=None, relative=False):
    from scipy.signal import welch
    from scipy.integrate import simps
    band = np.asarray(band)
    low, high = band

    # Define window length
    if window_sec is not None:
        nperseg = window_sec * sf
    else:
        nperseg = (2 / low) * sf

    # Compute the modified periodogram (Welch)
    freqs, psd = welch(data, sf, nperseg=nperseg)

    # Frequency resolution
    freq_res = freqs[1] - freqs[0]

    # Find closest indices of band in frequency vector
    idx_band = np.logical_and(freqs >= low, freqs <= high)

    # Integral approximation of the spectrum using Simpson's rule.
    bp = simps(psd[idx_band], dx=freq_res)

    if relative:
        bp /= simps(psd, dx=freq_res)
    return bp

In [7]:
delta_bd = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        bd = bandpower(sig_cD, sf=100, band=(.5,4), window_sec=None, relative=False)
        delta_bd[i,:,j] = bd

theta_bd = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        bd = bandpower(sig_cD, sf=100, band=(4,8), window_sec=None, relative=False)
        theta_bd[i,:,j] = bd
        
alpha_bd = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        bd = bandpower(sig_cD, sf=100, band=(8,13), window_sec=None, relative=False)
        alpha_bd[i,:,j] = bd


beta_bd = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        bd = bandpower(sig_cD, sf=100, band=(13,30), window_sec=None, relative=False)
        beta_bd[i,:,j] = bd
        
gamma_bd = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        bd = bandpower(sig_cD, sf=100, band=(30,44), window_sec=None, relative=False)
        gamma_bd[i,:,j] = bd

In [9]:
all_bands_pw = np.concatenate((delta_bd,theta_bd,alpha_bd,beta_bd,gamma_bd),axis=1)

# All bands Relative Power

In [10]:
delta_rpbd= np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        rpbd= bandpower(sig_cD, sf=100, band=(.5,4), window_sec=4, relative=True)
        delta_rpbd[i,:,j] = rpbd

theta_rpbd= np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        rpbd= bandpower(sig_cD, sf=100, band=(4,8), window_sec=4, relative=True)
        theta_rpbd[i,:,j] = rpbd
        
alpha_rpbd= np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        rpbd= bandpower(sig_cD, sf=100, band=(8,13), window_sec=4, relative=True)
        alpha_rpbd[i,:,j] = rpbd


beta_rpbd= np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        rpbd= bandpower(sig_cD, sf=100, band=(13,30), window_sec=4, relative=True)
        beta_rpbd[i,:,j] = rpbd
        
gamma_rpbd= np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))

for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_cD = data_whole[i,:,j]
        rpbd= bandpower(sig_cD, sf=100, band=(30,44), window_sec=4, relative=True)
        gamma_rpbd[i,:,j] = rpbd




In [12]:
all_bands_rppw = np.concatenate((delta_rpbd,theta_rpbd,alpha_rpbd,beta_rpbd,gamma_rpbd),axis=1)

# Calculate Mean of bands

In [13]:
def butter_bandpass(lowcut, highcut, fs, order=5):
        nyq = 0.5 * fs
        low = lowcut / nyq
        high = highcut / nyq
        sos = butter(order, [low, high], analog=False, btype='band', output='sos')
        return sos

def butter_bandpass_filter(signal, lowcut=0.5, highcut=4, fs=100, order=5):
        sos = butter_bandpass(lowcut, highcut, fs, order=order)
        y = sosfilt(sos, signal)
        return y

In [14]:
delta_mean = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=0.5, highcut=4, fs=100, order=5)
        signal_mean = np.mean(signal)
        delta_mean[i,:,j] = signal_mean

        
theta_mean = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=4, highcut=8, fs=100, order=5)
        signal_mean = np.mean(signal)
        theta_mean[i,:,j] = signal_mean
        
alpha_mean = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=8, highcut=13, fs=100, order=5)
        signal_mean = np.mean(signal)
        alpha_mean[i,:,j] = signal_mean
        
beta_mean = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=13, highcut=30, fs=100, order=5)
        signal_mean = np.mean(signal)
        beta_mean[i,:,j] = signal_mean

gamma_mean = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=30, highcut=44, fs=100, order=5)
        signal_mean = np.mean(signal)
        gamma_mean[i,:,j] = signal_mean

In [15]:
all_band_mean = np.concatenate((delta_mean,theta_mean,alpha_mean,beta_mean,gamma_mean),axis=1)

# Calculate std of bands

In [16]:
delta_std = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=0.5, highcut=4, fs=100, order=5)
        signal_std = np.std(signal)
        delta_std[i,:,j] = signal_std

        
theta_std = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=4, highcut=8, fs=100, order=5)
        signal_std = np.std(signal)
        theta_std[i,:,j] = signal_std
        
alpha_std = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=8, highcut=13, fs=100, order=5)
        signal_std = np.std(signal)
        alpha_std[i,:,j] = signal_std
        
beta_std = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=13, highcut=30, fs=100, order=5)
        signal_std = np.std(signal)
        beta_std[i,:,j] = signal_std

gamma_std = np.ndarray(shape = (data_whole.shape[0],1,data_whole.shape[2]))
for i in range(data_whole.shape[0]):
    for j in range(data_whole.shape[2]):
        sig_raw = data_whole[i,:,j]
        signal = butter_bandpass_filter(sig_raw, lowcut=30, highcut=44, fs=100, order=5)
        signal_std = np.std(signal)
        gamma_std[i,:,j] = signal_std


In [17]:
all_band_std = np.concatenate((delta_std,theta_std,alpha_std,beta_std,gamma_std),axis=1)

In [29]:
features = np.concatenate((all_bands_pw,all_bands_rppw),axis=2)

In [37]:
all_bands_pw = np.reshape(all_bands_pw,(all_bands_pw.shape[0],5*2))

In [38]:
df_band = pd.DataFrame(all_bands_pw)

In [44]:
all_bands_rppw = np.reshape(all_bands_rppw,(all_bands_rppw.shape[0],5*2))

In [46]:
df_rppw = pd.DataFrame(all_bands_rppw)

In [59]:
data_bdrp = pd.concat((df_band,df_rppw),axis=1)

In [60]:
data_bdrp[data_bdrp.isna().any(axis=1)]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1
12213,0.0,0.0,2.156071e-58,2.156071e-58,0.0,0.0,4.211979e-57,4.211979e-57,0.0,0.0,,,,,,,,,,
12214,0.0,0.0,2.156071e-58,2.156071e-58,0.0,0.0,4.211979e-57,4.211979e-57,0.0,0.0,,,,,,,,,,
12215,0.0,0.0,2.156071e-58,2.156071e-58,0.0,0.0,4.211979e-57,4.211979e-57,0.0,0.0,,,,,,,,,,


In [62]:
train_data = data_bdrp[:15375]
test_data = data_bdrp[15375:]

In [96]:
data_df = pd.DataFrame(train_data)
data_df['labels'] = labels
data_df = data_df.sample(frac=1).reset_index(drop=True)

In [97]:
data_df = data_df.dropna()

In [98]:
data_df = data_df.drop(columns=[8, 9])

In [99]:
data_df

Unnamed: 0,0,1,2,3,4,5,6,7,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,labels
0,1456.569404,1457.483630,260.305635,260.738545,76.710203,77.742070,35.780358,36.022382,0.793137,0.791758,0.102153,0.103753,0.021425,0.021599,0.014825,0.014897,0
1,552.624830,551.314573,82.824698,82.108668,33.349973,33.736326,34.137337,34.503165,0.683054,0.681688,0.066114,0.065539,0.030078,0.031147,0.046942,0.047431,0
2,2770.142619,2758.908057,92.930334,98.587912,28.981087,31.451896,32.472568,42.879767,0.248466,0.246601,0.004981,0.005327,0.001212,0.001485,0.003377,0.004589,0
3,35.496402,35.245548,11.945380,12.027328,6.040333,5.994402,13.622149,13.468409,0.337733,0.336151,0.106637,0.107912,0.057838,0.058061,0.251087,0.250838,3
4,534.238661,535.926174,107.146798,108.194163,37.425358,37.757877,17.107719,17.098865,0.667013,0.666104,0.122717,0.123159,0.026531,0.027097,0.014803,0.014882,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15370,61.350380,61.542196,23.448901,23.201737,10.293787,10.212540,11.574818,11.207899,0.439743,0.442385,0.141739,0.140349,0.067826,0.066970,0.091309,0.089294,0
15371,224.090233,223.585295,44.828665,44.407581,16.607090,16.614796,14.154300,14.107415,0.429559,0.428569,0.078150,0.077588,0.018980,0.018478,0.036725,0.036360,2
15372,447.382911,448.572572,45.424568,45.475623,17.464423,17.116952,18.655999,18.502116,0.707438,0.706019,0.058855,0.059125,0.022773,0.021768,0.033593,0.034119,1
15373,1009.746315,1008.221865,144.335179,144.411304,48.840912,48.947500,37.597174,37.726314,0.697893,0.697224,0.063837,0.063607,0.021674,0.022087,0.024404,0.024817,3


In [54]:
test_df = pd.DataFrame(test_data)

In [100]:
data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,labels
0,1456.569404,1457.48363,260.305635,260.738545,76.710203,77.74207,35.780358,36.022382,0.793137,0.791758,0.102153,0.103753,0.021425,0.021599,0.014825,0.014897,0
1,552.62483,551.314573,82.824698,82.108668,33.349973,33.736326,34.137337,34.503165,0.683054,0.681688,0.066114,0.065539,0.030078,0.031147,0.046942,0.047431,0
2,2770.142619,2758.908057,92.930334,98.587912,28.981087,31.451896,32.472568,42.879767,0.248466,0.246601,0.004981,0.005327,0.001212,0.001485,0.003377,0.004589,0
3,35.496402,35.245548,11.94538,12.027328,6.040333,5.994402,13.622149,13.468409,0.337733,0.336151,0.106637,0.107912,0.057838,0.058061,0.251087,0.250838,3
4,534.238661,535.926174,107.146798,108.194163,37.425358,37.757877,17.107719,17.098865,0.667013,0.666104,0.122717,0.123159,0.026531,0.027097,0.014803,0.014882,3


In [101]:
labels = data_df.pop('labels')


In [102]:
data = data_df[:].values

In [103]:
from sklearn.model_selection import train_test_split

X_train,X_val,Y_train,Y_val = train_test_split(data,labels, test_size=0.20, random_state=123, shuffle = False)

print(X_train.shape, Y_train.shape, X_val.shape, Y_val.shape)

(12297, 16) (12297,) (3075, 16) (3075,)


In [104]:
params_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],'C': [1, 10, 100, 1000]}]

In [105]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV

In [106]:
svm_model = GridSearchCV(SVC(),params_grid, cv=5)
svm_model.fit(X_train, Y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [107]:
# View the accuracy score
print('Best score for training data:', svm_model.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

Best score for training data: 0.18931446694315687 

Best C: 1 

Best Kernel: rbf 

Best Gamma: 0.0001 



In [108]:
Y_pred = svm_model.predict(X_val)

In [109]:
print(confusion_matrix(Y_val,Y_pred))
print("\n")
print(classification_report(Y_val,Y_pred))
print("Training set score for SVM: %f" % svm_model.score(X_train, Y_train))
print("Testing  set score for SVM: %f" % svm_model.score(X_val , Y_val))

[[369  30 146   3  19   4]
 [350  26 124   5  12   1]
 [423  31 131   8  14   3]
 [372  22 137   7  14   4]
 [302  22 109   7   7   5]
 [253  17  88   3   6   1]]


              precision    recall  f1-score   support

           0       0.18      0.65      0.28       571
           1       0.18      0.05      0.08       518
           2       0.18      0.21      0.19       610
           3       0.21      0.01      0.02       556
           4       0.10      0.02      0.03       452
           5       0.06      0.00      0.01       368

    accuracy                           0.18      3075
   macro avg       0.15      0.16      0.10      3075
weighted avg       0.16      0.18      0.11      3075

Training set score for SVM: 0.259901
Testing  set score for SVM: 0.175935


In [152]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib

In [153]:
classifier = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 42)
classifier.fit(X_train, Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [154]:
classifier.score(X_train, Y_train)

0.999349593495935

In [155]:
Y_pred = classifier.predict(X_val)

In [156]:
print(confusion_matrix(Y_val,Y_pred))
print("\n")
print(classification_report(Y_val,Y_pred))
print("Training set score for RF: %f" % classifier.score(X_train, Y_train))
print("Testing  set score for RF: %f" % classifier.score(X_val , Y_val))

[[180  89 169  75  40  14]
 [167  81 146  71  44  22]
 [194  99 154  75  38  22]
 [170  64 151  85  36  19]
 [146  63 146  57  36  15]
 [138  69 103  50  36  11]]


              precision    recall  f1-score   support

           0       0.18      0.32      0.23       567
           1       0.17      0.15      0.16       531
           2       0.18      0.26      0.21       582
           3       0.21      0.16      0.18       525
           4       0.16      0.08      0.10       463
           5       0.11      0.03      0.04       407

    accuracy                           0.18      3075
   macro avg       0.17      0.17      0.16      3075
weighted avg       0.17      0.18      0.16      3075

Training set score for RF: 0.999350
Testing  set score for RF: 0.177886
