In [133]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
from numpy.fft import fft, fftfreq, ifft, rfft

stopper_data = pd.read_csv('./data/all/Stopper_train_80_to_180.csv', header=None)
wall_data = pd.read_csv('./data/all/Wall_Collection_train_80_to_180.csv', header=None)

In [141]:
def fft_from_data_frame(data_frame):
    fs= 1e6
    signal_set = []
    for index, row in data_frame.iterrows():
        fft_data = fft(row, n=row.size)/row.size
        freq = fftfreq(row.size, d=1/fs)
        cut_high_signal = np.abs(fft_data).copy()
        cut_high_signal[(freq > 50000)] = 0
        cut_high_signal[(freq < 30000)] = 0
        signal_set.append(np.abs(cut_high_signal))
    return list(filter(lambda a: a != 0, signal_set))

In [142]:
stopper_fft = fft_from_data_frame(stopper_data)
wall_fft = fft_from_data_frame(wall_data)
X = stopper_fft + wall_fft
y = [1]*len(stopper_fft) + [2]*len(wall_fft)
X = np.array(X)
y = np.array(y)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [156]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# kf = KFold(n_splits=8)
kf = KFold(n_splits=4, shuffle=True)
from sklearn import metrics

for i in range(1,10):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(i), random_state=1)
    score = np.array([])
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train,y_train)
        y_clf_result = clf.predict(X_test)
        score = np.append(score, [accuracy_score(y_clf_result, y_test)])
        pred = clf.predict_proba(X_test)[::,1]
    print("hidden layer:",i, "and accuracy score: ",np.average(score))

hidden layer: 1 and accuracy score:  0.9795081967213115
hidden layer: 2 and accuracy score:  0.9815573770491803
hidden layer: 3 and accuracy score:  0.9825819672131149
hidden layer: 4 and accuracy score:  0.9856557377049181
hidden layer: 5 and accuracy score:  0.9723360655737705
hidden layer: 6 and accuracy score:  0.5276639344262295
hidden layer: 7 and accuracy score:  0.9774590163934426
hidden layer: 8 and accuracy score:  0.9825819672131146
hidden layer: 9 and accuracy score:  0.9825819672131147


In [162]:
from sklearn.model_selection import cross_val_score

for i in range(1,13):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(10,i,), random_state=1)
    scores = cross_val_score(clf, X, y, cv=4, scoring="accuracy")
    print("hidden layer", i, "score average", scores.mean())

hidden layer 1 score average 0.9703118739906406
hidden layer 2 score average 0.5276641581546604
hidden layer 3 score average 0.9682248835579567
hidden layer 4 score average 0.5276641581546604
hidden layer 5 score average 0.9856556000258837
hidden layer 6 score average 0.9825776475334113
hidden layer 7 score average 0.9682249523974739
hidden layer 8 score average 0.9784960493001087
hidden layer 9 score average 0.978508595302116
hidden layer 10 score average 0.9078111511757101
hidden layer 11 score average 0.9467295377977137
hidden layer 12 score average 0.9651594082279744


In [148]:
from sklearn.model_selection import cross_val_score

for i in range(5,12):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(i,), random_state=1,activation="tanh")
    scores = cross_val_score(clf, X, y, cv=4, scoring="accuracy")
    print("hidden layer", i, "score average", scores.mean())

hidden layer 5 score average 0.9846268278612801
hidden layer 6 score average 0.9856556344456423
hidden layer 7 score average 0.9887294049374455
hidden layer 8 score average 0.9846435902837152
hidden layer 9 score average 0.9846477722843843
hidden layer 10 score average 0.9887335869381146
hidden layer 11 score average 0.9836190001197808


In [132]:
from sklearn.model_selection import cross_val_score

for i in range(5,12):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(i,), random_state=1,activation="logistic")
    scores = cross_val_score(clf, X, y, cv=4, scoring="accuracy")
    print("hidden layer", i, "score average", scores.mean())

[1, 3, 4, 5]


In [149]:
from sklearn.model_selection import cross_val_score

for i in range(5,12):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(i,), random_state=1,activation="identity")
    scores = cross_val_score(clf, X, y, cv=4, scoring="accuracy")
    print("hidden layer", i, "score average", scores.mean())

hidden layer 5 score average 0.9825943755360877
hidden layer 6 score average 0.9774504114537942
hidden layer 7 score average 0.9836064196980149
hidden layer 8 score average 0.9825860115347496
hidden layer 9 score average 0.9825860115347496
hidden layer 10 score average 0.9866844066102458
hidden layer 11 score average 0.982581863953839


In [157]:
from sklearn import preprocessing
normalized_X = preprocessing.normalize(X)

In [None]:
train, predict = train_test_split(all_dataframe, test_size=0.3,random_state=42)
train.to_csv('./final_data/%s_train_%s_to_%s.csv'%(filename,min,max), index=False, header=None)
predict.to_csv('./final_data/%s_predict_%s_to_%s.csv'%(filename,min,max), index=False, header=None)