In [1]:
import numpy as np
import pandas as pd
import xlrd

import matplotlib.pyplot as plt
from numpy.fft import fft, fftfreq

from sklearn.model_selection import train_test_split

def split(X_, index):
    y_ = [index]*len(X_)
    return train_test_split(X_, y_, test_size=0.40, random_state=42)

import glob

def get_fft_set(file_names):
    fft_set = []
    for files in file_names:
        data = pd.read_csv(files)
        fft_data = fft_from_data_frame(data)
        fft_set = fft_data + fft_set
    return fft_set

def fft_from_data_frame(data_frame):
    fs= 1.14e6
    signal_set = []
    nan_indexes = np.where(np.any(np.isnan(data_frame.values), axis=1))
    data_frame_values = np.delete(data_frame.values, nan_indexes, axis=0)
    for row in data_frame_values:
        fft_data = fft(row, n=row.size)/row.size
        freq = fftfreq(row.size, d=1/fs)
        cut_high_signal = abs(fft_data).copy()
        cut_high_signal[(freq > 50000)] = 0
        cut_high_signal[(freq < 30000)] = 0
        signal_without_0 = list(filter(lambda a: a != 0, cut_high_signal))
        signal_set.append(np.abs(signal_without_0))
    return signal_set

car_side = glob.glob('../data/1mhz_data/result/26_Ford_Black_side/*_overall.csv')
wall = glob.glob('../data/1mhz_data/result/Wall/*_overall.csv')
pillar = glob.glob('../data/1mhz_data/result/Pillar/*_overall.csv')
human = glob.glob('../data/1mhz_data/result/Human/*_overall.csv')

car_side_fft = get_fft_set(car_side)
wall_fft = get_fft_set(wall)
pillar_fft = get_fft_set(pillar)
human_fft = get_fft_set(human)

print("Car: ", len(car_side_fft))
print("Wall: ", len(wall_fft))
print("Pillar: ", len(pillar_fft))
print("Human: ", len(human_fft))

result = len(car_side_fft) + len(wall_fft) + len(pillar_fft) + len(human_fft)
print("Total data: ", result)

Car:  5851
Wall:  5456
Pillar:  5772
Human:  3700
Total data:  20779


In [3]:
human = human_fft
X_Human_train, X_Human_test, y_Human_train, y_Human_test = split(human, 'HUMAN')

Non_Human = car_side_fft + wall_fft + pillar_fft
X_Non_Human_train, X_Non_Human_test, y_Non_Human_train, y_Non_Human_test = split(Non_Human, 'NOT HUMAN')

X_Human_Non_Human_train = X_Human_train + X_Non_Human_train
# X_Human_Non_Human_test = np.nan_to_num(X_Human_test + X_Non_Human_test)
X_Human_Non_Human_test = (X_Human_test + X_Non_Human_test)

y_Human_Non_Human_train = y_Human_train + y_Non_Human_train
y_Human_Non_Human_test = y_Human_test + y_Non_Human_test

# np.nan_to_num(X)

print(np.array(X_Human_train).shape,np.array(X_Human_test).shape, np.array(y_Human_train).shape, np.array(y_Human_test).shape)
# print(np.array(X_Non_Human_train).shape, np.array(X_Non_Human_test).shape, np.array(y_Non_Human_train).shape, np.array(y_Non_Human_test).shape)
# print(len(X_Human_Non_Human_train))
# print(len(X_Human_Non_Human_test))
# print(len(y_Human_Non_Human_train))
# print(len(y_Human_Non_Human_test))


(2220, 36) (1480, 36) (2220,) (1480,)


In [8]:
from sklearn import preprocessing
def custom_normalization(X_set):
    new_X_set = []
    for X in X_set:
        min = np.min(X)
        max = np.max(X)
        value = max - min
        data_set = []
        for data in X:
           data_set.append(((data - min) / value) + 0)
        new_X_set.append(data_set)
    return new_X_set

normalized_X_Human_Non_Human_train = custom_normalization(X_Human_Non_Human_train)
normalized_X_Human_Non_Human_test = custom_normalization(X_Human_Non_Human_test)


print('X train data : ',np.array(X_Human_Non_Human_train).shape)
print('y train data : ',np.array(y_Human_Non_Human_train).shape)
print('X test data : ',np.array(X_Human_Non_Human_test).shape)
print('y test data : ',np.array(y_Human_Non_Human_test).shape)

X train data :  (12467, 36)
y train data :  (12467,)
X test data :  (8312, 36)
y test data :  (8312,)


In [5]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, confusion_matrix

def create_confusion_matrix(y_test, result):
    cm = confusion_matrix(y_test, result)
    import seaborn as sns
    import matplotlib.pyplot as plt     
    sum = np.sum(cm, axis=1)
    score = accuracy_score(y_test, result)

    from sklearn.metrics import precision_score, recall_score, f1_score
    precision_CLASS_A = round(precision_score(y_test, result, average='binary',pos_label=labels[0]),2)
    precision_CLASS_B = round(precision_score(y_test, result, average='binary',pos_label=labels[1]),2)
    recall_CLASS_A = round(recall_score(y_test, result, average='binary',pos_label=labels[0]),2)
    recall_CLASS_B = round(recall_score(y_test, result, average='binary',pos_label=labels[1]),2)
    f1_CLASS_A = round(f1_score(y_test, result, average='binary',pos_label=labels[0]),2)
    f1_CLASS_B = round(f1_score(y_test, result, average='binary',pos_label=labels[1]),2)
    f1_average = round((f1_CLASS_A + f1_CLASS_B)/2, 2);
    print('Precision: Class A',precision_CLASS_A)
    print('Precision: Class B',precision_CLASS_B)
#     print('Recall: Class A',recall_CLASS_A)
#     print('Recall: Class B',recall_CLASS_B)
#     print('F1-Score: Class A',f1_CLASS_A)
#     print('F1-Score: Class B',f1_CLASS_B)
#     print('Average F1-score:', f1_average)

    cm_new = np.append(cm[0], recall_CLASS_A)
    cm_new2 = np.append(cm[1], recall_CLASS_B)
    cm_new3 = np.array([precision_CLASS_A, precision_CLASS_B, score])
    cm = np.array([cm_new,cm_new2,cm_new3])


    fig, ax = plt.subplots(figsize=(10,10))
    sns.heatmap(cm, annot=True, ax = ax,linewidths=.5,fmt='g',cmap="Reds"); #annot=True to annotate cells

    # labels, title and ticks
    ax.set_xlabel('Predicted labels');
    ax.set_ylabel('True labels'); 
    ax.set_title(title); 
    counter = 0
    for i in range(0,2):
        for j in range(0,3):
            percentage = cm[i][j]/sum[i]
            t = ax.texts[counter]
            if j == 2:
                t.set_text(str(cm[i][j]))
            else:
                t.set_text(str(cm[i][j]) + '\n' + str(round(percentage*100,2)) + " %")
            counter = counter + 1

    ax.xaxis.set_ticklabels(labels)
    ax.yaxis.set_ticklabels(labels);

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier

human_validate = custom_normalization(human)
Non_Human_validate = custom_normalization(Non_Human)
X_validate = Non_Human_validate + human_validate
y_validate = ['NON_HUMAN']* len(Non_Human_validate) + ['HUMAN']* len(human_validate)
result = []
for i in range(1,90):
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(i), random_state=12)
    scores = cross_val_score(clf, X_validate, y_validate, cv=3, scoring="accuracy")
    result.append({
        'neurons': i,
        'layer': 1,
        'score': scores.mean(),
        'cross_fold_size': 3,
        'random_state': 12
    })
    print("hidden layer", i, "score average", scores.mean())

hidden layer 1 score average 0.9057218935814876
hidden layer 2 score average 0.9062515289634824
hidden layer 3 score average 0.8732356933866501
hidden layer 4 score average 0.8723691701599465
hidden layer 5 score average 0.8254047965775619
hidden layer 6 score average 0.8165024407902313
hidden layer 7 score average 0.7691907645207187
hidden layer 8 score average 0.8085553949349239
hidden layer 9 score average 0.8027833211280274
hidden layer 10 score average 0.8377713971408972
hidden layer 11 score average 0.7645194402031232
hidden layer 12 score average 0.7624069502613667
hidden layer 13 score average 0.8160117829038812
hidden layer 14 score average 0.8316542161706915
hidden layer 15 score average 0.8204452055408407
hidden layer 16 score average 0.793395247382939
hidden layer 17 score average 0.779053043619236
hidden layer 18 score average 0.8154409606067027
hidden layer 19 score average 0.776453126546116
hidden layer 20 score average 0.7732826928816657
hidden layer 21 score average 0.

In [31]:
result_df = pd.DataFrame(sorted(result, key=lambda x: x['score'], reverse=True))
result_df.to_csv('../data/1mhz_data/ml_test/layer1_neurons_test.csv')

Unnamed: 0,cross_fold_size,layer,neurons,random_state,score
0,3,1,2,12,0.906252
1,3,1,1,12,0.905722
2,3,1,3,12,0.873236
3,3,1,4,12,0.872369
4,3,1,5,12,0.825405
5,3,1,6,12,0.816502
6,3,1,8,12,0.808555
7,3,1,9,12,0.802783
8,3,1,7,12,0.769191
