In [1]:
# SEP769 - July 27, 2021
# Amir Kamaleddine, Ernani Fantinatti, Mohammed Ibraheem
# Performing feature selection
# https://github.com/efantinatti/MAFAULDA_SEP769


In [2]:
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense
from keras.models import Sequential
from tensorflow.keras.models import Model
from sklearn.feature_selection import SelectKBest, chi2
import datetime
import numpy as np

In [4]:
# For Google Colab only
# Pulling processed data from public folder
# Credit to this stack overflow forum: https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
dir = 'C:/Users/KOMPUTER/Documents/Jupyter Notebooks/MAFAULDA_SEP769-main/Models/content/jul27data/'


In [5]:
train_data = np.loadtxt(dir +'/fsData/train_data.csv', delimiter=",")
test_data = np.loadtxt(dir +'/fsData/test_data.csv', delimiter=",")
train_label = np.loadtxt(dir +'/fsData/trian_label.csv', delimiter=",")
test_label = np.loadtxt(dir +'/fsData/test_label.csv', delimiter=",")

In [6]:
scaler = MinMaxScaler(feature_range=(0, 1))
train_data = scaler.fit_transform(train_data)
test_data = scaler.fit_transform(test_data)

In [7]:
print(train_data.shape,train_label.shape, test_data.shape, test_label.shape)

(382000, 8) (382000,) (76400, 8) (76400,)


In [8]:
selection = SelectKBest(score_func=chi2, k=6)
selection.fit(train_data, train_label)
new_train_data = selection.transform(train_data)
new_test_data = selection.transform(test_data)

In [9]:
features = ["rotation frequency" , "underhang accelerometer 1", "underhang accelerometer 2", "underhang accelerometer 3","overhang accelerometer 1", "overhang accelerometer 2", "overhang accelerometer 3" ,"microphone"]

In [10]:
for i in range(0,8):
  print(features[i]+": ", selection.scores_[i])

rotation frequency:  7.606459726406875
underhang accelerometer 1:  0.8530206010997341
underhang accelerometer 2:  0.08849307594956664
underhang accelerometer 3:  0.11528300515965707
overhang accelerometer 1:  4.836199452160686
overhang accelerometer 2:  24.70542988027746
overhang accelerometer 3:  0.5749444747244976
microphone:  6.658416939852743e-05


In [11]:
new_train_data.shape, new_test_data.shape

((382000, 6), (76400, 6))

In [12]:
model = Sequential()
model.add(Dense(60, input_dim=8, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 60)                540       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1952      
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 264       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 9         
Total params: 2,765
Trainable params: 2,765
Non-trainable params: 0
_________________________________________________________________


In [13]:
history = model.fit(train_data, train_label, epochs=5, batch_size=50, validation_data=(test_data, test_label), verbose=2)

Epoch 1/5
7640/7640 - 7s - loss: 0.2650 - accuracy: 0.9002 - val_loss: 0.3440 - val_accuracy: 0.8955
Epoch 2/5
7640/7640 - 7s - loss: 0.2046 - accuracy: 0.9232 - val_loss: 0.3531 - val_accuracy: 0.8858
Epoch 3/5
7640/7640 - 8s - loss: 0.1945 - accuracy: 0.9279 - val_loss: 0.3739 - val_accuracy: 0.8877
Epoch 4/5
7640/7640 - 8s - loss: 0.1894 - accuracy: 0.9297 - val_loss: 0.3661 - val_accuracy: 0.8875
Epoch 5/5
7640/7640 - 7s - loss: 0.1861 - accuracy: 0.9310 - val_loss: 0.2643 - val_accuracy: 0.9062


In [14]:
test_accuracy = model.evaluate(test_data, test_label)



In [15]:
model2 = Sequential()
model2.add(Dense(60, input_dim=6, activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(8, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 60)                420       
_________________________________________________________________
dense_5 (Dense)              (None, 32)                1952      
_________________________________________________________________
dense_6 (Dense)              (None, 8)                 264       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 9         
Total params: 2,645
Trainable params: 2,645
Non-trainable params: 0
_________________________________________________________________


In [16]:
history = model2.fit(new_train_data, train_label, epochs=5, batch_size=50, validation_data=(new_test_data, test_label), verbose=2)

Epoch 1/5
7640/7640 - 7s - loss: 0.2851 - accuracy: 0.8946 - val_loss: 0.2544 - val_accuracy: 0.9062
Epoch 2/5
7640/7640 - 8s - loss: 0.2141 - accuracy: 0.9199 - val_loss: 0.3064 - val_accuracy: 0.8875
Epoch 3/5
7640/7640 - 8s - loss: 0.2068 - accuracy: 0.9229 - val_loss: 0.2967 - val_accuracy: 0.8947
Epoch 4/5
7640/7640 - 8s - loss: 0.2032 - accuracy: 0.9240 - val_loss: 0.2862 - val_accuracy: 0.8888
Epoch 5/5
7640/7640 - 7s - loss: 0.2002 - accuracy: 0.9251 - val_loss: 0.2706 - val_accuracy: 0.8979


In [17]:
test_accuracy = model2.evaluate(new_test_data, test_label)



In [18]:
preds = model.predict(test_data) # label scores 

classpreds = np.argmax(preds, axis=0) # predicted classes 

y_testclass = np.argmax(test_label, axis=0) # true classes

n_classes=2 # number of classes

In [19]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Confusion Matrix
c_names = ['normal', 'unbalance']
cm = confusion_matrix(y_testclass, classpreds)
print(cm)

# Get percentage value for each element of the matrix
cm_sum = np.sum(cm, axis=1, keepdims=True)
cm_perc = cm / cm_sum.astype(float) * 100
annot = np.empty_like(cm).astype(str)
nrows, ncols = cm.shape
for i in range(nrows):
    for j in range(ncols):
        c = cm[i, j]
        p = cm_perc[i, j]
        if i == j:
            s = cm_sum[i]
            annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
        elif c == 0:
            annot[i, j] = ''
        else:
            annot[i, j] = '%.1f%%\n%d' % (p, c)

# Display confusion matrix 
df_cm = pd.DataFrame(cm, index = c_names, columns = c_names)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
fig, ax = plt.subplots(figsize=(10,7))
sns.heatmap(df_cm, annot=annot, fmt='')

TypeError: Singleton array 9800 cannot be considered a valid collection.