In [1]:
import tensorflow
import pandas as pd
import time
import numpy as np
import os
import matplotlib.pyplot as plt
import statistics

# tensorflow libraries
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, Conv1D
from tensorflow.keras.callbacks import Callback

# sklearn libraries are useful for preprocessing, performance measures, etc.
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

In [2]:
# Folder that holds the CSV files read by the loading loop in the next cell.
path = 'output/'
# os.listdir() returns entries in arbitrary order. Sorting them keeps the row
# order of the combined frame -- and therefore the seeded train/test split --
# the same on every machine and every re-run.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# the loading loop iterates over it.
dir = sorted(os.listdir(path))

In [3]:
# Read every CSV listed in `dir`, tag each row with the file's base name as its
# class label, and stack all files into a single frame.
frames = []
for file in dir:
    if not file.endswith('.csv'):
        continue
    # os.path.join uses the platform's separator; a hard-coded '\\' only
    # resolves to a real path on Windows.
    df_current_file = pd.read_csv(os.path.join(path, file))
    # A scalar is broadcast to every row, so no per-row label list is needed.
    df_current_file['label'] = file[:-4]
    frames.append(df_current_file)

if not frames:
    # Fail with a clear message instead of a KeyError from the drop() below.
    raise FileNotFoundError(f'no .csv files found in {path!r}')

# Concatenate once at the end rather than re-copying a growing frame on every
# iteration of the loop.
df_combined = pd.concat(frames)
# Every remaining non-label column is later used as a model input, so these
# three are removed here (presumably per-segment metadata -- confirm against
# the CSV schema).
df_combined = df_combined.drop(columns=['file', 'start', 'end'])

In [4]:
def prepare_dataset(df: pd.DataFrame, test_size, random_state):
    """Encode labels, split into train/test sets and standardise the features.

    The caller's DataFrame is left untouched: the integer-encoded labels are
    written to a copy, so the original values in ``df['label']`` survive the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'label'`` column; every other column is treated as a
        feature.
    test_size
        Forwarded to ``train_test_split`` (size of the held-out split).
    random_state
        Forwarded to ``train_test_split`` (seed for the shuffling).

    Returns
    -------
    x_train_scaled, y_train, x_test_scaled, y_test
        Scaled feature matrices and NumPy arrays of encoded labels for the
        train and test splits, in that order.
    """
    # Encode the labels from 0 to n_classes-1.
    # assign() returns a new frame, so the input `df` is not modified.
    label_encoder = preprocessing.LabelEncoder()
    df_encoded = df.assign(label=label_encoder.fit_transform(df['label']))

    # Divide the data into train and test splits.
    df_train, df_test = train_test_split(
        df_encoded, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training inputs only, so that statistics of the
    # test split do not influence the scaling.
    x_train = df_train.drop(columns=['label'])
    y_train = df_train['label'].to_numpy()

    standard_scaler = preprocessing.StandardScaler()
    x_train_scaled = standard_scaler.fit_transform(x_train)

    # Scale the test inputs with the statistics learned from the training split.
    x_test = df_test.drop(columns=['label'])
    x_test_scaled = standard_scaler.transform(x_test)
    y_test = df_test['label'].to_numpy()

    return x_train_scaled, y_train, x_test_scaled, y_test

In [5]:
# Build the scaled train/test arrays: 30 % of the rows are held out and the
# split is seeded with 0.
X_train, y_train, X_test, y_test = prepare_dataset(
    df_combined,
    test_size=0.3,
    random_state=0,
)

# Sanity check: feature-matrix shape next to label-vector shape, per split.
print(f'{X_train.shape} {y_train.shape}')
print(f'{X_test.shape} {y_test.shape}')

(1008, 988) (1008,)
(432, 988) (432,)


In [6]:
# The input width follows the scaled feature matrix instead of a hard-coded
# column count, so the model keeps matching the data if the CSV schema changes.
n_features = X_train.shape[1]

# Fully connected classifier: dropout on the raw inputs, two ReLU hidden
# layers, softmax output.
model = Sequential([
    # `shape` is passed as a tuple by keyword; a bare int is not accepted as a
    # shape by every Keras release.
    Input(shape = (n_features,)),
    Dropout(0.5),
    Dense(663, activation = 'relu'),
    Dense(442, activation = 'relu'),
    # 8 output units -- presumably one per encoded label class; confirm
    # against the number of distinct labels in the data.
    Dense(8, activation = 'softmax')
])

# The sparse loss is used because the targets are integer class ids (produced
# by the LabelEncoder in prepare_dataset), not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

In [None]:
batchSize = 1
epochs = 50

# X_train / y_train are in-memory NumPy arrays. `use_multiprocessing` only
# applies to generator / Sequence input and is not accepted by newer Keras
# releases, so it is not passed.
# NOTE: the held-out test split doubles as the validation data here, so the
# `val_*` metrics are test-set metrics.
history = model.fit(X_train, y_train,
                    batch_size = batchSize,
                    epochs = epochs,
                    verbose = 2,
                    validation_data = (X_test, y_test))

Epoch 1/50
1008/1008 - 5s - loss: 2.0450 - accuracy: 0.3720 - val_loss: 1.6422 - val_accuracy: 0.4375
Epoch 2/50
1008/1008 - 5s - loss: 1.4419 - accuracy: 0.5308 - val_loss: 1.3211 - val_accuracy: 0.5440
Epoch 3/50
1008/1008 - 5s - loss: 1.1322 - accuracy: 0.6071 - val_loss: 1.5977 - val_accuracy: 0.5347
Epoch 4/50
1008/1008 - 4s - loss: 1.0187 - accuracy: 0.6766 - val_loss: 2.3453 - val_accuracy: 0.4560
Epoch 5/50
1008/1008 - 4s - loss: 0.9838 - accuracy: 0.6786 - val_loss: 1.7471 - val_accuracy: 0.5579
Epoch 6/50
1008/1008 - 5s - loss: 0.9460 - accuracy: 0.6964 - val_loss: 1.4800 - val_accuracy: 0.6019
Epoch 7/50
1008/1008 - 5s - loss: 0.8530 - accuracy: 0.7321 - val_loss: 1.4541 - val_accuracy: 0.5787
Epoch 8/50
1008/1008 - 5s - loss: 0.8322 - accuracy: 0.7708 - val_loss: 1.6787 - val_accuracy: 0.5833
Epoch 9/50
1008/1008 - 5s - loss: 0.7526 - accuracy: 0.7758 - val_loss: 2.0999 - val_accuracy: 0.5764
Epoch 10/50
1008/1008 - 5s - loss: 0.7667 - accuracy: 0.7897 - val_loss: 1.9823 - 

In [None]:
# Plot against the epochs that were actually recorded rather than a hard-coded
# range(0, 50): the x values then always match the length of the metric lists,
# even when training stopped early or `epochs` was changed.
training_log = model.history
completed_epochs = training_log.epoch

plt.xlabel('No. of epochs')
plt.ylabel('Accuracy')
plt.plot(completed_epochs, training_log.history['accuracy'], label = 'Train data')
plt.plot(completed_epochs, training_log.history['val_accuracy'], label = 'Test data')
plt.legend()