In [19]:
import pandas as pd
import os
import sklearn.model_selection as sk_model_selection
import sklearn.preprocessing as sk_preprocessing
import sklearn.metrics as sk_metrics
import sklearn.neural_network as sk_nn
import matplotlib.pyplot as plt

In [20]:
data_dir = './harth'

# Collect one DataFrame per CSV and concatenate once at the end. Calling
# pd.concat inside the loop re-copies every previously loaded row on each
# iteration, which is quadratic in the total number of rows.
frames = []
for root, dirs, files in os.walk(data_dir):
    # os.walk yields entries in filesystem order; sorting makes the row order
    # (and therefore the seeded train/test split later on) reproducible.
    dirs.sort()
    for file in sorted(files):
        file_name, extension = os.path.splitext(file)
        if extension.lower() != '.csv':
            # Skip stray non-CSV files instead of failing inside read_csv.
            continue
        new = pd.read_csv(os.path.join(root, file), index_col = 'timestamp')
        # Remember which file each row came from.
        new['file_name'] = file_name
        frames.append(new)

if not frames:
    # Fail with a clear message rather than a KeyError on 'label' below.
    raise FileNotFoundError('No CSV files found under {!r}'.format(data_dir))

concat = pd.concat(frames)

# Labels keyed by the original timestamp index (taken before the index is reset).
label = concat['label']

# Drop the 'index' column; errors='ignore' keeps this working when no loaded
# file carries that column.
concat = concat.drop(columns = ['index'], errors = 'ignore')

# Move 'timestamp' from the index back into a regular column.
concat = concat.reset_index()

# Parse the timestamps and convert them to float seconds since the Unix epoch
# so the column can be used as a numeric value.
concat['timestamp'] = pd.to_datetime(concat['timestamp']).apply(lambda x: x.timestamp())
time = concat['timestamp']

In [18]:
# NOTE(review): 'timestamp' is used as a feature next to the back_*/thigh_*
# columns; combined with a random (non-chronological) split this may let the
# model key on recording time -- confirm this is intended.
X = concat[['timestamp', 'back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y', 'thigh_z']]
y = concat['label']
print(y)

# Split BEFORE scaling. The same random_state on the same number of rows gives
# the same row partition as splitting the scaled array would.
X_train_raw, X_test_raw, y_train, y_test = sk_model_selection.train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only so that no statistic of the test
# set leaks into preprocessing; the test rows are transformed with the
# training min/max and may therefore fall slightly outside [-1, 1].
scaler = sk_preprocessing.MinMaxScaler(feature_range = (-1,1))
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell expecting the fully scaled feature matrix still works.
X_scaled = scaler.transform(X)

# random_state pins weight initialisation and batch shuffling, making the
# reported accuracy reproducible across runs.
mlp_classifier = sk_nn.MLPClassifier(
    hidden_layer_sizes=(64, 64), activation='relu', max_iter=1000, random_state=42
)
mlp_classifier.fit(X_train, y_train)

y_pred = mlp_classifier.predict(X_test)

accuracy = sk_metrics.accuracy_score(y_test, y_pred)

print('Test accuracy: {:0.2f}'.format(accuracy))


0          6
1          6
2          6
3          6
4          6
          ..
6461323    3
6461324    3
6461325    3
6461326    3
6461327    3
Name: label, Length: 6461328, dtype: int64




Test accuracy: 0.88


In [14]:
cm = sk_metrics.confusion_matrix(y_test, y_pred)
results_df = pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})


def categorize_prediction(row, cls):
    """Classify a single prediction one-vs-rest with `cls` as the positive class.

    Parameters
    ----------
    row : mapping with 'y_test' (true label) and 'y_pred' (predicted label),
        e.g. one row of ``results_df``.
    cls : the label treated as "positive".

    Returns
    -------
    str
        'TP', 'FP', 'FN' or 'TN'.

    `cls` is an explicit argument on purpose: a per-row category only has a
    meaning relative to one class, and reading it from a global made the
    result depend on whichever value a previous run had left in the kernel.
    For a per-row column for one class use
    ``results_df.apply(categorize_prediction, axis=1, cls=some_label)``.
    """
    actual_is_cls = row['y_test'] == cls
    predicted_is_cls = row['y_pred'] == cls
    if predicted_is_cls:
        return 'TP' if actual_is_cls else 'FP'
    return 'FN' if actual_is_cls else 'TN'


# Every label present in the full label vector, in ascending order.
classes = sorted(set(y))
print(classes)

# One-vs-rest confusion counts per class, computed with vectorised boolean
# masks rather than a row-wise apply.
summary = []
for cls in classes:
    actual_is_cls = results_df['y_test'] == cls
    predicted_is_cls = results_df['y_pred'] == cls
    summary.append({
        'Label': cls,
        'TP': (actual_is_cls & predicted_is_cls).sum(),
        'FP': (~actual_is_cls & predicted_is_cls).sum(),
        'FN': (actual_is_cls & ~predicted_is_cls).sum(),
        'TN': (~actual_is_cls & ~predicted_is_cls).sum(),
    })

summary_df = pd.DataFrame(summary)

# Display the summary DataFrame
print(summary_df)

[1, 2, 3, 4, 5, 6, 7, 8]
   Class     TP   FP   FN     TN
0      1  13791  557  369  12813
1      2   4359   32   72  23067
2      3    104   72  297  27057
3      4    535   70  113  26812
4      5    446  130   48  26906
5      6   1304  215  150  25861
6      7   2587    4   19  24920
7      8   3317    7   19  24187


In [None]:
fig, ax = plt.subplots(figsize=(12, 8))

# Plotting bars
bar_width = 0.2
# Ordinal x positions (0, 1, 2, ...) rather than the label values themselves,
# so the groups stay evenly spaced even if the labels are not contiguous.
index = pd.RangeIndex(len(summary_df)).to_numpy()

bar_series = [
    ('TP', 'True Positives'),
    ('FP', 'False Positives'),
    ('FN', 'False Negatives'),
    ('TN', 'True Negatives'),
]
for offset, (column, legend_label) in enumerate(bar_series):
    ax.bar(index + offset * bar_width, summary_df[column], bar_width, label=legend_label)

# Add labels
ax.set_xlabel('Label')
ax.set_ylabel('Count')
ax.set_title('True Positives, False Positives, False Negatives, and True Negatives by Class')
# Centre each tick under its group of four bars and label it with the class.
# The summary frame's column is 'Label'; there is no 'Class' column.
ax.set_xticks(index + 1.5 * bar_width)
ax.set_xticklabels(summary_df['Label'])
ax.legend()

# Display the plot
plt.show()