Erstes ML-Modell mit SciKit-Learn

In [None]:
import numpy as np
import pandas as pd
import sklearn as sk

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

Laden der zwei Datensätze: Walking und Jumping

In [None]:
# Read both activity recordings; the paths are relative to the working directory.
# Note: df_run holds the contents of "walking.json".
df_run, df_jump = (
    pd.read_json(path) for path in ("walking.json", "jumping.json")
)

In [None]:
# Peek at the first five rows of the walking recording.
df_run.head(n=5)

In [None]:
# Peek at the first five rows of the jumping recording.
df_jump.head(n=5)

Extraktion von lediglich 3 Sensor-Daten: Acceleration Data Uncalibrated

In [None]:
# Preprocessing: parse the 'time' column of both recordings into pandas datetimes.
df_run['time'] = df_run['time'].pipe(pd.to_datetime)
df_jump['time'] = df_jump['time'].pipe(pd.to_datetime)

df_run.head(n=5)

Extraktion der richtigen Sensor-Daten:

In [None]:
# Keep only the rows whose 'sensor' value is the uncalibrated accelerometer.
df_run_acc, df_jump_acc = (
    frame[frame['sensor'].eq('AccelerometerUncalibrated')]
    for frame in (df_run, df_jump)
)

In [None]:
# Use the timestamp as the index of the full recordings. The guard keeps the
# cell re-runnable: once 'time' is the index it is no longer a column, and a
# second set_index('time') would raise a KeyError.
if df_run.index.name != 'time':
    df_run = df_run.set_index('time')
if df_jump.index.name != 'time':
    df_jump = df_jump.set_index('time')

# df_jump_acc was filtered out of df_jump before this re-indexing, so it is
# not affected by it.
df_jump_acc.head()

In [None]:
# Summary statistics of the walking recording, with the default quartiles spelled out.
df_run.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Reduce both accelerometer frames to the three axis columns (in z, y, x order).
df_run_acc, df_jump_acc = (
    frame[['z', 'y', 'x']] for frame in (df_run_acc, df_jump_acc)
)

Erstellung eines Trainings- und Test-Datensatzes:

In [None]:
# Line plot of the walking recording on a wide 20x10 inch canvas.
df_run.plot.line(figsize=(20, 10))

In [None]:
# Tag every sample with its activity and stack both recordings into one frame.
# assign() builds new frames instead of writing a column into the filtered
# selections, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
df_run_acc = df_run_acc.assign(label="run")
df_jump_acc = df_jump_acc.assign(label="jump")
# ignore_index=True renumbers the rows 0..n-1: all "run" rows come first,
# followed by all "jump" rows.
df_combined = pd.concat([df_run_acc, df_jump_acc], ignore_index=True)

In [None]:
# Row count of the walking accelerometer frame. It is printed explicitly
# because a bare expression in the middle of a cell is evaluated but never shown.
print(f"Rows in df_run_acc: {len(df_run_acc)}")

# Combined, labelled data set (pandas truncates long frames when rendering).
display(df_combined)

In [None]:
# Evaluate a random forest with stratified 5-fold cross-validation.
#
# df_combined consists of all "run" rows followed by all "jump" rows, so an
# expanding-window TimeSeriesSplit would train the early folds on a single
# class and score them on that same class, which says nothing about how well
# the two activities are separated. StratifiedKFold keeps both labels in every
# training and test fold; shuffling is left off so the rows of each class are
# not randomly interleaved between training and test data.
features = df_combined[['z', 'y', 'x']]
labels = df_combined['label']

cv = StratifiedKFold(n_splits=5)
fold_accuracies = []
for train_index, test_index in cv.split(features, labels):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = labels.iloc[train_index], labels.iloc[test_index]

    # Scale: fit on the training fold only so no test statistics leak in.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit the classifier (fixed seed for reproducible results).
    clf = RandomForestClassifier(random_state=0)
    clf.fit(X_train, y_train)

    # Evaluate the classifier on the held-out fold; predict only once.
    y_pred = clf.predict(X_test)
    print(y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    fold_accuracies.append(accuracy)
    print(f"Accuracy: {accuracy}")

print(f"Mean accuracy: {np.mean(fold_accuracies)}")

    

In [None]:
# Confusion matrix for the last cross-validation fold: clf, X_test and y_test
# are the objects left over from the final iteration of the loop above.
y_pred = clf.predict(X_test)

# Pin the label order so the matrix layout does not depend on which classes
# happen to occur in this fold. The result gets its own name so the accuracy
# score computed earlier is not overwritten.
class_labels = sorted(df_combined['label'].unique())
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Rows are the true labels, columns the predicted labels.
pd.DataFrame(conf_matrix, index=class_labels, columns=class_labels).rename_axis(
    index="true", columns="predicted"
)