In [21]:
import numpy as np
import pandas as pd
from scipy import stats
from statistics import mode
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import math

# imports raw csv: first row must contain time, x, y, z
df= pd.read_csv(r"C:\Users\jeong\Desktop\stand_walk_run_3,3,2.5min.csv")

# asks for activities performed with commas in between
activities= input("Enter activities: ").split(",")

# adds activity tags to each timestamp in column "ADL". moves onto next activity every time there is a time gap > 1.0 sec.(data collection is every 0.1 sec)
index = 0
for i in range(0, df.shape[0]-1):
    df.loc[df.index[i], 'ADL'] = activities[index]
    if (df.iloc[(i+1),0]-df.iloc[i,0])>1.0:
        index+=1
df.loc[df.index[df.shape[0]-1], 'ADL'] = activities[index]

# preprocessing
x_list = []
y_list = []
z_list = []
train_labels = []

# creating windows of size window-size 20: each window is 2.0 sec (microbit collects data every 0.1 sec)
window_size = 20

for i in range(0, df.shape[0] - window_size):
    xs = df['x'].values[i: i + window_size]
    ys = df['y'].values[i: i + window_size]
    zs = df['z'].values[i: i + window_size]
    label = mode(df['ADL'][i: i + window_size])
    
    x_list.append(xs)
    y_list.append(ys)
    z_list.append(zs)
    train_labels.append(label)
    
processed_df = pd.DataFrame()

# 12 total features(4*3(x,y,z))
# mean
processed_df['x_mean'] = pd.Series(x_list).apply(lambda x: x.mean())
processed_df['y_mean'] = pd.Series(y_list).apply(lambda x: x.mean())
processed_df['z_mean'] = pd.Series(z_list).apply(lambda x: x.mean())

# std dev
processed_df['x_std'] = pd.Series(x_list).apply(lambda x: x.std())
processed_df['y_std'] = pd.Series(y_list).apply(lambda x: x.std())
processed_df['z_std'] = pd.Series(z_list).apply(lambda x: x.std())

# min
processed_df['x_min'] = pd.Series(x_list).apply(lambda x: x.min())
processed_df['y_min'] = pd.Series(y_list).apply(lambda x: x.min())
processed_df['z_min'] = pd.Series(z_list).apply(lambda x: x.min())

# max
processed_df['x_max'] = pd.Series(x_list).apply(lambda x: x.max())
processed_df['y_max'] = pd.Series(y_list).apply(lambda x: x.max())
processed_df['z_max'] = pd.Series(z_list).apply(lambda x: x.max())

processed_df["ADL"] = train_labels

# splits dataframe, encode, scale
X = pd.DataFrame(processed_df.drop(['ADL'],axis=1))
Y = processed_df.ADL.values.astype(object)

encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(Y)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1000)

processed_df.drop(['ADL'],axis=1)

print("\nScaler mean: ", (scaler.mean_).tolist())
print("Scaler sd: ", list(map(math.sqrt,scaler.var_)))

# train the model: SVC with linear kernel, one vs. one
svc = SVC(kernel="linear", C=100.0, decision_function_shape='ovo') 
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)

print("\nModel accuracy score:", accuracy_score(y_test, y_pred))

# dictionary of classes (organizes in alphabetic order)
print("\nactivities: ",(encoder.inverse_transform([0,1,2])).tolist())

print("\nCoef: ",(svc.coef_).tolist())
print("k: ",(svc.intercept_).tolist())

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print('\n Model accuracy score:', accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test, y_pred))

Enter activities: s,w,r

Scaler mean:  [-177.48559249786868, 927.1557118499572, -163.73593350383635, 313.40030982357086, 352.0293421898865, 359.9212856332229, -804.4117647058823, 282.70588235294116, -756.6419437340154, 435.17306052855923, 1532.8729752770673, 548.995737425405]
Scaler sd:  [122.81336316110112, 155.35774130311398, 172.79928738733565, 325.4941488616266, 338.8675980850038, 436.1055304406833, 661.4673896097793, 760.6671341279181, 782.6163916969433, 722.2878198389716, 422.2618853350948, 743.0749119638036]

Model accuracy score: 1.0

activities:  ['r', 's', 'w']

Coef:  [[0.09072921892541963, 0.02050763109394002, 0.07990760096870853, 0.1959510865053412, 0.24749407547432828, 0.08125397351231137, -0.15311007915439134, -0.18676970998309272, -0.10963787716854928, 0.26096017201491933, 0.6495797112134594, 0.14765281352724935], [0.9647705673104152, -0.9381255709889622, 1.0963808461538243, -0.5105358086653857, -2.8208343897965533, 2.733823393949729, -0.9638646413982466, -0.39804054554