In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn import metrics
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn import preprocessing
import glob
from scipy import stats
import datetime as dt

In [None]:
# Subject whose recordings are analysed; change this one value to load another subject.
SUBJECT_ID = '46343'

# Acceleration and sleep-label files are space-separated, heart rate is comma-separated.
# None of the files has a header row, so column names are supplied here.
ACC = pd.read_csv(f'{SUBJECT_ID}_acceleration.txt', sep=' ', names=['timedelta', 'accX', 'accY', 'accZ'])
HeartR = pd.read_csv(f'{SUBJECT_ID}_heartrate.txt', sep=',', names=['timedelta', 'heartrate'])
SleepL = pd.read_csv(f'{SUBJECT_ID}_labeled_sleep.txt', sep=' ', names=['timedelta', 'sleep'])

In [None]:
# Inspect the raw acceleration table (columns: timedelta, accX, accY, accZ).
ACC

In [None]:
# Inspect the raw heart-rate table (columns: timedelta, heartrate).
HeartR

In [None]:
# Inspect the raw sleep-label table (columns: timedelta, sleep).
SleepL

In [None]:
# First and last timestamp of each recording; used to find the overlapping time span.
ACC_min_date, ACC_max_date = ACC['timedelta'].min(), ACC['timedelta'].max()
HeartR_min_date, HeartR_max_date = HeartR['timedelta'].min(), HeartR['timedelta'].max()
SleepL_min_date, SleepL_max_date = SleepL['timedelta'].min(), SleepL['timedelta'].max()

# Report the time span covered by each recording.
print('ACC start: ', ACC_min_date, 'ACC end: ', ACC_max_date)
print('HeartR start: ', HeartR_min_date, 'HeartR end: ', HeartR_max_date)
print('SleepL start: ', SleepL_min_date, 'SleepL end: ', SleepL_max_date)

In [None]:
# Keep only the samples inside the time window covered by all three recordings
# (ACC, HeartR, SleepL): the window opens at the latest start and closes at the
# earliest end.
window_start = max(ACC_min_date, HeartR_min_date, SleepL_min_date)
window_end = min(ACC_max_date, HeartR_max_date, SleepL_max_date)

# `between` is inclusive on both ends, so a sample lying exactly on a window edge is
# kept (strict </> comparisons would always drop a recording's own first/last sample).
# `.copy()` makes each result an independent frame, so columns can be modified later
# without pandas warning about writing into a slice of the raw data.
ACC_new = ACC[ACC['timedelta'].between(window_start, window_end)].copy()
HeartR_new = HeartR[HeartR['timedelta'].between(window_start, window_end)].copy()
SleepL_new = SleepL[SleepL['timedelta'].between(window_start, window_end)].copy()

In [None]:
# Acceleration samples that remain after restricting to the shared time window.
ACC_new

In [None]:
# Heart-rate samples that remain after restricting to the shared time window.
HeartR_new

In [None]:
# Sleep labels that remain after restricting to the shared time window.
SleepL_new

In [None]:
# Convert the numeric second offsets to Timedelta values rounded to the nearest second.
# `assign` returns a new frame rather than writing a column into a filtered slice,
# which avoids pandas' SettingWithCopyWarning and keeps the cell safe to re-run.
ACC_new = ACC_new.assign(
    timedelta=pd.to_timedelta(ACC_new['timedelta'], unit='s').round('1s')
)
HeartR_new = HeartR_new.assign(
    timedelta=pd.to_timedelta(HeartR_new['timedelta'], unit='s').round('1s')
)
SleepL_new = SleepL_new.assign(
    timedelta=pd.to_timedelta(SleepL_new['timedelta'], unit='s').round('1s')
)

In [None]:
# Acceleration frame after its timedelta column was converted and rounded to 1 s.
ACC_new

In [None]:
# Several raw samples can share one rounded second: collapse them to their mean.
# One groupby over the three axes yields the same per-axis Series as three passes.
acc_per_second = ACC_new.groupby('timedelta')[['accX', 'accY', 'accZ']].mean()
df_acc_X = acc_per_second['accX']
df_acc_Y = acc_per_second['accY']
df_acc_Z = acc_per_second['accZ']

In [None]:
# Put the per-second axis means side by side and turn the timedelta index back
# into an ordinary column.
acc_columns = [df_acc_X, df_acc_Y, df_acc_Z]
ACC_new2 = pd.concat(acc_columns, axis='columns').reset_index()
ACC_new2

In [None]:
# Show the raw acceleration table next to its 1-second averaged counterpart.
for banner, frame in (
    ('--------Before convert datetime and round and average to 1s---------', ACC),
    ('--------After convert datetime and round and average to 1s---------', ACC_new2),
):
    print(banner)
    print(frame)


In [None]:
# Put heart rate on a regular 1-second grid: median within each second, then carry
# the last known value forward into seconds that had no sample.
# NOTE(review): the following cell assigns HeartR_new2 again, so this result is
# only displayed here.
resample_rule = '1s'
hr_by_time = HeartR_new.set_index('timedelta')
HeartR_new2 = hr_by_time.resample(resample_rule).median().ffill()
HeartR_new2

In [None]:
# Average heart-rate readings that share the same rounded second.
df_heartrate = HeartR_new.groupby('timedelta')['heartrate'].mean()

# Back to a two-column frame (timedelta, heartrate).
HeartR_new2 = df_heartrate.reset_index()

# Re-base the time axis so the first heart-rate sample sits at 0.
# NOTE(review): the acceleration frame is not re-based the same way, so this shifts
# heart rate relative to acceleration by the offset of its first sample - confirm
# that this is intended.
hr_first_sample = HeartR_new2['timedelta'].min()
HeartR_new2['timedelta'] = HeartR_new2['timedelta'] - hr_first_sample
HeartR_new2

In [None]:
# Put the sleep labels on a regular 1-second grid: median within each second, then
# carry the last known label forward into seconds that had no sample.
# NOTE(review): the following cell assigns SleepL_new2 again, so this result is
# only displayed here.
resample_rule = '1s'
sleep_by_time = SleepL_new.set_index('timedelta')
SleepL_new2 = sleep_by_time.resample(resample_rule).median().ffill()
SleepL_new2

In [None]:
# Collapse labels that share the same rounded second to their mean.
df_SleepL = SleepL_new.groupby('timedelta')['sleep'].mean()

# Back to a two-column frame (timedelta, sleep).
SleepL_new2 = df_SleepL.reset_index()

# Re-base the time axis so the first label sits at 0.
# NOTE(review): the acceleration frame is not re-based the same way, so this shifts
# the labels relative to acceleration by the offset of the first label - confirm
# that this is intended.
sleep_first_sample = SleepL_new2['timedelta'].min()
SleepL_new2['timedelta'] = SleepL_new2['timedelta'] - sleep_first_sample
SleepL_new2

In [None]:
# ------------Merge All Data -------------------------------
# merge_asof attaches to every acceleration row the most recent heart-rate / sleep
# row whose timedelta is <= that row's timedelta (its default "backward" match).
# Rows with no earlier match get NaN. All three frames are already sorted by
# timedelta because they come out of a groupby on that key.
df = pd.merge_asof(ACC_new2, HeartR_new2, on='timedelta')
df = pd.merge_asof(df, SleepL_new2, on='timedelta')
df

In [None]:
# fillna returns a NEW Series; the result has to be stored or the gaps stay in df.
# Missing heart rate is replaced by the column median, missing sleep labels by 0.
# errors='ignore' keeps the cell re-runnable once 'timedelta' is already gone.
df = df.assign(
    heartrate=df['heartrate'].fillna(df['heartrate'].median()),
    sleep=df['sleep'].fillna(0),
).drop(columns='timedelta', errors='ignore')
df

In [None]:
# Split the merged table into model inputs and target, and standardize the inputs
# (zero mean, unit variance per column).
# NOTE(review): the scaler is fitted on every row before the train/test split, so
# statistics of the future test rows leak into training - consider fitting it on
# the training rows only.
feature_columns = ['accX', 'accY', 'accZ', 'heartrate']
label_columns = ['sleep']

df_feature = df[feature_columns]
df_label = df[label_columns]

scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_feature.values)
df_feature = pd.DataFrame(
    scaled_values,
    index=df_feature.index,
    columns=df_feature.columns,
)

In [None]:
# Standardized feature table (accX, accY, accZ, heartrate).
df_feature

In [None]:
# Target table holding the single 'sleep' column.
df_label

In [None]:
# Line plot of every standardized feature against the row index; title and axis
# labels are set so the figure can be read on its own.
ax = df_feature.plot.line(title='Standardized features')
ax.set(xlabel='row index', ylabel='standardized value');

In [None]:
# Line plot of the sleep label against the row index; title and axis labels are
# set so the figure can be read on its own.
ax = df_label.plot.line(title='Sleep label')
ax.set(xlabel='row index', ylabel='sleep');

In [None]:
# Model inputs: X is another name for the standardized feature frame (no copy is made).
X = df_feature
X

In [None]:
# Model target as a 1-D Series. Passing the one-column DataFrame itself makes
# scikit-learn emit a "column-vector y was passed" DataConversionWarning on fit.
y = df_label['sleep']
y

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=seed,
)

In [None]:
# Hyper-parameters for the hand-configured SVC models.
c_val = 100  # passed as C to every SVC
gmm = 0.1    # passed as gamma to the RBF SVC
d = 2        # passed as degree to the polynomial SVC

In [None]:
# Build one unfitted SVC per kernel, all sharing the same C.
# The linear model is only constructed in this cell; the training cell that
# follows fits the RBF and polynomial models.
svc_lin = SVC(C=c_val, kernel='linear')
svc_rbf = SVC(C=c_val, kernel='rbf', gamma=gmm)
svc_poly = SVC(C=c_val, kernel='poly', degree=d)

In [None]:
# Train the RBF and polynomial classifiers on the training split.
# fit() trains the estimator in place and returns that same object, so the
# existing names keep pointing at the now-fitted models.
for _estimator in (svc_rbf, svc_poly):
    _estimator.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with each fitted classifier.
svc_rbf_pred, svc_poly_pred = (
    fitted.predict(X_test) for fitted in (svc_rbf, svc_poly)
)

In [None]:
# Evaluate the RBF classifier on the held-out rows.
rbf_confusion = confusion_matrix(y_test, svc_rbf_pred)
rbf_report = classification_report(y_test, svc_rbf_pred)

print('Confusion Matrix of SVC_RBF: ')
print(rbf_confusion)
print('Classification Report of SVC_RBF: ')
print(rbf_report)

In [None]:
# Evaluate the polynomial classifier on the held-out rows.
# The headings name the poly model: these numbers come from svc_poly_pred, not
# from the RBF model.
print('Confusion Matrix of SVC_POLY: ')
print(confusion_matrix(y_test, svc_poly_pred))
print('Classification Report of SVC_POLY: ')
print(classification_report(y_test, svc_poly_pred))

In [None]:
# Search space for the SVC grid search.
# NOTE(review): only the 'rbf' kernel is listed, and scikit-learn's SVC uses
# `degree` for the 'poly' kernel only, so the two degree values repeat otherwise
# identical fits - confirm whether 'poly' was meant to be searched as well.
kernel = ['rbf']
C_list = [0.1, 1.0, 10.0, 100.0, 200.0, 500.0]
Gamma_list = [0.01, 0.1, 1.0, 10]
d_list = [2, 3]
params = {
    'kernel': kernel,
    'C': C_list,
    'gamma': Gamma_list,
    'degree': d_list,
}

In [None]:
# Exhaustive search over `params`, scored by accuracy with 2-fold cross-validation.
# Runs in a single process and logs progress verbosely.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=params,
    scoring='accuracy',
    cv=2,
    n_jobs=1,
    verbose=10,
)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Winning parameter combination and its mean cross-validated accuracy.
best_params = grid_result.best_params_
best_score = grid_result.best_score_
print('Best params : ', best_params)
print('Best Score  : ', best_score)

In [None]:
# Pull the per-combination scores out of the grid search and build the lists used
# for plotting.
mean = grid_result.cv_results_['mean_test_score']
std = grid_result.cv_results_['std_test_score']
param = grid_result.cv_results_['params']
bar_mean = []
bar_std = []
bar_params = []
# The loop variables have their own names: reusing `mean` / `params` here would
# overwrite the score array and, worse, the search-space dict `params`, so the
# grid-search cell could not be re-run afterwards.
for mean_score, std_score, combo in zip(mean, std, param):
    print('(%f,%f),%r' % (mean_score, std_score, combo))
    bar_mean.append(mean_score)
    bar_std.append(std_score)
    bar_params.append('C_list : '+str(combo['C'])+'G_list : '+str(combo['gamma'])+'D_list'+str(combo['degree']))

In [None]:
# Grouped bar chart of the grid-search results: for every parameter combination
# one bar for the mean CV accuracy and one for its standard deviation.
x = np.arange(len(bar_mean))
w = 0.5
fig, ax = plt.subplots()
# Title is built from the kernels that were actually searched, so it cannot drift
# away from the search space; `fig` stays bound to the Figure.
ax.set_title('kernel : ' + ', '.join(kernel))
rect1 = ax.bar(x - w/2, bar_mean, w, color='blue', label='mean CV accuracy')
rect2 = ax.bar(x + w/2, bar_std, w, color='red', label='std of CV accuracy')
ax.set_xticks(x, labels=bar_params, fontsize=6, rotation=90)
ax.set_ylabel('accuracy')
ax.legend()
# Leave room for the rotated tick labels.
fig.subplots_adjust(bottom=0.20)
plt.show()