### Mount data from google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [44]:
ACTION_MAP_ID = {
    0: 'sit down', # sit down
    1: 'stand up', # stand up
    2: 'jump', # jump
    3: 'hand shake', # hand shake
    4: 'walk', # walk
}

### Import

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### Load data

In [4]:
path = 'drive/MyDrive/dataset.csv'

In [5]:
df = pd.read_csv(path)

print(df.shape)
data = df.values
print(data.shape)

n_samples = data.shape[0]
n_frame = 15
n_point = 14

X = data[:,:-1]
y = data[:, -1].astype('int')

X = X.reshape((n_samples, n_frame, n_point, 2))
print(X.shape)


(33736, 421)
(33736, 421)
(33736, 15, 14, 2)


### Normalization

In [11]:
def norm_inner_feat(X):
  temp_X = X - X[:, :, 0, :].reshape(X.shape[0], X.shape[1], 1, X.shape[-1])
  return temp_X[:, :, 1:, :]


def norm_outer_feat(X, fps=30):
  n_samples = X.shape[0]
  n_frame = X.shape[1]
  n_joint = X.shape[2]
  v = np.zeros((n_samples, n_frame - 1, n_joint, 2))
  time = 1 / fps
  for n in range(n_samples):
      for f in range(n_frame-1):
          v[n,f] = (X[n,f+1] - X[n,f]) / time
  return v

In [12]:
inner_feat = norm_inner_feat(X)
outer_feat = norm_outer_feat(X)

In [22]:
X_norm = np.hstack([inner_feat.reshape(len(X), -1), outer_feat.reshape(len(X), -1)])

## Modeling 

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size = 5000, stratify=y, random_state=42)


### SVM

In [26]:
import datetime
start =  datetime.datetime.now()

clf = svm.SVC(kernel='rbf', class_weight='balanced', random_state=42)
clf.fit(X_train, y_train)

end =  datetime.datetime.now()
print('Time: ', end - start)

Time:  0:19:15.088909


In [32]:
def print_score(clf, X_train, y_train, X_test, y_test, train=True):
    '''
    print the accuracy score, classification report and confusion matrix of classifier
    '''
    gt = y_train
    y_predict = clf.predict(X_train)
    if train==False:
      gt = y_test
      y_predict = clf.predict(X_test)

    print("Test Result:\n")        
    print("accuracy score: {0:.4f}\n".format(accuracy_score(gt, y_predict)))
    print("Classification Report: \n {}\n".format(classification_report(gt, y_predict)))
    print("Confusion Matrix: \n {}\n".format(confusion_matrix(gt, y_predict)))    
        

In [28]:
start =  datetime.datetime.now()

print_score(clf, X_train, y_train, X_test, y_test, train=True)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.7080

Classification Report: 
               precision    recall  f1-score   support

           0       0.58      0.87      0.70      2882
           1       0.50      0.59      0.54      6284
           2       0.76      0.66      0.71      2266
           3       0.69      0.80      0.74      4615
           4       0.93      0.71      0.80     12689

    accuracy                           0.71     28736
   macro avg       0.69      0.72      0.70     28736
weighted avg       0.75      0.71      0.72     28736


Confusion Matrix: 
 [[2498  229   44   38   73]
 [1032 3722  407  759  364]
 [ 122  488 1490   99   67]
 [ 158  571    0 3671  215]
 [ 477 2453   17  779 8963]]

Time:  0:13:08.923446


In [33]:
start =  datetime.datetime.now()

print_score(clf, X_train, y_train, X_test, y_test, train=False)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.6566

Classification Report: 
               precision    recall  f1-score   support

           0       0.50      0.76      0.60       502
           1       0.44      0.52      0.48      1093
           2       0.70      0.63      0.67       394
           3       0.67      0.76      0.71       803
           4       0.88      0.67      0.76      2208

    accuracy                           0.66      5000
   macro avg       0.64      0.67      0.64      5000
weighted avg       0.70      0.66      0.67      5000


Confusion Matrix: 
 [[ 381   58   12    8   43]
 [ 197  573   89  141   93]
 [  20   85  250   18   21]
 [  32  118    0  610   43]
 [ 131  473    5  130 1469]]

Time:  0:15:27.350258


In [34]:
from sklearn.externals import joblib

joblib.dump(clf, 'svm_15_d.pkl')



['svm_15_d.pkl']

### XGBoost

In [46]:
from xgboost import XGBClassifier

In [57]:
param = {
    "n_estimators": 100,
    "nthread": -1,
    "eta": 0.1,
    "max_depth": 20,
}

In [58]:
xgb = XGBClassifier(**param)


In [59]:
xgb.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=0.1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=20,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=-1, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [62]:
start =  datetime.datetime.now()

print_score(xgb, X_train, y_train, X_test, y_test, train=True)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.9998

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2882
           1       1.00      1.00      1.00      6284
           2       1.00      1.00      1.00      2266
           3       1.00      1.00      1.00      4615
           4       1.00      1.00      1.00     12689

    accuracy                           1.00     28736
   macro avg       1.00      1.00      1.00     28736
weighted avg       1.00      1.00      1.00     28736


Confusion Matrix: 
 [[ 2882     0     0     0     0]
 [    5  6279     0     0     0]
 [    0     0  2266     0     0]
 [    1     0     0  4614     0]
 [    0     0     0     0 12689]]

Time:  0:00:05.647985


In [60]:
start =  datetime.datetime.now()

print_score(xgb, X_train, y_train, X_test, y_test, train=False)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.8142

Classification Report: 
               precision    recall  f1-score   support

           0       0.83      0.78      0.80       502
           1       0.69      0.67      0.68      1093
           2       0.79      0.61      0.69       394
           3       0.84      0.85      0.84       803
           4       0.86      0.92      0.89      2208

    accuracy                           0.81      5000
   macro avg       0.80      0.76      0.78      5000
weighted avg       0.81      0.81      0.81      5000


Confusion Matrix: 
 [[ 390   53    4    4   51]
 [  45  736   59   92  161]
 [  18   80  240    4   52]
 [   6   56    0  680   61]
 [  11  138    2   32 2025]]

Time:  0:00:06.976420


In [61]:
xgb.save_model('xgb_15_d.pkl')

## XGBoost với windows = 30 frame

In [65]:
path = 'drive/MyDrive/30_dataset.csv'

In [66]:
df = pd.read_csv(path)
print(df.shape)

# Bỏ những trường hợp người đi bộ ra ngoài khung hình
df = df[~((df==1).any(axis=1) & (df.iloc[:, -1] == 4))]
data = df.values

print(data.shape)

n_samples = data.shape[0]
n_frame = 30
n_point = 14

X = data[:,:-1]
y = data[:, -1].astype('int')

X = X.reshape((n_samples, n_frame, n_point, 2))
print(X.shape)


(16232, 841)
(14710, 841)
(14710, 30, 14, 2)


#### Normalize

In [68]:
inner_feat = norm_inner_feat(X)
outer_feat = norm_outer_feat(X)
X_norm = np.hstack([inner_feat.reshape(len(X), -1), outer_feat.reshape(len(X), -1)])

#### Modeling

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size = 0.2, stratify=y, random_state=42)


In [96]:
param = {
    "n_estimators": 15,
    "nthread": -1,
    "eta": 0.1,
    "max_depth": 15,
}

In [97]:
xgb = XGBClassifier(**param)

In [98]:
start =  datetime.datetime.now()
xgb.fit(X_train, y_train)
end =  datetime.datetime.now()
print('Time: ', end - start)

Time:  0:06:38.064610


In [99]:
start =  datetime.datetime.now()

print_score(xgb, X_train, y_train, X_test, y_test, train=True)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.9957

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      0.99      1.00       766
           1       0.99      1.00      1.00      3489
           2       1.00      0.98      0.99      1269
           3       1.00      1.00      1.00      1153
           4       1.00      1.00      1.00      5091

    accuracy                           1.00     11768
   macro avg       1.00      0.99      1.00     11768
weighted avg       1.00      1.00      1.00     11768


Confusion Matrix: 
 [[ 761    5    0    0    0]
 [   0 3482    0    0    7]
 [   1   12 1242    0   14]
 [   0    4    0 1148    1]
 [   0    6    0    1 5084]]

Time:  0:00:00.722938


In [100]:
start =  datetime.datetime.now()

print_score(xgb, X_train, y_train, X_test, y_test, train=False)

end =  datetime.datetime.now()
print('Time: ', end - start)

Test Result:

accuracy score: 0.8644

Classification Report: 
               precision    recall  f1-score   support

           0       0.78      0.72      0.75       191
           1       0.79      0.85      0.82       872
           2       0.93      0.87      0.90       318
           3       0.84      0.70      0.76       288
           4       0.92      0.93      0.93      1273

    accuracy                           0.86      2942
   macro avg       0.85      0.81      0.83      2942
weighted avg       0.87      0.86      0.86      2942


Confusion Matrix: 
 [[ 138   33    5    2   13]
 [  26  739   12   28   67]
 [   2   21  277    2   16]
 [   2   75    0  201   10]
 [   8   66    5    6 1188]]

Time:  0:00:00.809374


In [101]:
xgb.save_model('xgb_30_d.pkl')