In [1]:
import numpy as np
import pandas as pd
import glob
from tqdm.notebook import tqdm
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score

In [2]:
def segmentation(df, overlap_rate, time_window):
    """Split one recording into fixed-length, optionally overlapping windows.

    Missing values are filled before windowing: ``DataFrame.interpolate()``
    with its default settings, then a forward fill, and anything still
    missing is replaced with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        The recording, one row per sample. Any index is accepted because
        windows are cut by row position, not by index label.
    overlap_rate : float
        Fraction of a window shared with the following window. Must be
        below 1.
    time_window : int
        Number of rows in each window. Must be at least 1.

    Returns
    -------
    list of pandas.DataFrame
        Windows of exactly ``time_window`` rows, each re-indexed from 0.
        Trailing rows that cannot fill a whole window are dropped, so an
        input shorter than ``time_window`` yields an empty list.

    Raises
    ------
    ValueError
        If ``time_window`` is below 1 or ``overlap_rate`` is 1 or more.
    """
    if time_window < 1:
        raise ValueError(f"time_window must be at least 1, got {time_window}")
    if overlap_rate >= 1:
        raise ValueError(f"overlap_rate must be below 1, got {overlap_rate}")

    # Convert the overlap rate to a sliding-window step. Rounding first strips
    # float noise ((1 - 0.9) * 500 evaluates to just under 50 and would
    # truncate to 49); the clamp keeps a tiny step from truncating to 0,
    # which range() rejects.
    step = max(1, int(round((1 - overlap_rate) * time_window, 9)))

    # interpolate, then fill whatever interpolation could not reach
    filled = df.interpolate().ffill().fillna(0)

    # Positional slicing (.iloc) keeps every window at time_window rows even
    # when the frame's index is not the default 0..n-1 range.
    last_start = len(filled) - time_window
    return [
        filled.iloc[start:start + time_window].reset_index(drop=True)
        for start in range(0, last_start + 1, step)
    ]

In [None]:
def rename_columns(x_data):
    """Overwrite the column labels of ``x_data`` in place with a placeholder list.

    The list currently holds a single empty-string label, so the assignment
    only succeeds for a one-column frame. Nothing is returned.

    TODO: replace the placeholder with the real column names.
    """
    placeholder_names = [""]
    x_data.columns = placeholder_names

In [3]:
# Sliding-window settings applied to every recording.
OVERLAP_RATE = 0.5  # fraction of each window shared with the next one
TIME_WINDOW = 500   # rows per window

data_list = []
# glob returns paths in arbitrary, filesystem-dependent order; sorting keeps
# the window order (and so the seeded CV splits built from it) reproducible.
for file in tqdm(sorted(glob.glob("../TrainData/*/*/*.csv"))):
    tempdf = pd.read_csv(file)
    data_list.extend(segmentation(tempdf, OVERLAP_RATE, TIME_WINDOW))

  0%|          | 0/151 [00:00<?, ?it/s]

In [16]:
def get_speed_acc(x_data):
    """Return first- and second-order row differences of ``x_data``.

    The first frame holds row-to-row differences of the input and the second
    holds row-to-row differences of the first. In both, rows with no
    predecessor are set to 0. Columns are suffixed with ``_speed`` and
    ``_acc`` respectively; the input frame is left untouched.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(speed, acc)``.
    """
    base_names = list(x_data.columns)

    velocity = x_data.diff().fillna(0)
    # Differenced from the already zero-filled velocity frame.
    acceleration = velocity.diff().fillna(0)

    velocity.columns = [f"{name}_speed" for name in base_names]
    acceleration.columns = [f"{name}_acc" for name in base_names]
    return velocity, acceleration

In [None]:
def get_joint_distances(x_data):
    """Placeholder for marker-to-marker distance features (not implemented).

    The body is only ``pass``, so calling this returns None. The comments
    below list the marker pairs planned for the distance computation.

    NOTE(review): the numbers look like marker indices — confirm how they
    map onto the columns of ``x_data`` before implementing.
    """
    # joints
    # Front head        ->  left shoulder       (1->8)
    # Front head        ->  right shoulder      (1->4)
    # left shoulder     ->  left wrist          (8->10)
    # right shoulder    ->  right wrist         (4->7)
    # v sacral          ->  left elbow          (13->9)
    # v sacral          ->  right elbow         (13->6)
    # v sacral          ->  left wrist          (13->10)
    # v sacral          ->  right wrist         (13->7)
    # v sacral          ->  rear head           (13->3)
    # v sacral          ->  top head            (13->2)
    # left wrist        ->  right wrist         (10->7)
    # left asis         ->  left wrist          (12->10)
    # right asis        ->  right wrist         (11->7)
    # left wrist        ->  top head            (10->2)
    # right wrist       ->  top head            (7->2)
    # top head          ->  left asis           (2->12)
    pass

In [None]:
def get_joint_angles(x_data):
    """Placeholder for three-marker joint-angle features (not implemented).

    The body is only ``pass``, so calling this returns None. The comments
    below list the marker triples planned for the angle computation.

    NOTE(review): presumably the middle marker of each triple is the angle's
    vertex, and the numbers look like marker indices — confirm both before
    implementing.
    """
    # joints
    # left shoulder     ->  left elbow      ->  left wrist      (8->9->10)
    # right shoulder    ->  right elbow     ->  right wrist     (4->6->7)
    # right shoulder    ->  left shoulder   ->  front head      (4->8->1)
    # right shoulder    ->  left shoulder   ->  left elbow      (4->8->9)
    # left shoulder     ->  right shoulder  ->  right elbow     (8->4->6)
    # v sacral          ->  right offset    ->  rear head       (13->5->3)
    # v sacral          ->  top head        ->  front head      (13->2->1)
    # v sacral          ->  left shoulder   ->  left elbow      (13->8->9)
    # v sacral          ->  right shoulder  ->  right elbow     (13->4->6)
    # left asis         ->  left shoulder   ->  left elbow      (12->8->9)
    # right asis        ->  right shoulder  ->  right elbow     (11->4->6)
    pass

In [None]:
def get_streams(x_data):
    """Placeholder (not implemented); the body is only ``pass`` and returns None.

    NOTE(review): the intended output is not evident from this file — document
    it when the function is implemented.
    """
    pass

In [12]:
def get_features(x_data):
    """Flatten per-column summary statistics of one window into a feature list.

    For every column, in column order, six values are emitted in this order:
    population standard deviation (ddof=0), mean, maximum, minimum, median
    and variance (via ``np.var``).

    Returns
    -------
    list
        ``6 * number_of_columns`` values, grouped by column.
    """
    # Order matters: it fixes the layout of the resulting feature vector.
    summaries = (
        lambda series: series.std(ddof=0),  # std
        np.average,                         # avg
        np.max,                             # max
        np.min,                             # min
        np.median,                          # median
        np.var,                             # variance
    )
    return [
        summarise(x_data[name])
        for name in x_data.columns.tolist()
        for summarise in summaries
    ]

In [13]:
features_list = []
label_list = []
for j in tqdm(range(len(data_list))):
    segment = data_list[j]

    # Drop the identifier and label columns so only signal columns are summarised.
    x_data = segment.drop(columns=["subject_id", "activity"])

    # One feature vector and one label per window.
    features_list.append(get_features(x_data))
    # The window is labelled with the activity of its first row. Reading the
    # column by name avoids depending on "activity" being the last column.
    label_list.append(segment["activity"].iloc[0])

  0%|          | 0/3573 [00:00<?, ?it/s]

In [20]:
# Random forest with 300 trees, trained on all available cores. A fixed
# random_state makes the fitted forests, and hence the CV scores, repeatable
# from run to run (the CV splitter is seeded as well).
rf = RFC(n_estimators=300, n_jobs=-1, random_state=1)

In [23]:
# Stratified 10-fold cross-validation repeated 3 times (30 fits), scored on accuracy.
# NOTE(review): windows are cut with 50% overlap and folds are drawn per window,
# so neighbouring windows that share rows can land on both sides of a split.
# The accuracy may therefore be optimistic; consider grouping folds by
# recording or subject.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(
    rf,
    features_list,
    label_list,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    error_score='raise',  # surface fitting errors instead of recording a score
    verbose=2,
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:  3.8min finished


In [25]:
# Show the mean accuracy over all CV fits, followed by the individual scores.
n_scores.mean(), n_scores

(0.8262024735406269,
 array([0.79329609, 0.82960894, 0.82681564, 0.85434174, 0.83753501,
        0.86834734, 0.79831933, 0.81792717, 0.83473389, 0.8487395 ,
        0.83798883, 0.79888268, 0.84078212, 0.82352941, 0.82913165,
        0.85154062, 0.80112045, 0.82913165, 0.81232493, 0.82072829,
        0.79050279, 0.81005587, 0.84357542, 0.85154062, 0.82633053,
        0.79551821, 0.82913165, 0.83753501, 0.85154062, 0.79551821]))