In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from gtda.time_series import  SlidingWindow
from gtda.homology import VietorisRipsPersistence
from gtda.time_series import TakensEmbedding
from gtda.diagrams import BettiCurve
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, KFold
from sklearn.naive_bayes import  BernoulliNB, MultinomialNB, CategoricalNB
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn import svm
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import time


In [2]:
def load_data(path):
    """Load every subject file found in ``path`` and split it into features and labels.

    Each regular file in the directory is read with ``pandas.read_csv`` and the
    resulting frames are stacked row-wise (the per-file row indices are kept).

    Parameters
    ----------
    path : str
        Directory containing the per-subject CSV files. A trailing path
        separator is optional.

    Returns
    -------
    X : pandas.DataFrame
        The 27 ``wri_*``, ``ank_*`` and ``bac_*`` sensor columns, in the order
        listed below.
    y : pandas.Series
        The ``class`` column with every value ``<= 3`` replaced by ``1``.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when the directory contains no files.
    KeyError
        If a required column is missing from the concatenated data.
    """
    sensor_columns = [
        'wri_Acc_X', 'wri_Acc_Y', 'wri_Acc_Z', 'wri_Gyr_X', 'wri_Gyr_Y', 'wri_Gyr_Z', 'wri_Mag_X', 'wri_Mag_Y', 'wri_Mag_Z',
        'ank_Acc_X', 'ank_Acc_Y', 'ank_Acc_Z', 'ank_Gyr_X', 'ank_Gyr_Y', 'ank_Gyr_Z', 'ank_Mag_X', 'ank_Mag_Y', 'ank_Mag_Z',
        'bac_Acc_X', 'bac_Acc_Y', 'bac_Acc_Z', 'bac_Gyr_X', 'bac_Gyr_Y', 'bac_Gyr_Z', 'bac_Mag_X', 'bac_Mag_Y', 'bac_Mag_Z',
    ]

    subject_frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # (and therefore every downstream window) identical between runs and machines.
    for entry in sorted(os.listdir(path)):
        # os.path.join works whether or not `path` ends with a separator.
        subject_path = os.path.join(path, entry)
        if not os.path.isfile(subject_path):
            # Skip sub-directories (e.g. .ipynb_checkpoints) that read_csv cannot open.
            continue
        subject_frames.append(pd.read_csv(subject_path))

    all_subject_data = pd.concat(subject_frames, axis=0)
    X = all_subject_data[sensor_columns]
    # Build the relabelled series without writing through a column selection,
    # which avoids chained-assignment warnings and leaves the source frame untouched.
    raw_labels = all_subject_data['class']
    y = raw_labels.mask(raw_labels <= 3, 1)
    return X, y

In [3]:
def data_preprocesssing(data, label, size, stride):
    """Min-max scale the samples and cut them into sliding windows.

    Parameters
    ----------
    data : array-like
        Samples to scale; passed to ``MinMaxScaler.fit_transform``.
    label : array-like
        Labels aligned with ``data``; resampled together with the windows.
    size : int
        Window size forwarded to ``SlidingWindow``.
    stride : int
        Window stride forwarded to ``SlidingWindow``.

    Returns
    -------
    tuple
        The windowed data and the resampled labels, exactly as returned by
        ``SlidingWindow.fit_transform_resample``.
    """
    # The scaler is fitted on everything it is given, i.e. before any
    # train/validation split performed by the caller.
    scaled = MinMaxScaler().fit_transform(data)
    windower = SlidingWindow(size=size, stride=stride)
    windows, window_labels = windower.fit_transform_resample(scaled, label)
    return windows, window_labels


In [4]:
def extract_feature(data, time_delay, dimension, n_jobs=6):
    """Compute Betti-curve features for every channel of a windowed signal.

    For each index ``i`` along the last axis, ``data[:, :, i]`` is Takens-embedded,
    converted to Vietoris-Rips persistence diagrams (homology dimensions 0 and 1),
    turned into Betti curves and summed over axis 1. The per-channel results are
    concatenated along axis 1.

    Parameters
    ----------
    data : numpy.ndarray
        3-D array indexed as ``data[:, :, channel]``
        (presumably windows x samples-per-window x channels, as produced by
        ``SlidingWindow`` -- confirm against the caller).
    time_delay : int
        ``time_delay`` forwarded to ``TakensEmbedding``.
    dimension : int
        ``dimension`` forwarded to ``TakensEmbedding``.
    n_jobs : int, default 6
        Number of parallel jobs for ``VietorisRipsPersistence``. The default
        matches the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Per-channel feature blocks joined along axis 1.
    """
    # The transformers do not depend on the channel, so build them once.
    # Each fit_transform call below refits them on the current channel.
    takens = TakensEmbedding(time_delay=time_delay, dimension=dimension)
    rips = VietorisRipsPersistence(
        metric="euclidean",
        homology_dimensions=[0, 1],
        n_jobs=n_jobs,
        collapse_edges=True)
    betti = BettiCurve()

    channel_features = []
    for channel in tqdm(range(data.shape[2])):
        point_clouds = takens.fit_transform(data[:, :, channel])
        diagrams = rips.fit_transform(point_clouds)
        curves = betti.fit_transform(diagrams)
        # Collapse axis 1 of the Betti-curve output (the homology-dimension axis
        # per the giotto-tda docs -- TODO confirm) so each window gets one curve.
        channel_features.append(curves.sum(axis=1))
        # The former time.sleep(1) per channel was removed: it only added
        # idle time and had no effect on the computed features.
    return np.concatenate(channel_features, axis=1)


In [5]:
def model_RFC(feature, label, n_splits, random_state=42):
    """Evaluate a random forest on a 70/30 hold-out split and with shuffled K-fold CV.

    Prints, in order: the hold-out accuracy, the hold-out classification
    report, the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix.
    label : array-like
        Target labels aligned with ``feature``.
    n_splits : int
        Number of folds for ``KFold``.
    random_state : int, default 42
        Seed used for the hold-out split, the forest and the fold shuffling so
        that re-running the cell prints the same numbers.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    RFC = RandomForestClassifier(random_state=random_state)
    RFC.fit(X, y)

    # Predict once and reuse the result for both the accuracy and the report;
    # estimator.score() would run a second, identical prediction pass.
    y_pred = RFC.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    # cross_val_score clones the estimator, so the fit above does not leak into CV.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(RFC, feature, label, cv=cv)
    print(score)
    print(score.mean())

In [6]:
def model_NB(feature, label, n_splits, random_state=42):
    """Evaluate multinomial naive Bayes on a 70/30 hold-out split and with shuffled K-fold CV.

    Prints, in order: the hold-out accuracy, the hold-out classification
    report, the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix.
    label : array-like
        Target labels aligned with ``feature``.
    n_splits : int
        Number of folds for ``KFold``.
    random_state : int, default 42
        Seed used for the hold-out split and the fold shuffling so that
        re-running the cell prints the same numbers.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    NB = MultinomialNB()
    NB.fit(X, y)

    # Predict once and reuse the result for both the accuracy and the report;
    # estimator.score() would run a second, identical prediction pass.
    y_pred = NB.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    # cross_val_score clones the estimator, so the fit above does not leak into CV.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(NB, feature, label, cv=cv)
    print(score)
    print(score.mean())


In [7]:
def model_SVM(feature, label, n_splits, random_state=42):
    """Evaluate an RBF-kernel SVM on a 70/30 hold-out split and with shuffled K-fold CV.

    Prints, in order: the hold-out accuracy, the hold-out classification
    report, the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix.
    label : array-like
        Target labels aligned with ``feature``.
    n_splits : int
        Number of folds for ``KFold``.
    random_state : int, default 42
        Seed used for the hold-out split and the fold shuffling so that
        re-running the cell prints the same numbers.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    SVM = svm.SVC(kernel='rbf')
    SVM.fit(X, y)

    # Predict once and reuse the result for both the accuracy and the report;
    # SVM.score() would repeat the (comparatively slow) kernel prediction.
    y_pred = SVM.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    # cross_val_score clones the estimator, so the fit above does not leak into CV.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(SVM, feature, label, cv=cv)
    print(score)
    print(score.mean())


In [8]:
def model_XGB(feature, label, n_splits, random_state=42):
    """Evaluate an XGBoost classifier on a 70/30 hold-out split and with shuffled K-fold CV.

    Prints, in order: the hold-out classification report (using the original
    label values), the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix.
    label : array-like
        Target labels aligned with ``feature``. They may be arbitrary class
        ids; they are re-encoded internally.
    n_splits : int
        Number of folds for ``KFold``.
    random_state : int, default 42
        Seed used for the hold-out split and the fold shuffling so that
        re-running the cell prints the same numbers.
    """
    # Recent XGBoost releases require class ids to be exactly 0..n_classes-1
    # and raise ValueError otherwise. The labels produced upstream start at 1,
    # so map them to consecutive integers first.
    encoder = LabelEncoder()
    encoded_label = encoder.fit_transform(label)

    X, X_valid, y, y_valid = train_test_split(
        feature, encoded_label, test_size=0.3, random_state=random_state)
    num_round = 150
    # NOTE(review): 'binary:logistic' is kept from the original; the sklearn
    # wrapper is expected to switch objectives itself when more than two
    # classes are present -- confirm for the installed XGBoost version.
    bst = XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=num_round, objective='binary:logistic')
    bst.fit(X, y)
    y_pred = bst.predict(X_valid)
    # Report against the original class ids rather than the encoded ones.
    print(classification_report(
        encoder.inverse_transform(y_valid),
        encoder.inverse_transform(y_pred),
        digits=4))

    # cross_val_score clones the estimator, so the fit above does not leak into CV.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(bst, feature, encoded_label, cv=cv)
    print(score)
    print(score.mean())


In [None]:
# --- Run configuration -------------------------------------------------------
# Everything a reader might want to tune lives here instead of being buried
# as positional magic numbers in the calls below.
path = './TNDADATASET/'
WINDOW_SIZE = 128          # `size` passed to data_preprocesssing
WINDOW_STRIDE = 64         # `stride` passed to data_preprocesssing
TIME_DELAY = 5             # `time_delay` passed to extract_feature
EMBEDDING_DIMENSION = 5    # `dimension` passed to extract_feature
N_SPLITS = 5               # number of cross-validation folds for every model

# --- Pipeline: load -> scale/window -> topological features -> evaluate ------
data, label = load_data(path)
data_processed, label_processed = data_preprocesssing(data, label, WINDOW_SIZE, WINDOW_STRIDE)
feature = extract_feature(data_processed, TIME_DELAY, EMBEDDING_DIMENSION)

# TODO: a plot_result(feature, label) call used to be commented out here, but no
# such helper is defined in the visible part of this notebook; restore it once
# the function exists.

# Evaluate every classifier on the same features and windowed labels.
for evaluate_model in (model_RFC, model_NB, model_SVM, model_XGB):
    evaluate_model(feature, label_processed, N_SPLITS)