In [12]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from gtda.time_series import  SlidingWindow, Resampler
from gtda.homology import VietorisRipsPersistence
from gtda.time_series import TakensEmbedding
from gtda.diagrams import BettiCurve
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, KFold
from sklearn.naive_bayes import  BernoulliNB, MultinomialNB, CategoricalNB
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn import svm
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import time

In [13]:
def load_data(path):
    """Read every whitespace-delimited recording in ``path`` and build X / y.

    All regular files in the directory are parsed as header-less tables,
    stacked in sorted filename order and given the column names '1'..'54'.
    Rows whose label column ('2') equals 0, 16 or 17 are discarded, column
    '3' is removed, and any row that still contains a NaN is dropped.

    Parameters
    ----------
    path : str
        Directory holding the recordings. A trailing separator is optional.

    Returns
    -------
    X_ : pandas.DataFrame
        The 27 selected signal columns of the surviving rows. The row index
        is the position in the concatenated table, so it has gaps.
    y_ : pandas.Series
        Column '2' of the same rows, with every value <= 3 replaced by 1.

    Raises
    ------
    ValueError
        If ``path`` contains no regular files, or a file does not have
        exactly 54 columns.
    """
    column_names = [str(number) for number in range(1, 55)]
    feature_columns = ['5', '6', '7', '11', '12', '13', '14', '15', '16',
                       '22', '23', '24', '28', '29', '30', '31', '32', '33',
                       '39', '40', '41', '45', '46', '47', '48', '49', '50']
    excluded_labels = [0, 16, 17]

    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # concatenated series (and every window cut from it later) reproducible.
    for entry in sorted(os.listdir(path)):
        file_path = os.path.join(path, entry)
        # Skip sub-directories such as Jupyter's .ipynb_checkpoints.
        if not os.path.isfile(file_path):
            continue
        # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
        frames.append(pd.read_csv(file_path, header=None, sep=r'\s+'))

    if not frames:
        raise ValueError('No data files found in {!r}'.format(path))

    dataset = pd.concat(frames, axis=0, ignore_index=True)
    dataset.columns = column_names

    dataset = dataset[~dataset['2'].isin(excluded_labels)]
    # Column '3' is removed *before* dropna so that its missing values do not
    # cause otherwise complete rows to be thrown away.
    dataset_filter = dataset.drop(columns='3').dropna()

    X_ = dataset_filter[feature_columns]
    y_ = dataset_filter['2'].copy()
    # Labels 1, 2 and 3 are merged into the single class 1.
    y_[y_ <= 3] = 1
    return X_, y_

In [14]:
def data_preprocesssing(data, label, size, stride, period=2):
    """Scale, down-sample and window the signals together with their labels.

    Steps, in order:
      1. Min-max scale every column of ``data``.
      2. Keep every ``period``-th sample (and its label) via ``Resampler``.
      3. Cut the result into sliding windows via ``SlidingWindow``.

    Parameters
    ----------
    data : array-like
        Signal table passed to ``MinMaxScaler.fit_transform``.
    label : array-like
        One label per row of ``data``.
    size : int
        ``size`` argument of ``SlidingWindow``.
    stride : int
        ``stride`` argument of ``SlidingWindow``.
    period : int, default 2
        ``period`` argument of ``Resampler``. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    X, y
        Windowed data and the matching resampled labels, exactly as returned
        by ``SlidingWindow.fit_transform_resample``.
        Presumably X is (n_windows, size, n_columns) - TODO confirm.

    Notes
    -----
    NOTE(review): the scaler is fit on everything passed in, so if the caller
    later splits the windows into train/validation sets the scaling has
    already seen the validation rows.
    """
    scaled = MinMaxScaler().fit_transform(data)

    resampler = Resampler(period=period)
    data_resampled, label_resampled = resampler.fit_transform_resample(scaled, label)

    windower = SlidingWindow(size=size, stride=stride)
    X, y = windower.fit_transform_resample(data_resampled, label_resampled)
    return X, y


In [15]:
def extract_feature(data, time_delay, dimension, n_jobs=6):
    """Turn each channel of windowed data into Betti-curve features.

    For every index ``i`` along the last axis of ``data`` the slice
    ``data[:, :, i]`` is passed through
    ``TakensEmbedding -> VietorisRipsPersistence -> BettiCurve``; the Betti
    curve output is summed over its axis 1 and the per-channel results are
    concatenated along axis 1.

    Parameters
    ----------
    data : numpy.ndarray
        3-D array; the last axis is iterated as channels.
    time_delay : int
        ``time_delay`` of the Takens embedding.
    dimension : int
        ``dimension`` of the Takens embedding.
    n_jobs : int, default 6
        Parallel jobs used by ``VietorisRipsPersistence``. The default
        reproduces the previously hard-coded value.

    Returns
    -------
    numpy.ndarray
        Per-channel feature blocks joined along axis 1.
    """
    # The transformers are configured identically for every channel, so they
    # are built once; each fit_transform call refits them on that channel.
    embedder = TakensEmbedding(time_delay=time_delay, dimension=dimension)
    persistence = VietorisRipsPersistence(
        metric="euclidean",
        homology_dimensions=[0, 1],
        n_jobs=n_jobs,
        collapse_edges=True)
    betti = BettiCurve()

    per_channel = []
    for channel_index in tqdm(range(data.shape[2])):
        channel = data[:, :, channel_index]
        point_clouds = embedder.fit_transform(channel)
        diagrams = persistence.fit_transform(point_clouds)
        curves = betti.fit_transform(diagrams)
        # Collapse axis 1 of the Betti curves (presumably one curve per
        # homology dimension - TODO confirm) into a single vector per window.
        per_channel.append(curves.sum(axis=1))
        # The former time.sleep(1) per channel was removed: it only added
        # idle time and had no effect on the result.
    return np.concatenate(per_channel, axis=1)

In [16]:
def model_RFC(feature, label, n_splits, random_state=42):
    """Evaluate a random forest with a 70/30 hold-out split and K-fold CV.

    Prints, in order: hold-out accuracy, the classification report, the
    per-fold CV accuracies and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix, one row per sample.
    label : array-like
        Class label per row of ``feature``.
    n_splits : int
        Number of folds for the cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split, the forest and the fold shuffling.
        Previously only the split was seeded, so the forest and the CV
        scores changed on every run.

    Returns
    -------
    numpy.ndarray
        The per-fold cross-validation scores (previously nothing was returned).

    Notes
    -----
    The cross-validation runs on the full ``feature``/``label`` set, i.e. it
    also includes the rows held out for the first report.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)

    RFC = RandomForestClassifier(random_state=random_state)
    RFC.fit(X, y)

    # Predict once and reuse the predictions for both accuracy and report.
    y_pred = RFC.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # cross_val_score clones the estimator, so the fitted RFC is not reused.
    score = cross_val_score(RFC, feature, label, cv=cv)
    print(score)
    print(score.mean())
    return score

In [17]:
def model_NB(feature, label, n_splits, random_state=42):
    """Evaluate multinomial naive Bayes with a 70/30 hold-out split and K-fold CV.

    Prints, in order: hold-out accuracy, the classification report, the
    per-fold CV accuracies and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix, one row per sample. ``MultinomialNB`` requires
        non-negative values.
    label : array-like
        Class label per row of ``feature``.
    n_splits : int
        Number of folds for the cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split and the fold shuffling. Previously the
        folds were shuffled without a seed, so CV scores changed per run.

    Returns
    -------
    numpy.ndarray
        The per-fold cross-validation scores (previously nothing was returned).

    Notes
    -----
    The cross-validation runs on the full ``feature``/``label`` set, i.e. it
    also includes the rows held out for the first report.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)

    NB = MultinomialNB()
    NB.fit(X, y)

    # Predict once and reuse the predictions for both accuracy and report.
    y_pred = NB.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(NB, feature, label, cv=cv)
    print(score)
    print(score.mean())
    return score

In [18]:
def model_SVM(feature, label, n_splits, random_state=42):
    """Evaluate an RBF-kernel SVM with a 70/30 hold-out split and K-fold CV.

    Prints, in order: hold-out accuracy, the classification report, the
    per-fold CV accuracies and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix, one row per sample.
    label : array-like
        Class label per row of ``feature``.
    n_splits : int
        Number of folds for the cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split and the fold shuffling. Previously the
        folds were shuffled without a seed, so CV scores changed per run.

    Returns
    -------
    numpy.ndarray
        The per-fold cross-validation scores (previously nothing was returned).

    Notes
    -----
    The cross-validation runs on the full ``feature``/``label`` set, i.e. it
    also includes the rows held out for the first report.
    NOTE(review): the features are fed to the RBF kernel unscaled - confirm
    that is intended.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)

    SVM = svm.SVC(kernel='rbf')
    SVM.fit(X, y)

    # Predict once and reuse the predictions for both accuracy and report.
    y_pred = SVM.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(SVM, feature, label, cv=cv)
    print(score)
    print(score.mean())
    return score

In [19]:
def model_XGB(feature, label, n_splits, random_state=42):
    """Evaluate gradient-boosted trees with a 70/30 hold-out split and K-fold CV.

    Prints, in order: the classification report on the hold-out set, the
    per-fold CV accuracies and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix, one row per sample.
    label : array-like
        Class label per row of ``feature``. Arbitrary label values are
        accepted; they are mapped to 0..n_classes-1 internally.
    n_splits : int
        Number of folds for the cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split, the booster and the fold shuffling.
        Previously the folds were shuffled without a seed.

    Returns
    -------
    numpy.ndarray
        The per-fold cross-validation scores (previously nothing was returned).

    Notes
    -----
    The cross-validation runs on the full ``feature``/``label`` set, i.e. it
    also includes the rows held out for the first report.
    """
    # Recent XGBClassifier versions reject class labels that are not exactly
    # 0..n_classes-1, so non-contiguous labels are encoded first and decoded
    # again for reporting.
    encoder = LabelEncoder()
    label_encoded = encoder.fit_transform(label)

    X, X_valid, y, y_valid = train_test_split(
        feature, label_encoded, test_size=0.3, random_state=random_state)

    num_round = 150
    # No explicit objective: 'binary:logistic' is already the wrapper's
    # default, and spelling it out was misleading for multi-class labels.
    bst = XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=num_round,
                        random_state=random_state)
    bst.fit(X, y)

    y_pred = bst.predict(X_valid)
    # Report against the caller's original label values.
    print(classification_report(encoder.inverse_transform(y_valid),
                                encoder.inverse_transform(y_pred),
                                digits=4))

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(bst, feature, label_encoded, cv=cv)
    print(score)
    print(score.mean())
    return score

In [None]:
# Experiment settings, gathered in one place so they are easy to tune.
path = '../PAMAP2/Protocol/'
window_size, window_stride = 128, 64
takens_delay, takens_dimension = 5, 10
cv_folds = 5

# Load -> scale / resample / window -> topological features.
data, label = load_data(path)
data_processed, label_processed = data_preprocesssing(
    data, label, window_size, window_stride)
feature = extract_feature(data_processed, takens_delay, takens_dimension)

# Run every classifier on the same features, in the original order.
for evaluate in (model_RFC, model_NB, model_SVM, model_XGB):
    evaluate(feature, label_processed, cv_folds)