In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from gtda.time_series import  SlidingWindow, Resampler
from gtda.homology import VietorisRipsPersistence
from gtda.time_series import TakensEmbedding
from gtda.diagrams import BettiCurve
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, KFold
from sklearn.naive_bayes import  BernoulliNB, MultinomialNB, CategoricalNB
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn import svm
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import time


In [4]:
def load_file(filepath):
    """Read a whitespace-delimited, header-less text file into a NumPy array.

    Parameters
    ----------
    filepath : str or path-like
        Location of the text file to parse.

    Returns
    -------
    numpy.ndarray
        The parsed table (``DataFrame.values``), one row per line of the file.
    """
    # sep=r"\s+" is the documented replacement for delim_whitespace=True,
    # which pandas deprecated in 2.2.
    dataframe = pd.read_csv(filepath, header=None, sep=r"\s+")
    return dataframe.values


In [12]:
def load_dataset(data_rootdir, dirname, group):
    """Load every data file under a directory together with its label file.

    Parameters
    ----------
    data_rootdir : str
        Root directory of one data split; it must contain ``y_<group>.txt``.
    dirname : str
        Path fragment appended verbatim to ``data_rootdir``. Every file found
        beneath the resulting directory (recursively) is loaded.
    group : str
        Suffix of the label file name, e.g. ``'train'`` -> ``y_train.txt``.

    Returns
    -------
    X : numpy.ndarray
        The per-file arrays stacked with ``np.dstack`` (one file per index of
        the third axis), in lexicographic order of their file paths.
    y : numpy.ndarray
        Contents of ``<data_rootdir>/y_<group>.txt`` as returned by
        ``load_file``.

    Raises
    ------
    ValueError
        If no files are found under ``data_rootdir + dirname``.
    """
    signal_dir = data_rootdir + dirname
    # os.walk yields names in arbitrary, filesystem-dependent order. Sorting
    # pins which file lands at which index of the third axis, so two splits
    # with parallel file names are stacked in the same order.
    filepath_list = sorted(
        os.path.join(rootdir, filename)
        for rootdir, _dirnames, filenames in os.walk(signal_dir)
        for filename in filenames
    )
    if not filepath_list:
        # os.walk silently yields nothing for a missing directory; fail with a
        # clear message instead of np.dstack's generic one.
        raise ValueError('no data files found under ' + repr(signal_dir))
    X = np.dstack([load_file(filepath) for filepath in filepath_list])
    y = load_file(data_rootdir + '/y_' + group + '.txt')
    return X, y


In [19]:
def extract_feature(data, time_delay, dimension, n_jobs=6):
    """Build topological features from each slice along the third axis of ``data``.

    For every index ``i`` of ``data.shape[2]`` the 2-D slice ``data[:, :, i]``
    is passed through ``TakensEmbedding`` -> ``VietorisRipsPersistence``
    (homology dimensions 0, 1 and 2) -> ``BettiCurve``. The Betti-curve output
    is summed over its axis 1 and the per-slice results are concatenated along
    axis 1.

    Parameters
    ----------
    data : numpy.ndarray
        3-D array. Assumed to be (n_samples, n_timestamps, n_channels) -- TODO
        confirm against the caller.
    time_delay : int
        Forwarded to ``TakensEmbedding(time_delay=...)``.
    dimension : int
        Forwarded to ``TakensEmbedding(dimension=...)``.
    n_jobs : int, default 6
        Forwarded to ``VietorisRipsPersistence(n_jobs=...)``. The default
        keeps the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Per-slice features joined along axis 1; presumably
        (n_samples, n_channels * n_bins) -- verify against giotto-tda's
        ``BettiCurve`` output layout.
    """
    # The transformers carry no per-channel configuration, so build them once;
    # fit_transform re-fits them on every channel.
    embedder = TakensEmbedding(time_delay=time_delay, dimension=dimension)
    persistence = VietorisRipsPersistence(
        metric="euclidean",
        homology_dimensions=[0, 1, 2],
        n_jobs=n_jobs,
        collapse_edges=True)
    betti = BettiCurve()

    per_channel = []
    for channel in tqdm(range(data.shape[2])):
        series = data[:, :, channel]
        point_clouds = embedder.fit_transform(series)
        diagrams = persistence.fit_transform(point_clouds)
        curves = betti.fit_transform(diagrams)
        # Collapse axis 1 of the Betti-curve output (presumably the homology
        # dimensions) into a single curve per sample.
        per_channel.append(curves.sum(axis=1))
    return np.concatenate(per_channel, axis=1)


In [20]:
def model_RFC(feature, label, n_splits, random_state=42):
    """Evaluate a random forest on a 70/30 hold-out split and with K-fold CV.

    Prints, in order: the hold-out accuracy, the per-class classification
    report, the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix handed to scikit-learn (one row per sample).
    label : array-like
        Class label for each row of ``feature``.
    n_splits : int
        Number of folds of the shuffled ``KFold`` cross-validation.
    random_state : int or None, default 42
        Seed shared by the hold-out split, the forest and the fold shuffling,
        so that re-running the cell reproduces the same numbers. ``None``
        leaves all three unseeded.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    RFC = RandomForestClassifier(random_state=random_state)
    RFC.fit(X, y)
    # Predict once and reuse the predictions for both the accuracy and the
    # report (classifier.score would run a second prediction pass).
    y_pred = RFC.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))
    # cross_val_score clones the estimator, so the fit above does not leak
    # into the cross-validation estimate.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(RFC, feature, label, cv=cv)
    print(score)
    print(score.mean())



In [21]:
def model_NB(feature, label, n_splits, random_state=42):
    """Evaluate multinomial naive Bayes on a 70/30 hold-out split and with K-fold CV.

    Prints, in order: the hold-out accuracy, the per-class classification
    report, the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix handed to scikit-learn (one row per sample).
        NOTE(review): ``MultinomialNB`` expects non-negative features --
        confirm the extracted features satisfy this.
    label : array-like
        Class label for each row of ``feature``.
    n_splits : int
        Number of folds of the shuffled ``KFold`` cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split and the fold shuffling, so that re-running
        the cell reproduces the same numbers. ``None`` restores the previous
        unseeded behaviour.
    """
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    NB = MultinomialNB()
    NB.fit(X, y)
    # Predict once and reuse the predictions for both the accuracy and the
    # report (classifier.score would run a second prediction pass).
    y_pred = NB.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(NB, feature, label, cv=cv)
    print(score)
    print(score.mean())


In [22]:
def model_SVM(feature, label, n_splits, random_state=42):
    """Evaluate an RBF-kernel SVM on a 70/30 hold-out split and with K-fold CV.

    Prints, in order: the hold-out accuracy, the per-class classification
    report, the per-fold cross-validation scores, and a line with the fold
    count and the mean cross-validation score.

    Parameters
    ----------
    feature : array-like
        Feature matrix handed to scikit-learn (one row per sample).
    label : array-like
        Class label for each row of ``feature``.
    n_splits : int
        Number of folds of the shuffled ``KFold`` cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split and the fold shuffling, so that re-running
        the cell reproduces the same numbers. ``None`` restores the previous
        unseeded behaviour.
    """
    # Split into a training set and a hold-out validation set
    X, X_valid, y, y_valid = train_test_split(
        feature, label, test_size=0.3, random_state=random_state)
    # Define the SVM model
    SVM = svm.SVC(kernel='rbf')
    SVM.fit(X, y)
    # Predict once and reuse the predictions for both the accuracy and the
    # report (classifier.score would run a second prediction pass).
    y_pred = SVM.predict(X_valid)
    print(accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred, digits=4))
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(SVM, feature, label, cv=cv)
    print(score)
    # The message reads "<n_splits>-fold SVM cross-validation mean is: <mean>"
    print(n_splits, '折SVM交叉验证均值为：', score.mean())

In [23]:
def model_XGB(feature, label, n_splits, random_state=42):
    """Evaluate an XGBoost classifier on a 70/30 hold-out split and with K-fold CV.

    Prints, in order: the per-class classification report (using the caller's
    original label values), the per-fold cross-validation scores and their mean.

    Parameters
    ----------
    feature : array-like
        Feature matrix handed to the classifier (one row per sample).
    label : array-like
        Class label for each row of ``feature``; any label values are
        accepted because they are re-encoded internally.
    n_splits : int
        Number of folds of the shuffled ``KFold`` cross-validation.
    random_state : int or None, default 42
        Seed for the hold-out split, the booster and the fold shuffling, so
        that re-running the cell reproduces the same numbers. ``None`` leaves
        all three unseeded.
    """
    # Recent XGBoost releases reject class labels that are not exactly
    # 0..n_classes-1 (labels starting at 1 raise a ValueError), so map the
    # labels onto consecutive integers before fitting.
    encoder = LabelEncoder()
    encoded_label = encoder.fit_transform(label)

    # Split into a training set and a hold-out validation set
    X, X_valid, y, y_valid = train_test_split(
        feature, encoded_label, test_size=0.3, random_state=random_state)
    num_round = 150
    # NOTE(review): with more than two classes the scikit-learn wrapper is
    # expected to switch to a multi-class objective on its own -- confirm
    # against the installed XGBoost version.
    bst = XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=num_round,
                        objective='binary:logistic', random_state=random_state)
    bst.fit(X, y)
    y_pred = bst.predict(X_valid)
    # Decode back to the caller's label values so the report rows are
    # comparable with the other model_* functions.
    print(classification_report(encoder.inverse_transform(y_valid),
                                encoder.inverse_transform(y_pred), digits=4))
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    score = cross_val_score(bst, feature, encoded_label, cv=cv)
    print(score)
    print(score.mean())

In [24]:
# Load both splits of the inertial-signal data, pool them and coarsen the labels.
data_dirname = '/Inertial Signals/'

train_rootdir = './train/'
trainX, trainy = load_dataset(train_rootdir, data_dirname, 'train')

test_rootdir = './test/'
testX, testy = load_dataset(test_rootdir, data_dirname, 'test')

# Pool the two splits along the first axis; the model_* functions draw their
# own hold-out and cross-validation splits from the pooled data.
data = np.concatenate((trainX, testX), axis=0)
label = np.concatenate((trainy, testy), axis=0)
assert data.shape[0] == label.shape[0], 'every sample needs exactly one label'

# Merge every class id >= 4 into a single class 4, then flatten to 1-D
label[label >= 4] = 4
label = label.ravel()

In [None]:
# Extract the topological features once, then evaluate each classifier on
# them with 5-fold cross-validation, in the same order as before.
feature = extract_feature(data, time_delay=5, dimension=3)
for evaluate_model in (model_RFC, model_NB, model_SVM, model_XGB):
    evaluate_model(feature, label, 5)