All topology-related functions are used from the package giotto-tda

Reference: giotto-tda: A Topological Data Analysis Toolkit for Machine Learning and Data Exploration, Tauzin et al, arXiv:2004.02551, 2020.

Some parts of the code were taken from the tutorials and examples of giotto-tda. Reference: https://giotto-ai.github.io/gtda-docs/latest/notebooks/index.html

In [1]:
#Run this if giotto-tda is not installed
#pip install giotto-tda

In [1]:
import numpy as np
import pandas as pd

#Libraries for topological data analysis
from gtda.homology import CubicalPersistence #persistance diagram
from gtda.time_series import TakensEmbedding #embeddings
from gtda.metaestimators import CollectionTransformer
#topological features
from gtda.diagrams import Scaler, BettiCurve, Amplitude, PersistenceEntropy, NumberOfPoints, PersistenceLandscape, HeatKernel,PersistenceImage
from gtda.pipeline import Pipeline

#libraries for classifiers
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import sklearn.metrics
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.utils import shuffle

from os import listdir, walk
from os.path import isfile, join, relpath
import pickle

import matplotlib.pyplot as plt

import math


In [2]:
def write_to_file(data, filename):
    '''
    Utility function to dump data to "pickle_dumps/<filename>.pk1".

    The target directory is created on demand, so the very first dump in a
    fresh working directory does not fail with FileNotFoundError.

    data: any picklable object
    filename: name of the dump without the "pickle_dumps/" prefix and
              without the ".pk1" extension
    '''
    #local imports: the file itself only imports listdir/walk from os
    from os import makedirs
    from os.path import dirname

    dump_path = "pickle_dumps/%s.pk1"%filename
    makedirs(dirname(dump_path), exist_ok=True)
    with open(dump_path, 'wb') as f:
        pickle.dump(data, f)

def read_from_file(filename):
    '''
    Utility function to load the object stored in
    "pickle_dumps/<filename>.pk1".

    filename: name of the dump without directory prefix and extension
    Returns the unpickled object.
    '''
    dump_path = "pickle_dumps/%s.pk1"%filename
    #NOTE: pickle executes code while loading - only read dumps you created yourself
    with open(dump_path, 'rb') as dump:
        return pickle.load(dump)

def read_filenames(filepath):
    '''
    Utility function to collect the file names of the raw data.

    Walks "data/wisdm-dataset/raw/<filepath>" recursively and returns the
    relative paths of all ".txt" files in sorted order. os.walk yields
    entries in arbitrary, file-system dependent order; callers that derive
    a subject id from the position in this list therefore need the sort to
    get the same ids on every run and machine.

    filepath: sub folder below the raw data folder, e.g. "phone/accel/"
    '''
    path="data/wisdm-dataset/raw/" + filepath #path of raw data files
    files = [relpath(join(dirpath, f)) for dirpath,_,filenames in walk(path)
             for f in filenames if f.endswith(".txt")] #read only txt files
    return sorted(files)

In [3]:
#Some useful global variables
noofsubjects = 51 #number of subjects
noofactivities = 18 #number of activity classes
#class labels 1-13 and 15-19 (14 is skipped); the raw data stores label k as the letter chr(k+64)
activity_labels = np.append(range(1, 14), range(15, 20))
segmentsize=400 #window size of time series data (20 seconds)
#column names of the score table: one column per (metric, classifier) pair
cols = ["tree_accuracy", "forest_accuracy", "kn_accuracy", 
        "tree_recall", "forest_recall", "kn_recall",
        "tree_precision", "forest_precision", "kn_precision",
        "tree_f1", "forest_f1", "kn_f1"]

In [4]:
#cubical persistence diagrams in homology dimensions 0 and 1, computed on all cores
persistence = CubicalPersistence(n_jobs=-1, homology_dimensions=[0, 1])

#rescales the diagrams
scaling = Scaler()

#steps of the pipeline, executed in this order - configurable
steps = [
    ("persistence", persistence),
    ("scaling", scaling),
]

#turns the input arrays into scaled persistence diagrams
topological_transfomer = Pipeline(steps=steps)

In [5]:
def generate_time_segments(filepath):
    '''
    Generates segmented data from the raw data of every subject file found
    under filepath (see read_filenames).

    Each file is read with pandas.read_csv and must provide the columns
    'activity', 'X', 'Y' and 'Z'. For every activity the samples are cut
    into consecutive, non-overlapping windows of segmentsize rows; a
    trailing partial window is dropped. Every window is time delay embedded
    with embedding dimension 3 by stacking it with its copies shifted by one
    and two samples, which gives an array of shape (segmentsize-2, 9).

    filepath: sub folder of the raw data, e.g. "phone/accel/"

    Returns three arrays with one entry per window:
    the embedded windows, the subject id (position of the file in the list
    returned by read_filenames) and the class label.
    '''
    inputarr=[] #input
    output=[] #output labels
    subjectid_arr=[] #array of subject ids

    #get file names of the data files
    files=read_filenames(filepath)

    #one file per subject
    for subjectid, file in enumerate(files):
        data = pd.read_csv(file)

        #loop through each activity and get data
        for class_label in activity_labels:
            #the raw data stores the activity as a letter: 1 -> 'A', 2 -> 'B', ...
            rawdata=np.array(data.loc[data['activity']==chr(class_label+64), ['X','Y', 'Z']])

            #number of complete windows; integer division drops the partial
            #tail and yields 0 for an activity without data, so neither an
            #explicit skip nor a size check inside the loop is needed
            samples = rawdata.shape[0] // segmentsize

            #loop through the segments
            for start in range(0, samples*segmentsize, segmentsize):
                segmenteddata=rawdata[start:(start+segmentsize)]

                #Perform time delay embedding with embedding dimension=3
                embedded_data=np.column_stack([
                    segmenteddata[0:-2,:],
                    segmenteddata[1:-1,:],
                    segmenteddata[2:,:],
                ])

                inputarr.append(embedded_data)
                output.append(class_label)
                subjectid_arr.append(subjectid)

    return np.array(inputarr), np.array(subjectid_arr), np.array(output)

In [6]:
def generate_topological_features(inputarr, nbins=50):
    '''
    Turns the input windows into persistence diagrams (via
    topological_transfomer) and extracts topological features from them.

    inputarr: collection of input arrays accepted by topological_transfomer
    nbins: number of bins used by the amplitudes that discretise the diagram

    Returns one row per input sample; the outputs of the extractors are
    placed side by side in this order: persistence entropy, landscape,
    wasserstein, betti, bottleneck, persistence image, heat and silhouette
    amplitude, number of points.
    '''
    def binned_amplitude(metric):
        #amplitude whose metric is evaluated on nbins bins
        return Amplitude(metric=metric, n_jobs=-1, metric_params={"n_bins":nbins})

    #topological features extracted from the persistence diagrams
    extractors = [
        PersistenceEntropy(n_jobs=-1, normalize=True),
        binned_amplitude("landscape"),
        Amplitude(metric="wasserstein", n_jobs=-1),
        binned_amplitude("betti"),
        Amplitude(metric="bottleneck", n_jobs=-1),
        binned_amplitude("persistence_image"),
        binned_amplitude("heat"),
        binned_amplitude("silhouette"),
        NumberOfPoints(n_jobs=-1),
    ]

    #generate the persistence diagrams
    diagrams = topological_transfomer.fit_transform(inputarr)

    #one group of columns per extractor, in the order listed above
    return np.column_stack([extractor.fit_transform(diagrams) for extractor in extractors])


In [7]:
def generate_features(inputarr, subject, output, nbins):
    '''
    Generate topological features for the input, one subject at a time.

    inputarr: array of input windows
    subject: subject id of every window
    output: class label of every window
    nbins: forwarded to generate_topological_features

    Returns (features, class_label, subject_label) with one entry per
    window, grouped by ascending subject id. Subjects without any window
    are skipped.
    '''
    #collect the per subject results and join them once at the end;
    #growing the arrays with np.append would copy all previous rows again
    #for every subject
    feature_blocks = []
    label_blocks = []
    subject_blocks = []

    for subjectid in range(noofsubjects):
        print("Processing subject %i"%subjectid)
        mask = subject==subjectid
        X = inputarr[mask]

        if X.shape[0] == 0:
            continue

        #generate topological features
        feature_blocks.append(generate_topological_features(X, nbins))
        label_blocks.append(output[mask])
        subject_blocks.append(np.full(X.shape[0], subjectid, int))

    if not feature_blocks:
        #no data at all: same empty placeholders as before
        return np.empty((0,18), float), np.empty((0,1), int), np.empty((0,1), int)

    features = np.concatenate(feature_blocks, axis=0)
    class_label = np.concatenate(label_blocks)
    subject_label = np.concatenate(subject_blocks)

    return features, class_label, subject_label

In [8]:
def impersonal_model(features, class_label, subject_label):
    '''
    Trains impersonal models for the topological features with leave one
    subject out (LOSO) validation: for every subject the classifiers are
    trained on the data of all other subjects and the data of the subject
    itself is kept as test set.
    Classifiers trained - decision tree, random forest and k neighbours

    features: feature matrix, one row per window
    class_label: class label of every row
    subject_label: subject id of every row

    Returns five lists with one entry per subject:
    decision trees, random forests, k neighbours classifiers, scaled test
    features and test labels.
    '''
    tree_model=[]
    forest_model=[]
    kn_model=[]
    X_tst=[]
    y_tst=[]
    scaler = preprocessing.StandardScaler()

    #loop through all subjects
    for subjectid in range(noofsubjects):
        #LOSO validation
        print("Validating subject %i"%subjectid)
        held_out = subject_label==subjectid
        X_train = features[~held_out]
        y_train = class_label[~held_out]
        X_test = features[held_out]
        y_test = class_label[held_out]

        #shuffle training samples; seeded like the classifiers below,
        #otherwise the forest is fitted on a different row order in every
        #run and the results are not reproducible
        X_train, y_train = shuffle(X_train, y_train, random_state=0)

        #scale the features; the scaler is refitted on the training data of
        #each fold and only applied to the test data
        X_train=scaler.fit_transform(X_train)
        X_test=scaler.transform(X_test)

        #train the model
        forest = RandomForestClassifier(random_state=0)
        tree = DecisionTreeClassifier(random_state=0)
        kneighbors=KNeighborsClassifier()

        #fit the models
        forest.fit(X_train, y_train)
        tree.fit(X_train, y_train)
        kneighbors.fit(X_train, y_train)

        tree_model.append(tree)
        forest_model.append(forest)
        kn_model.append(kneighbors)
        X_tst.append(X_test)
        y_tst.append(y_test)

    return tree_model, forest_model, kn_model, X_tst, y_tst

In [9]:
def personal_model(features, class_label, subject_label):
    '''
    Trains personal models for the topological features: every subject gets
    its own classifiers, fitted on a stratified 80% split of the data of
    that subject; the remaining 20% are kept as test set.
    Classifiers trained - decision tree, random forest and k neighbours

    Returns five lists with one entry per subject:
    decision trees, random forests, k neighbours classifiers, scaled test
    features and test labels.
    '''
    tree_model, forest_model, kn_model = [], [], []
    X_tst, y_tst = [], []
    scaler = preprocessing.StandardScaler()

    #loop through all 51 subjects
    for subjectid in range(51):
        print("Processing subject %i"%subjectid)
        own_rows = subject_label==subjectid
        X = features[own_rows]
        y = class_label[own_rows]

        #split the data, keeping the class proportions in both parts
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=0, stratify=y)

        #scale the features; statistics come from the training part only
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        #create and fit the models
        forest = RandomForestClassifier(random_state=0)
        forest.fit(X_train, y_train)
        tree = DecisionTreeClassifier(random_state=0)
        tree.fit(X_train, y_train)
        kneighbors = KNeighborsClassifier()
        kneighbors.fit(X_train, y_train)

        #keep the models together with their test data
        tree_model.append(tree)
        forest_model.append(forest)
        kn_model.append(kneighbors)
        X_tst.append(X_test)
        y_tst.append(y_test)

    return tree_model, forest_model, kn_model, X_tst, y_tst

In [10]:
#calculate avg
def calculate_score(tree_model, forest_model, kn_model, X_tst, y_tst):
    '''
    Evaluates the models and returns the average metric scores.

    tree_model, forest_model, kn_model: one fitted classifier per subject
    X_tst, y_tst: test features and test labels per subject, in the same
                  order as the model lists

    Returns a tuple (score, overall_accuracy):
    score - array with one row per entry of activity_labels and 12 columns:
            accuracy, recall, precision and f1 (in this order), each for
            tree, forest and k neighbours, averaged over the subjects and
            given in percent
    overall_accuracy - array of 3 values (tree, forest, k neighbours), the
            accuracy over all classes averaged over the subjects, as a
            fraction (not multiplied by 100)
    '''
    n_classifiers = 3
    n_subjects = len(tree_model)
    #the per class metrics are computed with labels=activity_labels, so the
    #tables must have exactly one slot per label
    n_classes = len(activity_labels)

    def per_class_table():
        #initialize to np.nan to handle missing subjects and activities
        #np.nanmean below ignores the nan values
        return np.full([n_classifiers, n_subjects, n_classes], np.nan)

    accuracy = per_class_table()
    recall = per_class_table()
    precision = per_class_table()
    f1 = per_class_table()
    overall_accuracy = np.full([n_classifiers, n_subjects], np.nan)

    for subjectid, X_test in enumerate(X_tst):
        #subject without test data: leave its entries at nan
        if len(X_test) == 0:
            continue

        y_true = y_tst[subjectid]
        classifiers = [tree_model[subjectid], forest_model[subjectid], kn_model[subjectid]]
        for i, clf in enumerate(classifiers):
            y_pred = clf.predict(X_test)
            with np.errstate(divide='ignore', invalid='ignore'):
                recall[i, subjectid] = sklearn.metrics.recall_score(y_true, y_pred, average=None, labels=activity_labels, zero_division=0)
                precision[i, subjectid] = sklearn.metrics.precision_score(y_true, y_pred, average=None, labels=activity_labels, zero_division=0)
                f1[i, subjectid] = sklearn.metrics.f1_score(y_true, y_pred, average=None, labels=activity_labels, zero_division=0)
                #per class accuracy = correctly classified / true samples of the class;
                #0/0 gives nan for a class without true samples, which nanmean skips.
                #recall, precision and f1 use zero_division=0 instead, so such a
                #class counts as 0 in their averages
                cm = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=activity_labels)
                accuracy[i, subjectid] = cm.diagonal()/cm.sum(axis=1)
                overall_accuracy[i, subjectid] = sklearn.metrics.accuracy_score(y_true, y_pred)

    #average over the subjects (axis 1) and arrange as classes x (metric, classifier)
    score = np.column_stack([np.nanmean(accuracy, axis=1).T,
                             np.nanmean(recall, axis=1).T,
                             np.nanmean(precision, axis=1).T,
                             np.nanmean(f1, axis=1).T]) * 100

    return score, np.nanmean(overall_accuracy, axis=1)


In [11]:
#get all time segments from each sensor
#every call returns (embedded windows, subject id per window, class label per window)
phone_accel_input, phone_accel_subjectid, phone_accel_output = generate_time_segments("phone/accel/")
phone_gyro_input, phone_gyro_subjectid, phone_gyro_output = generate_time_segments("phone/gyro/")
watch_accel_input, watch_accel_subjectid, watch_accel_output = generate_time_segments("watch/accel/")
watch_gyro_input, watch_gyro_subjectid, watch_gyro_output = generate_time_segments("watch/gyro/")

In [12]:
#sanity check: print the shapes of the windows, subject ids and labels of every sensor
print("Phone accel: input %s subcjectid %s output %s"%(np.shape(phone_accel_input), np.shape(phone_accel_subjectid), np.shape(phone_accel_output)))
print("Phone gyro: input %s subcjectid %s output %s"%(np.shape(phone_gyro_input), np.shape(phone_gyro_subjectid), np.shape(phone_gyro_output)))
print("Watch accel: input %s subcjectid %s output %s"%(np.shape(watch_accel_input), np.shape(watch_accel_subjectid), np.shape(watch_accel_output)))
print("Watch gyro: input %s subcjectid %s output %s"%(np.shape(watch_gyro_input), np.shape(watch_gyro_subjectid), np.shape(watch_gyro_output)))

Phone accel: input (11465, 398, 9) subcjectid (11465,) output (11465,)
Phone gyro: input (8407, 398, 9) subcjectid (8407,) output (8407,)
Watch accel: input (9396, 398, 9) subcjectid (9396,) output (9396,)
Watch gyro: input (8448, 398, 9) subcjectid (8448,) output (8448,)


In [14]:
#generate features for phone accel (100 bins for the binned amplitudes)
(phone_accel_features, phone_accel_class_label, phone_accel_subject) = generate_features(
     phone_accel_input, 
     phone_accel_subjectid, 
     phone_accel_output, nbins=100)

#print the shapes as a sanity check and dump the result to pickle_dumps/phone_accel_features.pk1
print(phone_accel_features.shape)
print(phone_accel_class_label.shape)
print(phone_accel_subject.shape)
write_to_file((phone_accel_features, phone_accel_class_label, phone_accel_subject), "phone_accel_features")

In [15]:
#generate features for phone gyro (100 bins for the binned amplitudes)
(phone_gyro_features, phone_gyro_class_label, phone_gyro_subject) = generate_features(
     phone_gyro_input, 
     phone_gyro_subjectid, 
     phone_gyro_output, nbins=100)

#print the shapes as a sanity check
print(phone_gyro_features.shape)
print(phone_gyro_class_label.shape)
print(phone_gyro_subject.shape)

#dump the result to pickle_dumps/phone_gyro_features.pk1
write_to_file((phone_gyro_features, phone_gyro_class_label, phone_gyro_subject), "phone_gyro_features")

Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [16]:
#generate features for watch accel (100 bins for the binned amplitudes)
(watch_accel_features, watch_accel_class_label, watch_accel_subject) = generate_features(
     watch_accel_input, 
     watch_accel_subjectid, 
     watch_accel_output, nbins=100)

#print the shapes as a sanity check and dump the result to pickle_dumps/watch_accel_features.pk1
print(watch_accel_features.shape)
print(watch_accel_class_label.shape)
print(watch_accel_subject.shape)
write_to_file((watch_accel_features, watch_accel_class_label, watch_accel_subject), "watch_accel_features")

Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [17]:
#generate features for watch gyro (100 bins for the binned amplitudes)
(watch_gyro_features, watch_gyro_class_label, watch_gyro_subject) = generate_features(
     watch_gyro_input, 
     watch_gyro_subjectid, 
     watch_gyro_output, nbins=100)

#print the shapes as a sanity check and dump the result to pickle_dumps/watch_gyro_features.pk1
print(watch_gyro_features.shape)
print(watch_gyro_class_label.shape)
print(watch_gyro_subject.shape)
write_to_file((watch_gyro_features, watch_gyro_class_label, watch_gyro_subject), "watch_gyro_features")

Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [17]:
#generate personal models for phone accel
#result: per subject lists of trees, forests, k neighbours models, test features and test labels
(phone_accel_tree_model, 
 phone_accel_forest_model, 
 phone_accel_kn_model, 
 phone_accel_X_tst, 
 phone_accel_y_tst) = personal_model(phone_accel_features, phone_accel_class_label, phone_accel_subject)

#dump models and test data to pickle_dumps/phone_accel_personal_model.pk1
write_to_file((phone_accel_tree_model, 
               phone_accel_forest_model, 
               phone_accel_kn_model, 
               phone_accel_X_tst, 
               phone_accel_y_tst), "phone_accel_personal_model")


Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [18]:
#generate personal models for phone gyro
#result: per subject lists of trees, forests, k neighbours models, test features and test labels
(phone_gyro_tree_model, 
 phone_gyro_forest_model, 
 phone_gyro_kn_model, 
 phone_gyro_X_tst, 
 phone_gyro_y_tst) = personal_model(phone_gyro_features, phone_gyro_class_label, phone_gyro_subject)


#dump models and test data to pickle_dumps/phone_gyro_personal_model.pk1
write_to_file((phone_gyro_tree_model, 
               phone_gyro_forest_model, 
               phone_gyro_kn_model, 
               phone_gyro_X_tst, 
               phone_gyro_y_tst), "phone_gyro_personal_model")


Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [19]:
#generate personal models for watch accel
#result: per subject lists of trees, forests, k neighbours models, test features and test labels
(watch_accel_tree_model, 
 watch_accel_forest_model, 
 watch_accel_kn_model, 
 watch_accel_X_tst, 
 watch_accel_y_tst) = personal_model(watch_accel_features, watch_accel_class_label, watch_accel_subject)

#dump models and test data to pickle_dumps/watch_accel_personal_model.pk1
write_to_file((watch_accel_tree_model, 
               watch_accel_forest_model, 
               watch_accel_kn_model, 
               watch_accel_X_tst, 
               watch_accel_y_tst), "watch_accel_personal_model")

Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [20]:
#generate personal models for watch gyro
#result: per subject lists of trees, forests, k neighbours models, test features and test labels
(watch_gyro_tree_model, 
 watch_gyro_forest_model, 
 watch_gyro_kn_model, 
 watch_gyro_X_tst, 
 watch_gyro_y_tst) = personal_model(watch_gyro_features, watch_gyro_class_label, watch_gyro_subject)

#dump models and test data to pickle_dumps/watch_gyro_personal_model.pk1
write_to_file((watch_gyro_tree_model, 
               watch_gyro_forest_model, 
               watch_gyro_kn_model, 
               watch_gyro_X_tst, 
               watch_gyro_y_tst), "watch_gyro_personal_model")

Processing subject 0
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Processing subject 21
Processing subject 22
Processing subject 23
Processing subject 24
Processing subject 25
Processing subject 26
Processing subject 27
Processing subject 28
Processing subject 29
Processing subject 30
Processing subject 31
Processing subject 32
Processing subject 33
Processing subject 34
Processing subject 35
Processing subject 36
Processing subject 37
Processing subject 38
Processing subject 39
Processing subject 40
Processing subject 41
Processing subject 42
Processing subject 43
Processing subject 44
Processing subject 4

In [21]:
#calculate personal model accuracy scores
#calculate_score returns a tuple: (per activity score table in percent, overall accuracy per classifier)

phone_accel_score_personal = calculate_score(phone_accel_tree_model, 
                                   phone_accel_forest_model, 
                                   phone_accel_kn_model, 
                                   phone_accel_X_tst, 
                                   phone_accel_y_tst)

phone_gyro_score_personal = calculate_score(phone_gyro_tree_model, 
                                   phone_gyro_forest_model, 
                                   phone_gyro_kn_model, 
                                   phone_gyro_X_tst, 
                                   phone_gyro_y_tst)

watch_accel_score_personal = calculate_score(watch_accel_tree_model, 
                                   watch_accel_forest_model, 
                                   watch_accel_kn_model, 
                                   watch_accel_X_tst, 
                                   watch_accel_y_tst)

watch_gyro_score_personal = calculate_score(watch_gyro_tree_model, 
                                   watch_gyro_forest_model, 
                                   watch_gyro_kn_model, 
                                   watch_gyro_X_tst, 
                                   watch_gyro_y_tst)


In [13]:
#generate impersonal models for phone accel (leave one subject out)
#result: per held out subject lists of trees, forests, k neighbours models, test features and test labels
(phone_accel_tree_impersonal_model, 
 phone_accel_forest_impersonal_model, 
 phone_accel_kn_impersonal_model, 
 phone_accel_impersonal_X_tst, 
 phone_accel_impersonal_y_tst) = impersonal_model(phone_accel_features, phone_accel_class_label, phone_accel_subject)

Validating subject 0
Validating subject 1
Validating subject 2
Validating subject 3
Validating subject 4
Validating subject 5
Validating subject 6
Validating subject 7
Validating subject 8
Validating subject 9
Validating subject 10
Validating subject 11
Validating subject 12
Validating subject 13
Validating subject 14
Validating subject 15
Validating subject 16
Validating subject 17
Validating subject 18
Validating subject 19
Validating subject 20
Validating subject 21
Validating subject 22
Validating subject 23
Validating subject 24
Validating subject 25
Validating subject 26
Validating subject 27
Validating subject 28
Validating subject 29
Validating subject 30
Validating subject 31
Validating subject 32
Validating subject 33
Validating subject 34
Validating subject 35
Validating subject 36
Validating subject 37
Validating subject 38
Validating subject 39
Validating subject 40
Validating subject 41
Validating subject 42
Validating subject 43
Validating subject 44
Validating subject 4

In [13]:
#generate impersonal models for phone gyro (leave one subject out)
#NOTE(review): only this call restricts the features to the first 18 columns;
#generate_features already builds an 18 column matrix, so the slice looks
#redundant - confirm before removing it
(phone_gyro_tree_impersonal_model, 
 phone_gyro_forest_impersonal_model, 
 phone_gyro_kn_impersonal_model, 
 phone_gyro_impersonal_X_tst, 
 phone_gyro_impersonal_y_tst) = impersonal_model(phone_gyro_features[:,:18], phone_gyro_class_label, phone_gyro_subject)

Validating subject 0
Validating subject 1
Validating subject 2
Validating subject 3
Validating subject 4
Validating subject 5
Validating subject 6
Validating subject 7
Validating subject 8
Validating subject 9
Validating subject 10
Validating subject 11
Validating subject 12
Validating subject 13
Validating subject 14
Validating subject 15
Validating subject 16
Validating subject 17
Validating subject 18
Validating subject 19
Validating subject 20
Validating subject 21
Validating subject 22
Validating subject 23
Validating subject 24
Validating subject 25
Validating subject 26
Validating subject 27
Validating subject 28
Validating subject 29
Validating subject 30
Validating subject 31
Validating subject 32
Validating subject 33
Validating subject 34
Validating subject 35
Validating subject 36
Validating subject 37
Validating subject 38
Validating subject 39
Validating subject 40
Validating subject 41
Validating subject 42
Validating subject 43
Validating subject 44
Validating subject 4

In [13]:
#generate impersonal models for watch accel (leave one subject out)
#result: per held out subject lists of trees, forests, k neighbours models, test features and test labels
(watch_accel_tree_impersonal_model, 
 watch_accel_forest_impersonal_model, 
 watch_accel_kn_impersonal_model, 
 watch_accel_impersonal_X_tst, 
 watch_accel_impersonal_y_tst) = impersonal_model(watch_accel_features, watch_accel_class_label, watch_accel_subject)

Validating subject 0
Validating subject 1
Validating subject 2
Validating subject 3
Validating subject 4
Validating subject 5
Validating subject 6
Validating subject 7
Validating subject 8
Validating subject 9
Validating subject 10
Validating subject 11
Validating subject 12
Validating subject 13
Validating subject 14
Validating subject 15
Validating subject 16
Validating subject 17
Validating subject 18
Validating subject 19
Validating subject 20
Validating subject 21
Validating subject 22
Validating subject 23
Validating subject 24
Validating subject 25
Validating subject 26
Validating subject 27
Validating subject 28
Validating subject 29
Validating subject 30
Validating subject 31
Validating subject 32
Validating subject 33
Validating subject 34
Validating subject 35
Validating subject 36
Validating subject 37
Validating subject 38
Validating subject 39
Validating subject 40
Validating subject 41
Validating subject 42
Validating subject 43
Validating subject 44
Validating subject 4

In [15]:
#generate impersonal models for watch gyro (leave one subject out)
#result: per held out subject lists of trees, forests, k neighbours models, test features and test labels
(watch_gyro_tree_impersonal_model, 
 watch_gyro_forest_impersonal_model, 
 watch_gyro_kn_impersonal_model, 
 watch_gyro_impersonal_X_tst, 
 watch_gyro_impersonal_y_tst) = impersonal_model(watch_gyro_features, watch_gyro_class_label, watch_gyro_subject)


Validating subject 0
Validating subject 1
Validating subject 2
Validating subject 3
Validating subject 4
Validating subject 5
Validating subject 6
Validating subject 7
Validating subject 8
Validating subject 9
Validating subject 10
Validating subject 11
Validating subject 12
Validating subject 13
Validating subject 14
Validating subject 15
Validating subject 16
Validating subject 17
Validating subject 18
Validating subject 19
Validating subject 20
Validating subject 21
Validating subject 22
Validating subject 23
Validating subject 24
Validating subject 25
Validating subject 26
Validating subject 27
Validating subject 28
Validating subject 29
Validating subject 30
Validating subject 31
Validating subject 32
Validating subject 33
Validating subject 34
Validating subject 35
Validating subject 36
Validating subject 37
Validating subject 38
Validating subject 39
Validating subject 40
Validating subject 41
Validating subject 42
Validating subject 43
Validating subject 44
Validating subject 4

In [16]:
# Calculate the scores of the impersonal models, one sensor at a time.
# Every call hands calculate_score the tree, forest and kn models of a sensor
# followed by that sensor's X_tst / y_tst arrays.

# Phone accelerometer
phone_accel_score_impersonal = calculate_score(
    phone_accel_tree_impersonal_model,
    phone_accel_forest_impersonal_model,
    phone_accel_kn_impersonal_model,
    phone_accel_impersonal_X_tst,
    phone_accel_impersonal_y_tst,
)

# Phone gyroscope
phone_gyro_score_impersonal = calculate_score(
    phone_gyro_tree_impersonal_model,
    phone_gyro_forest_impersonal_model,
    phone_gyro_kn_impersonal_model,
    phone_gyro_impersonal_X_tst,
    phone_gyro_impersonal_y_tst,
)

# Watch accelerometer
watch_accel_score_impersonal = calculate_score(
    watch_accel_tree_impersonal_model,
    watch_accel_forest_impersonal_model,
    watch_accel_kn_impersonal_model,
    watch_accel_impersonal_X_tst,
    watch_accel_impersonal_y_tst,
)

# Watch gyroscope
watch_gyro_score_impersonal = calculate_score(
    watch_gyro_tree_impersonal_model,
    watch_gyro_forest_impersonal_model,
    watch_gyro_kn_impersonal_model,
    watch_gyro_impersonal_X_tst,
    watch_gyro_impersonal_y_tst,
)

In [24]:
# Show element 0 of the phone-accelerometer personal-model scores as a table,
# rounded to one decimal place; column names come from `cols`.
# NOTE(review): the meaning of the ten rows is not visible here - confirm.
pd.DataFrame(
    np.around(phone_accel_score_personal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,89.7,98.4,98.4,89.7,98.4,98.4,89.4,95.1,94.1,88.3,96.2,95.6
1,87.2,98.0,95.4,83.8,94.1,91.7,82.2,95.7,93.1,82.0,94.6,91.9
2,80.8,86.5,90.0,79.2,84.8,88.2,74.1,87.3,83.8,74.5,85.3,85.0
3,60.5,74.5,66.1,60.5,74.5,66.1,60.1,76.2,59.1,57.9,73.1,59.9
4,72.3,86.0,73.0,72.3,86.0,73.0,73.2,86.8,68.6,69.5,84.7,68.0
5,64.5,78.8,63.8,62.0,75.7,61.3,70.3,87.0,65.0,63.4,78.5,60.9
6,71.5,84.5,81.6,71.5,84.5,81.6,66.0,82.3,74.8,66.7,81.6,75.7
7,55.9,77.5,65.5,55.9,77.5,65.5,55.3,74.3,55.1,53.4,72.5,56.8
8,56.0,73.3,53.9,54.9,71.9,52.8,51.9,69.4,46.1,50.3,68.7,47.0
9,64.2,72.6,49.9,62.9,71.2,48.9,59.8,78.2,53.5,57.6,71.8,49.2


In [25]:
# Show element 0 of the phone-gyroscope personal-model scores as a table,
# rounded to one decimal place; column names come from `cols`.
pd.DataFrame(
    np.around(phone_gyro_score_personal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,87.3,95.1,95.4,87.3,95.1,95.4,89.4,98.0,90.4,86.7,95.7,91.6
1,85.0,97.3,94.2,81.7,93.5,90.5,78.6,89.2,88.9,78.8,90.6,89.2
2,69.0,77.0,84.0,67.6,75.5,82.4,58.0,70.8,68.6,59.5,70.4,71.2
3,45.1,43.6,31.4,45.1,43.6,31.4,41.7,43.8,27.8,41.3,42.0,27.5
4,51.6,52.0,47.4,51.6,52.0,47.4,40.7,43.8,29.1,42.7,44.0,32.6
5,56.0,61.9,53.6,53.8,59.5,51.5,56.0,59.6,43.5,51.8,57.3,44.9
6,45.1,52.0,38.9,45.1,52.0,38.9,39.2,46.1,29.9,39.9,46.4,31.8
7,31.5,39.9,36.3,31.5,39.9,36.3,34.3,44.0,35.9,31.1,39.7,33.4
8,36.0,33.0,32.7,35.3,32.4,32.0,31.3,25.0,30.0,30.9,27.0,27.7
9,26.3,36.3,32.3,25.8,35.6,31.7,24.0,38.8,32.8,23.6,35.4,30.9


In [26]:
# Show element 0 of the watch-accelerometer personal-model scores as a table,
# rounded to one decimal place; column names come from `cols`.
pd.DataFrame(
    np.around(watch_accel_score_personal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,89.2,93.1,93.1,89.2,93.1,93.1,83.3,88.2,86.4,84.8,89.0,87.1
1,83.0,96.0,95.0,81.4,94.1,93.1,82.4,93.1,94.1,81.7,93.5,93.5
2,78.7,87.7,85.0,77.1,85.9,83.3,81.4,89.7,87.3,75.9,86.2,83.2
3,59.8,68.4,69.0,59.8,68.4,69.0,63.0,79.4,73.7,58.8,70.7,67.4
4,63.7,75.0,68.8,63.7,75.0,68.8,66.6,79.3,73.9,62.0,74.3,69.2
5,75.8,83.8,83.8,74.3,82.2,82.2,83.1,92.1,87.1,75.9,85.3,82.5
6,81.9,96.6,90.7,81.9,96.6,90.7,78.8,94.8,92.0,78.4,93.9,90.8
7,71.5,82.5,85.4,71.5,82.5,85.4,64.9,80.7,72.4,65.0,78.9,76.1
8,70.9,83.3,78.4,70.9,83.3,78.4,47.4,59.8,52.6,52.9,65.8,60.2
9,59.2,78.8,76.8,58.0,77.3,75.3,62.3,83.7,78.9,57.4,77.9,74.3


In [27]:
# Show element 0 of the watch-gyroscope personal-model scores as a table,
# rounded to one decimal place; column names come from `cols`.
pd.DataFrame(
    np.around(watch_gyro_score_personal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,67.0,78.8,79.7,67.0,78.8,79.7,61.3,79.7,71.4,62.3,76.9,73.5
1,82.0,90.0,93.0,80.4,88.2,91.2,75.5,84.3,90.2,76.4,85.6,90.2
2,53.8,73.6,57.3,50.7,69.3,53.9,52.9,64.9,51.1,49.0,64.7,50.2
3,58.5,67.3,60.9,58.5,67.3,60.9,54.2,74.5,62.9,54.7,68.8,59.5
4,56.2,64.2,57.2,55.1,62.9,56.0,54.1,72.2,61.9,51.2,64.4,55.5
5,63.7,75.0,82.8,62.4,73.5,81.2,69.4,83.0,80.4,63.3,75.2,78.6
6,67.3,81.4,72.5,67.3,81.4,72.5,66.3,79.1,72.9,65.0,79.3,71.8
7,57.3,78.0,74.2,56.2,76.5,72.7,59.7,66.8,58.1,55.7,68.8,61.8
8,52.9,67.6,52.9,52.9,67.6,52.9,33.5,46.6,39.1,39.1,52.0,42.5
9,47.0,69.2,60.8,46.1,67.8,59.6,45.1,66.2,59.4,43.3,63.1,55.8


In [18]:
# Show element 0 of the phone-accelerometer impersonal-model scores (the
# result of calculate_score) as a table, rounded to one decimal place;
# column names come from `cols`.
pd.DataFrame(
    np.around(phone_accel_score_impersonal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,31.2,55.2,50.0,31.2,55.2,50.0,36.2,60.1,50.4,31.3,53.0,47.5
1,67.2,80.8,75.4,64.6,77.7,72.4,69.9,76.6,78.8,63.3,74.1,72.3
2,35.5,52.2,52.6,34.8,51.2,51.5,35.7,50.6,46.3,32.6,47.1,44.8
3,11.5,15.3,17.2,11.5,15.3,17.2,11.4,10.5,12.8,9.9,11.2,13.9
4,17.2,30.6,27.5,17.2,30.6,27.5,16.1,25.9,22.4,15.4,24.0,22.9
5,8.0,9.9,15.7,7.7,9.5,15.1,9.4,11.7,12.0,7.1,9.4,12.7
6,18.6,28.5,20.0,18.6,28.5,20.0,17.7,29.1,21.9,16.8,26.2,19.2
7,10.9,10.0,9.1,10.9,10.0,9.1,10.2,9.2,9.8,9.8,8.8,8.8
8,8.9,9.1,9.5,8.8,8.9,9.3,8.2,10.4,9.3,7.7,8.6,8.9
9,8.3,10.5,8.2,8.1,10.3,8.1,9.7,10.7,10.2,7.5,9.4,8.4


In [18]:
# Show element 0 of the phone-gyroscope impersonal-model scores (the result
# of calculate_score) as a table, rounded to one decimal place; column names
# come from `cols`.
pd.DataFrame(
    np.around(phone_gyro_score_impersonal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,30.0,45.5,40.6,30.0,45.5,40.6,36.5,49.0,40.6,30.7,43.6,38.1
1,64.4,73.7,59.4,61.9,70.8,57.1,62.4,74.4,63.4,57.4,66.7,55.6
2,26.7,38.7,33.3,26.2,37.9,32.6,29.5,33.9,30.3,26.0,32.9,28.3
3,12.2,15.6,18.6,12.2,15.6,18.6,11.3,14.4,10.8,10.7,13.8,13.2
4,13.4,18.5,22.3,13.4,18.5,22.3,12.8,17.1,13.3,12.4,15.7,16.1
5,11.5,12.1,9.2,11.0,11.6,8.8,11.1,11.1,6.7,10.7,10.3,7.3
6,13.8,16.0,13.6,13.8,16.0,13.6,14.8,24.2,16.1,13.5,17.5,13.8
7,7.9,8.9,11.9,7.9,8.9,11.9,8.4,11.5,12.5,7.7,8.9,11.7
8,9.6,8.1,6.8,9.4,7.9,6.7,10.2,11.8,7.8,9.6,8.5,6.9
9,11.9,16.2,9.2,11.7,15.9,9.0,10.7,14.2,11.7,10.9,14.1,9.7


In [15]:
# Show element 0 of the watch-accelerometer impersonal-model scores (the
# result of calculate_score) as a table, rounded to one decimal place;
# column names come from `cols`.
pd.DataFrame(
    np.around(watch_accel_score_impersonal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,47.2,57.5,53.6,47.2,57.5,53.6,52.6,69.1,64.4,46.9,58.8,54.0
1,87.9,91.3,92.1,86.2,89.5,90.3,88.3,87.3,88.4,84.8,85.4,86.8
2,52.3,62.3,62.5,51.3,61.1,61.2,48.6,60.4,55.0,47.1,57.6,54.0
3,38.0,48.8,47.5,38.0,48.8,47.5,35.7,47.6,40.0,35.6,44.4,41.9
4,39.7,43.3,42.8,39.7,43.3,42.8,38.9,49.9,42.5,37.4,43.0,40.7
5,52.1,59.2,57.1,51.1,58.0,56.0,51.8,59.6,50.2,48.4,55.7,50.9
6,56.0,66.2,56.3,56.0,66.2,56.3,54.1,70.2,68.5,52.8,65.2,58.8
7,29.0,38.0,41.0,29.0,38.0,41.0,31.5,38.6,32.5,28.8,34.7,34.6
8,23.9,37.6,32.3,23.9,37.6,32.3,23.0,31.8,26.9,22.5,31.9,27.7
9,36.8,49.3,37.4,36.1,48.4,36.7,37.6,49.5,50.6,34.9,45.5,39.4


In [17]:
# Show element 0 of the watch-gyroscope impersonal-model scores (the result
# of calculate_score) as a table, rounded to one decimal place; column names
# come from `cols`.
pd.DataFrame(
    np.around(watch_gyro_score_impersonal[0], decimals=1),
    columns=cols,
)

Unnamed: 0,tree_accuracy,forest_accuracy,kn_accuracy,tree_recall,forest_recall,kn_recall,tree_precision,forest_precision,kn_precision,tree_f1,foresh_ft,kn_f1
0,25.8,36.7,30.9,25.8,36.7,30.9,26.2,39.4,32.1,24.5,35.1,28.9
1,65.6,85.3,73.3,64.3,83.6,71.9,72.3,78.8,76.2,64.9,77.1,70.0
2,22.3,27.2,27.8,21.0,25.6,26.2,20.9,33.5,24.2,20.0,26.7,23.9
3,38.6,45.9,47.0,38.6,45.9,47.0,38.1,50.2,40.2,36.9,44.3,40.4
4,25.7,39.2,35.3,25.2,38.5,34.6,25.9,43.5,30.8,24.6,37.2,31.4
5,46.6,57.1,54.9,45.7,56.0,53.9,46.4,55.8,47.4,44.3,52.7,47.3
6,47.9,66.3,58.5,47.9,66.3,58.5,48.9,64.9,59.9,46.2,62.5,56.4
7,24.6,35.6,28.9,24.1,34.9,28.3,23.7,34.1,28.2,23.1,32.6,25.4
8,24.5,36.0,30.0,24.5,36.0,30.0,25.2,30.9,32.1,23.8,31.4,28.9
9,26.7,38.6,32.2,26.2,37.8,31.6,25.8,41.9,32.4,24.7,35.7,30.2


In [None]:
# Write the split time segments of each sensor to file for later use.
# write_to_file pickles each dict to pickle_dumps/<name>.pk1; the dict keeps
# the inputs, the subject ids and the outputs under fixed keys.
write_to_file(
    {
        "input": phone_accel_input,
        "subject": phone_accel_subjectid,
        "output": phone_accel_output,
    },
    "phone_accel_200",
)
write_to_file(
    {
        "input": phone_gyro_input,
        "subject": phone_gyro_subjectid,
        "output": phone_gyro_output,
    },
    "phone_gyro_200",
)
write_to_file(
    {
        "input": watch_accel_input,
        "subject": watch_accel_subjectid,
        "output": watch_accel_output,
    },
    "watch_accel_200",
)
write_to_file(
    {
        "input": watch_gyro_input,
        "subject": watch_gyro_subjectid,
        "output": watch_gyro_output,
    },
    "watch_gyro_200",
)

In [None]:
# Reload the time segments of each sensor from the pickle dumps if required.
# read_from_file unpickles pickle_dumps/<name>.pk1; each dump is a dict with
# the keys "input", "subject" and "output".
# NOTE(review): this cell loads the *_400 dumps, whereas the cell that writes
# the segments saves them as *_200 - confirm which dump is intended.
data = read_from_file("phone_accel_400")
phone_accel_input, phone_accel_subjectid, phone_accel_output = (
    data["input"],
    data["subject"],
    data["output"],
)

data = read_from_file("phone_gyro_400")
phone_gyro_input, phone_gyro_subjectid, phone_gyro_output = (
    data["input"],
    data["subject"],
    data["output"],
)

data = read_from_file("watch_accel_400")
watch_accel_input, watch_accel_subjectid, watch_accel_output = (
    data["input"],
    data["subject"],
    data["output"],
)

data = read_from_file("watch_gyro_400")
watch_gyro_input, watch_gyro_subjectid, watch_gyro_output = (
    data["input"],
    data["subject"],
    data["output"],
)

In [13]:
# Reload the extracted features of each sensor from the pickle dumps if
# required. Each dump is indexed positionally: element 0 holds the features,
# element 1 the class labels and element 2 the subjects.
data = read_from_file("phone_accel_features")
phone_accel_features, phone_accel_class_label, phone_accel_subject = (
    data[0],
    data[1],
    data[2],
)

data = read_from_file("phone_gyro_features")
phone_gyro_features, phone_gyro_class_label, phone_gyro_subject = (
    data[0],
    data[1],
    data[2],
)

data = read_from_file("watch_accel_features")
watch_accel_features, watch_accel_class_label, watch_accel_subject = (
    data[0],
    data[1],
    data[2],
)

data = read_from_file("watch_gyro_features")
watch_gyro_features, watch_gyro_class_label, watch_gyro_subject = (
    data[0],
    data[1],
    data[2],
)