In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn import metrics
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.metrics import ConfusionMatrixDisplay
from scikitplot.metrics import plot_roc
from scikitplot.metrics import plot_precision_recall
from scikitplot.metrics import plot_cumulative_gain
from scikitplot.metrics import plot_lift_curve

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn import random_projection
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.manifold import Isomap
from sklearn.manifold import TSNE
from sklearn.manifold import MDS
from sklearn.linear_model import LogisticRegression

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from scipy.spatial.distance import euclidean
from scipy.spatial.distance import cityblock
from tslearn.metrics import dtw, dtw_path, cdist_dtw, subsequence_cost_matrix
from scipy.spatial.distance import cdist
from pyts.metrics import dtw as dtw2



In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Feature names are the second whitespace-separated token of every line.
with open('../features.txt') as f:
    features = [entry.split()[1] for entry in f]

n_features = len(features)
print('No of Features: {}'.format(n_features))

No of Features: 561


# Loading Training Sets

In [4]:
# Engineered feature matrix for the training split; columns are named after
# the entries read from features.txt.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
X_train = pd.read_csv('X_train.txt', sep=r'\s+', header=None, encoding='latin-1')
X_train.columns = features

# Activity codes as a Series. read_csv(squeeze=True) was deprecated in
# pandas 1.4 and removed in 2.0, so the single-column frame is squeezed instead.
y_train = pd.read_csv('y_train.txt', names=['Activity']).squeeze('columns')

In [5]:
# body_acc_x training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_x_train = pd.read_csv('Inertial Signals/body_acc_x_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [6]:
# body_acc_y training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_y_train = pd.read_csv('Inertial Signals/body_acc_y_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [7]:
# body_acc_z training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_z_train = pd.read_csv('Inertial Signals/body_acc_z_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [8]:
# body_gyro_x training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_x_train = pd.read_csv('Inertial Signals/body_gyro_x_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [9]:
# body_gyro_y training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_y_train = pd.read_csv('Inertial Signals/body_gyro_y_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [10]:
# body_gyro_z training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_z_train = pd.read_csv('Inertial Signals/body_gyro_z_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [11]:
# total_acc_x training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_x_train = pd.read_csv('Inertial Signals/total_acc_x_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [12]:
# total_acc_y training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_y_train = pd.read_csv('Inertial Signals/total_acc_y_train.txt', sep=r'\s+', header=None, encoding='latin-1')

In [13]:
# total_acc_z training signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_z_train = pd.read_csv('Inertial Signals/total_acc_z_train.txt', sep=r'\s+', header=None, encoding='latin-1')

# Loading Test Sets

In [14]:
# Engineered feature matrix for the test split; columns are named after the
# entries read from features.txt.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
X_test = pd.read_csv('../test/X_test.txt', sep=r'\s+', header=None, encoding='latin-1')
X_test.columns = features

# Activity codes as a Series. read_csv(squeeze=True) was deprecated in
# pandas 1.4 and removed in 2.0, so the single-column frame is squeezed instead.
y_test = pd.read_csv('../test/y_test.txt', names=['Activity']).squeeze('columns')

In [15]:
# body_acc_x test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_x_test = pd.read_csv('../test/Inertial Signals/body_acc_x_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [16]:
# body_acc_y test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_y_test = pd.read_csv('../test/Inertial Signals/body_acc_y_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [17]:
# body_acc_z test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_acc_z_test = pd.read_csv('../test/Inertial Signals/body_acc_z_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [18]:
# body_gyro_x test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_x_test = pd.read_csv('../test/Inertial Signals/body_gyro_x_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [19]:
# body_gyro_y test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_y_test = pd.read_csv('../test/Inertial Signals/body_gyro_y_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [20]:
# body_gyro_z test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_z_test = pd.read_csv('../test/Inertial Signals/body_gyro_z_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [21]:
# total_acc_x test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_x_test = pd.read_csv('../test/Inertial Signals/total_acc_x_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [22]:
# total_acc_y test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_y_test = pd.read_csv('../test/Inertial Signals/total_acc_y_test.txt', sep=r'\s+', header=None, encoding='latin-1')

In [23]:
# total_acc_z test signals (whitespace-separated, no header row).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
total_acc_z_test = pd.read_csv('../test/Inertial Signals/total_acc_z_test.txt', sep=r'\s+', header=None, encoding='latin-1')

# Data preparation

# Class preparation

In [24]:
# Reload the gyroscope X-axis training signals so this cell always starts from
# a frame without the label columns added below (safe to re-run).
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
body_gyro_x_train = pd.read_csv('Inertial Signals/body_gyro_x_train.txt', sep=r'\s+', header=None, encoding='latin-1')

# Activity codes as a Series. read_csv(squeeze=True) was removed in pandas 2.0,
# so the single-column frame is squeezed explicitly.
y_train = pd.read_csv('y_train.txt', names=['Activity']).squeeze('columns')
y_train_labels = y_train.map({1: 'WALKING', 2:'WALKING_UPSTAIRS',3:'WALKING_DOWNSTAIRS', 4:'SITTING', 5:'STANDING',6:'LAYING'})

# Attach the numeric code and its readable name to every signal row.
body_gyro_x_train['Activity'] = y_train
body_gyro_x_train['ActivityName'] = y_train_labels

In [25]:
# Row positions of the SITTING / STANDING / LAYING rows.
# One vectorized comparison per class replaces three Python loops that built a
# row Series with .iloc[i] for every row; the resulting lists are the same.
activity_name = body_gyro_x_train['ActivityName']
sitting_train = np.flatnonzero((activity_name == 'SITTING').to_numpy()).tolist()
standing_train = np.flatnonzero((activity_name == 'STANDING').to_numpy()).tolist()
laying_train = np.flatnonzero((activity_name == 'LAYING').to_numpy()).tolist()

In [26]:
# All rows to discard: sitting first, then standing, then laying.
droptrian = [*sitting_train, *standing_train, *laying_train]

In [27]:
# Remove the collected rows; the surviving rows keep their original index labels.
body_gyro_x_train_sbilanciato = body_gyro_x_train.drop(index=droptrian)

In [28]:
# Strip the numeric label column so only signal values remain.
body_gyro_x_train_sbilanciato = body_gyro_x_train_sbilanciato.drop('Activity', axis=1)

In [29]:
# Strip the readable label column so only signal values remain.
body_gyro_x_train_sbilanciato = body_gyro_x_train_sbilanciato.drop('ActivityName', axis=1)

In [30]:
# Approximation: DFT, DWT, SVD, PLA, PAA, APCA, SAX

In [31]:
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import SymbolicAggregateApproximation

In [32]:
# SAX transform
n_paa_segments = 20
n_sax_symbols = 8
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
ts_sax = sax.fit_transform(body_gyro_x_train_sbilanciato)
sax_ts_inv = sax.inverse_transform(ts_sax)

In [33]:
# One flat symbol sequence per series.
X_seq = [series.ravel() for series in ts_sax]

# Sequential pattern mining

In [34]:
from prefixspan import PrefixSpan

In [35]:
# Build the PrefixSpan miner over the flattened SAX sequences.
ps = PrefixSpan(X_seq)

In [36]:
# Frequent patterns at a threshold of 500 (author's note: support = 15% of the dataset).
a = ps.frequent(500)

In [37]:
# Top-k patterns with k=3473; later cells read the pattern itself from b[i][1].
# NOTE(review): the choice of 3473 is not explained anywhere in view - confirm.
b = ps.topk(3473)

In [38]:
# Print every mined entry whose pattern is exactly the single symbol 8.
# NOTE(review): the SAX alphabet size is 8, so symbols are presumably 0-7 and
# this may never match - confirm the intended symbol.
for entry in b:
    if entry[1] == [8]:
        print (entry)

In [39]:
# First six patterns of length 5, in the order topk returned them.
newarr = [entry[1] for entry in b if len(entry[1]) == 5][:6]
count = len(newarr)

In [40]:
# Every pattern of length 5 among the mined entries.
f = [entry[1] for entry in b if len(entry[1]) == 5]

In [41]:
# Number of mined patterns that contain the symbol 4.
temp = sum(1 for entry in b if 4 in entry[1])

In [42]:
import re  # kept in scope: other cells may still call re.sub

# Render each selected pattern as space-separated symbols,
# e.g. [4, 4, 4, 4, 3] -> '4 4 4 4 3'.
# Joining the elements replaces the earlier regex clean-up of str(list): its
# non-raw patterns contained invalid escape sequences, and its result depended
# on how the list elements repr themselves.
arrnew = []
print(newarr)
for pattern in newarr:
    print(pattern)
    arrnew.append(' '.join(str(symbol) for symbol in pattern))

[[4, 4, 4, 4, 4], [3, 3, 3, 3, 3], [4, 4, 4, 4, 3], [4, 4, 4, 3, 3], [4, 4, 3, 4, 4], [4, 3, 3, 3, 3]]
[4, 4, 4, 4, 4]
[3, 3, 3, 3, 3]
[4, 4, 4, 4, 3]
[4, 4, 4, 3, 3]
[4, 4, 3, 4, 4]
[4, 3, 3, 3, 3]


In [43]:
# Collapse everything after the first axis so each series is one flat row.
# The shape is derived from the array instead of the hard-coded (3285, 20),
# so the cell keeps working if the row count or number of segments changes.
ts_sax = ts_sax.reshape(len(ts_sax), -1)

In [44]:
# One space-separated symbol string per series, e.g. '4 4 3 3 ...'.
# Joining the symbols avoids two problems with regex-stripping str(ndarray):
# the non-raw pattern was an invalid escape sequence, and numpy's array
# printing may wrap or pad long rows, which would break substring matching.
ts_sax_nuovo = [' '.join(str(symbol) for symbol in np.ravel(row)) for row in ts_sax]

In [45]:
# For each selected pattern string, collect the positions of the SAX strings
# that contain it as a substring.
# NOTE(review): substring matching only finds contiguous occurrences - confirm
# that this is the intended notion of a pattern occurring in a series.
matches = [
    [pos for pos, series in enumerate(ts_sax_nuovo) if pattern in str(series)]
    for pattern in arrnew
]

# Number of matching series per pattern, in arrnew order.
ind = [len(hits) for hits in matches]

# Expose the first six match lists under their individual names; a pattern
# slot that does not exist (fewer than six patterns) gets an empty list.
first_six = matches[:6]
first_six += [[] for _ in range(6 - len(first_six))]
index1, index2, index3, index4, index5, index6 = first_six

# Same final value the loop counter had: the match count of the last pattern.
count = ind[-1] if ind else 0

In [46]:
# Display the number of matching series per pattern.
ind

[117, 123, 127, 121, 71, 105]

In [47]:
# Keep only the activity codes 1, 2 and 3, preserving their original order.
tmp = [code for code in y_train if code in (1, 2, 3)]
y_train_sbilanciato = pd.Series(tmp, copy=False)

In [48]:
def _counts_per_class(rows):
    """Return [n1, n2, n3]: how many of the selected labels equal 1, 2 and 3."""
    chosen = y_train_sbilanciato[rows]
    return [np.count_nonzero(chosen == code) for code in (1, 2, 3)]


# One (code 1, code 2, code 3) triple per pattern, patterns 1 through 6.
a1, a2, a3 = _counts_per_class(index1)
b1, b2, b3 = _counts_per_class(index2)
c1, c2, c3 = _counts_per_class(index3)
d1, d2, d3 = _counts_per_class(index4)
e1, e2, e3 = _counts_per_class(index5)
f1, f2, f3 = _counts_per_class(index6)

In [49]:
# Summary table: one column per pattern, one row per activity code (1, 2, 3).
d = dict(zip(
    ("P1", "P2", "P3", "P4", "P5", "P6"),
    ([a1, a2, a3], [b1, b2, b3], [c1, c2, c3], [d1, d2, d3], [e1, e2, e3], [f1, f2, f3]),
))
new = pd.DataFrame(data=d)

In [50]:
# Display the per-pattern class counts (columns P1-P6).
new

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,53,36,72,54,21,46
1,44,66,40,49,35,40
2,20,21,15,18,15,19
