In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import DataProcess as DP

### Load Protocol and Optional Data:

In [None]:
for subj_n in DP.protocol_list:
    #load the data
    subj_filename='./PAMAP2_Dataset/Protocol/subject10'+str(subj_n)+'.dat'
    HR_rest,HR_max=DP.HR_lim[subj_n]
    dp=DP.dataprocess(subj_filename,HR_rest,HR_max)
    np.save('data'+str(subj_n)+'.npy', (dp.feat_labels,dp.data_segmented))
    
for subj_n in DP.optional_list:
    #load the data
    subj_filename='./PAMAP2_Dataset/Optional/subject10'+str(subj_n)+'.dat'
    HR_rest,HR_max=DP.HR_lim[subj_n]
    dp=DP.dataprocess(subj_filename,HR_rest,HR_max)
    np.save('odata'+str(subj_n)+'.npy', (dp.feat_labels,dp.data_segmented))

In [19]:
datalabels=[]
for i in DP.protocol_list:
    feature_names,datalabelsi=np.load('data'+str(i)+'.npy')
    datalabels.append(datalabelsi)
for i in DP.optional_list:
    feature_names,datalabelsi=np.load('odata'+str(i)+'.npy')
    datalabels.append(datalabelsi)

In [20]:
# Stack data from different subjects into one chunk:
datalabels=np.vstack(datalabels)

#shuffle data
np.random.shuffle(datalabels)

traindata=datalabels[:,:-1]
trainlabels=datalabels[:,-1]

In [21]:
print("Activities involved in protocol + optional data:")
for a in np.unique(trainlabels):
    print(int(a), DP.activity_dict[a])

Activities involved in protocol + optional data:
1 lying
2 sitting
3 standing
4 walking
5 running
6 cycling
7 nordic walking
9 watching TV
10 computer work
11 car driving
12 ascending stairs
13 descending stairs
16 vacuum cleaning
17 ironing
18 folding laundry
19 house cleaning
20 playing_soccer
24 rope jumping


## Ordinary Classification: (124 features)

### (Random Forest)

In [22]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [23]:
X=datalabels[:,:-1]
y=datalabels[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

**Testing with test samples:**

In [24]:
cls_rf=RandomForestClassifier(random_state=42)
cls_rf.fit(X_train,y_train)
cls_rf.score(X_test,y_test)

0.98134473377380493

### (GradientBoostingClassifier)

In [25]:
from sklearn.ensemble import GradientBoostingClassifier
cls=GradientBoostingClassifier(learning_rate=0.2, n_estimators=75, max_depth=3)
cls.fit(X_train,y_train)
cls.score(X_test,y_test)

0.98717450446949084

In [26]:
y_pred=cls.predict(X_test)

print(confusion_matrix(y_test, y_pred))

[[166   0   1   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0]
 [  0 174   3   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0]
 [  0   1 164   0   0   0   0   0   0   0   0   0   1   1   0   2   0   0]
 [  0   0   0 258   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0  68   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0 143   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0 185   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0  80   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0 321   0   0   0   0   0   0   2   0   0]
 [  0   0   0   0   0   0   0   0   0  60   0   0   0   0   0   0   0   0]
 [  0   0   0   3   0   0   0   0   0   0  82   1   0   0   0   0   0   0]
 [  0   0   0   3   0   0   0   0   0   0   0  61   2   0   0   0   0   0]
 [  0   0   1   0   0   0   0   0   0   0   1   0 172   0   0   1   0   0]
 [  0   0   0   0   0   0

## LOSO Classification: (124 features)

In [14]:
#check LOSO cross validation!

scores=[]
for iout in range(9):
    pL = DP.protocol_list[:]
    index = pL.pop(iout)
    
    # Prepare for testing:
    feature_names,testdatalabels=np.load('data'+str(index)+'.npy')
    X_test=testdatalabels[:,:-1]
    y_test=testdatalabels[:,-1]
    
    # Prepare for training:
    datalabels=[]
    for i in pL:
        feature_names,datalabelsi=np.load('data'+str(i)+'.npy')
        datalabels.append(datalabelsi)
                    
    for j in DP.optional_list:
        feature_names,datalabelsi=np.load('odata'+str(j)+'.npy')
        datalabels.append(datalabelsi)
        
    datalabels=np.vstack(datalabels)

    # Shuffle data
    np.random.shuffle(datalabels)
    X_train=datalabels[:,:-1]
    y_train=datalabels[:,-1]

    # Classification:
    cls=RandomForestClassifier(random_state=42)
    cls.fit(X_train, y_train)
    scores.append(cls.score(X_test, y_test))
    
    
for jout in range(5):
    oL = DP.optional_list[:]
    index = oL.pop(jout)
    
    # Prepare for testing:
    feature_names,testdatalabels=np.load('odata'+str(index)+'.npy')
    X_test=testdatalabels[:,:-1]
    y_test=testdatalabels[:,-1]
    
    # Prepare for training:
    datalabels=[]
    for i in DP.protocol_list:
        feature_names,datalabelsi=np.load('data'+str(i)+'.npy')
        datalabels.append(datalabelsi)
                    
    for j in oL:
        feature_names,datalabelsi=np.load('odata'+str(j)+'.npy')
        datalabels.append(datalabelsi)
        
    datalabels=np.vstack(datalabels)
    
    # Shuffle data
    np.random.shuffle(datalabels)
    X_train=datalabels[:,:-1]
    y_train=datalabels[:,-1]

    # Classification:
    cls=RandomForestClassifier(random_state=42)
    cls.fit(X_train, y_train)
    scores.append(cls.score(X_test, y_test))
    

In [18]:
print("Accuracy of LOSO Random Forest Classification:")
for i in range(9):
    print("Exclude protocol {}: {}".format(i+1, scores[i]))
for j in range(9, 14):
    print("Exclude optional {}: {}".format(DP.optional_list[j-9], scores[j]))

Accuracy of LOSO Random Forest Classification:
Exclude protocol 1: 0.6418039895923677
Exclude protocol 2: 0.7981803143093466
Exclude protocol 3: 0.5588599752168525
Exclude protocol 4: 0.8124410933081998
Exclude protocol 5: 0.5126782884310618
Exclude protocol 6: 0.8770562770562771
Exclude protocol 7: 0.857278782112274
Exclude protocol 8: 0.6602409638554216
Exclude protocol 9: 0.043478260869565216
Exclude optional 1: 0.0
Exclude optional 5: 0.5931232091690545
Exclude optional 6: 0.2664233576642336
Exclude optional 8: 0.3862433862433862
Exclude optional 9: 0.4405506883604506
