## Importing necessary libraries

In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from sklearn.decomposition import PCA

## Reading train and test files

In [2]:
# Load the train and test splits from CSV.
# Forward slashes resolve on Windows as well as on POSIX systems, so the
# notebook is no longer tied to one platform. Paths are relative to the
# kernel's working directory.
df_train = pd.read_csv("./Smartphone Data/train.csv")
df_test = pd.read_csv("./Smartphone Data/test.csv")

In [3]:
# Preview the first five rows of the training frame (head() defaults to 5).
df_train.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


## Compute the covariance matrix of the dataset

In [4]:
# Feature matrix: every row, first 561 columns of the training frame.
data_arr = df_train.iloc[:, :561].to_numpy()

# rowvar=False: each column is a variable and each row an observation.
cov_data = np.cov(data_arr, rowvar=False)

# Print the determinant of the covariance matrix.
cov_determinant = np.linalg.det(cov_data)
print(cov_determinant)

0.0


## Data preprocessing before training the classifier

In [5]:
# Reduce the feature matrix to its 100 leading principal components.
# random_state pins the result when scikit-learn's automatic solver choice
# lands on the randomized SVD, which is otherwise non-deterministic from
# one run to the next.
pca = PCA(n_components=100, random_state=42)
# fit() returns the fitted estimator, so cov_pca and pca name the same object.
cov_pca = pca.fit(data_arr)

## Data transformation

In [6]:
# Project the training features onto the fitted components, then wrap the
# resulting array in a DataFrame (default integer column labels and index).
data_train_pca = cov_pca.transform(X=data_arr)
df_train_red = pd.DataFrame(data=data_train_pca)

In [7]:
# Copy the subject id and activity label columns from the raw training
# frame onto the reduced frame (target column name, source column name).
for target_col, source_col in (('Subject', 'subject'), ('Activity', 'Activity')):
    df_train_red[target_col] = df_train[source_col]

## View the transformed dataset

In [10]:
# Show the first five rows of the PCA-reduced training frame.
df_train_red.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,Subject,Activity
0,-5.52028,-0.290278,-1.529929,1.333242,1.425089,-0.194708,0.577454,0.69149,-1.222866,-0.363414,...,-0.382804,0.196672,0.037025,0.026836,0.177754,0.059621,-0.122836,0.304309,1,STANDING
1,-5.53535,-0.08253,-1.924804,0.671273,0.67126,0.735144,-0.616908,-0.771714,-0.615496,-0.895525,...,-0.063785,0.137908,-0.072063,-0.057977,-0.050272,0.143428,0.042682,-0.057019,1,STANDING
2,-5.474988,0.287387,-2.144642,0.531806,0.207824,-0.037772,0.057628,0.093917,-0.063147,-0.216898,...,-0.13584,0.264654,0.018019,-0.140801,-0.103851,-0.089989,0.094197,0.009769,1,STANDING
3,-5.677232,0.897031,-2.01822,0.157125,0.759085,1.079547,-0.267805,-0.731391,0.281296,0.466269,...,0.572197,-0.256772,-0.125472,0.259999,0.042736,-0.225945,0.704772,0.281046,1,STANDING
4,-5.748749,1.162952,-2.139533,0.207823,0.47309,0.463035,-0.152227,-0.107509,0.289819,0.539206,...,-0.093292,0.108291,0.165886,-0.067533,-0.085802,0.127205,0.247204,-0.196926,1,STANDING


## Count the number of datapoints for each posture

In [9]:
# Split the reduced training frame into one subset per activity label.
train_activity = df_train_red['Activity']
df_train_red_STAND = df_train_red.loc[train_activity == 'STANDING']
df_train_red_SIT = df_train_red.loc[train_activity == 'SITTING']
df_train_red_LAY = df_train_red.loc[train_activity == 'LAYING']
df_train_red_WALK = df_train_red.loc[train_activity == 'WALKING']

# Report (rows, columns) for each subset.
for caption, subset in (
    ("Standing", df_train_red_STAND),
    ("Sitting", df_train_red_SIT),
    ("Laying", df_train_red_LAY),
    ("Walking", df_train_red_WALK),
):
    print(caption, subset.shape)


Standing (1374, 102)
Sitting (1286, 102)
Laying (1407, 102)
Walking (1226, 102)


## Transform test data

In [10]:
# Drop, in place, every test row that contains at least one missing value.
df_test.dropna(axis=0, how='any', inplace=True)

In [11]:
# Apply the PCA fitted on the training data to the first 561 test columns
# and wrap the projected array in a DataFrame.
test_features = df_test.iloc[:, :561].to_numpy()
data_test_red = cov_pca.transform(test_features)
df_test_red = pd.DataFrame(data=data_test_red)

In [12]:
# Attach subject id and activity label to the reduced test frame.
# Assign by position (plain arrays) rather than by index: df_test_red has a
# fresh 0..n-1 index, while df_test keeps its original index after dropna(),
# so an index-aligned assignment would shift labels and introduce NaN
# whenever a row had been dropped.
df_test_red['Subject'] = df_test['subject'].to_numpy()
df_test_red['Activity'] = df_test['Activity'].to_numpy()
df_test_red.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,Subject,Activity
0,-2.686743,-1.216821,-0.722075,-0.117271,-0.54636,0.442701,-0.387679,0.327529,0.117682,-1.593977,...,-0.257283,-0.047032,-0.067887,0.105388,0.239627,0.009753,0.308368,-0.178536,2,STANDING
1,-4.331255,-0.766327,-1.128404,-0.152942,-0.505817,0.485306,-0.423214,0.805947,0.591644,-0.986363,...,-0.221295,0.049995,-0.092697,0.018115,-0.060076,-0.061292,-0.033829,-0.112673,2,STANDING
2,-4.98536,0.371301,-1.656858,-0.235971,-0.102938,-0.113283,-0.143993,0.279862,0.330125,-0.163722,...,-0.250623,-0.182676,-0.06603,-0.07931,-0.003294,-0.142725,0.008672,0.146375,2,STANDING
3,-5.099876,0.243743,-1.802703,0.263715,-0.050454,-0.794957,0.083771,0.6505,0.270365,0.321617,...,-0.125686,-0.208623,0.025626,-0.035291,-0.079443,-0.02735,0.055508,0.183519,2,STANDING
4,-5.023,-0.518739,-1.871078,0.106466,0.153655,-0.947262,0.668394,0.792005,0.93268,0.593944,...,-0.003066,0.100127,-0.124063,-0.049023,-0.030226,-0.086565,-0.080247,0.145036,2,STANDING


In [13]:
# (rows, columns) of the reduced test frame, including the Subject and Activity columns.
df_test_red.shape

(2947, 102)

## Calculating true labels

In [14]:
# Integer code for each of the four activities being classified.
activity_codes = {'STANDING': 0, 'SITTING': 1, 'LAYING': 2, 'WALKING': 3}

# Map every test row's activity to its code; rows whose activity is not one
# of the four keys map to NaN and are dropped, so they contribute no label.
encoded_activity = df_test_red['Activity'].map(activity_codes)
labels_true = encoded_activity.dropna().astype(int).to_numpy()
labels_true.shape

(2056,)

In [15]:
from hmmlearn import hmm

ModuleNotFoundError: No module named 'hmmlearn'

In [16]:
# Implementing the HMM classifier.
# There are 4 activities, so one HMM is fitted per activity.

def HMM_F1score(N,M,labels_true,random_state=None):
    """Fit one GMM-HMM per activity and evaluate them on the reduced test set.

    A separate ``hmm.GMMHMM`` is trained on the first 100 columns of each
    per-activity training subset (``df_train_red_STAND``, ``df_train_red_SIT``,
    ``df_train_red_LAY``, ``df_train_red_WALK``). Every test row belonging to
    one of those four activities is then scored by each model as a
    length-1 sequence, and the model with the highest log-likelihood gives
    the predicted label (0=STANDING, 1=SITTING, 2=LAYING, 3=WALKING).

    Parameters
    ----------
    N : int
        Number of hidden states in each HMM.
    M : int
        Number of Gaussian mixture components per state.
    labels_true : array-like
        Integer labels (same 0..3 encoding) for the test rows whose activity
        is one of the four modelled activities, in row order.
    random_state : int or None, optional
        Passed to every ``GMMHMM`` so that a run can be reproduced. ``None``
        (the default) leaves the initialisation unseeded.

    Returns
    -------
    tuple
        ``(F1, acc)``: micro-averaged F1 score and accuracy.

    Notes
    -----
    Reads the module-level frames ``df_train_red_*`` and ``df_test_red``.
    """
    # The list position is the class label, so the order must match the
    # encoding used to build labels_true.
    activity_names = ['STANDING', 'SITTING', 'LAYING', 'WALKING']
    training_frames = [
        df_train_red_STAND,
        df_train_red_SIT,
        df_train_red_LAY,
        df_train_red_WALK,
    ]

    # Each model is fitted on its own activity's data.
    models = []
    for frame in training_frames:
        model = hmm.GMMHMM(n_components=N,n_mix=M,covariance_type='diag',
                           random_state=random_state)
        model.fit(frame.iloc[:,0:100].values)
        models.append(model)

    # Score only rows of the modelled activities; labels_true has no entry
    # for any other activity, so including them would make the prediction
    # and truth arrays differ in length.
    is_modelled = df_test_red['Activity'].isin(activity_names)
    test_features = df_test_red.loc[is_modelled].iloc[:,0:100].values

    # calculating F1 score
    labels_predict = []
    for sample in test_features:
        observation = sample.reshape((1,100))
        log_likelihood_value = np.array([model.score(observation) for model in models])
        labels_predict.append(np.argmax(log_likelihood_value))

    labels_predict = np.array(labels_predict, dtype=int)

    F1 = f1_score(labels_true,labels_predict,average='micro')
    acc = accuracy_score(labels_true, labels_predict)
    return F1,acc
      

In [17]:
# Candidate hidden-state counts: the integers 1 through 35 inclusive.
states = np.arange(1, 36)
states

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35])

In [24]:
# Sweep the number of hidden states (one mixture component per state) and
# record the micro-F1 and accuracy obtained for each setting.
F1_value_states = []
acc_value_states = []
for i in states:
    f1,acc = HMM_F1score(i,1,labels_true)
    # Report only after training and scoring have actually completed.
    print("HMM has been trained for num_states= {}".format(i))
    F1_value_states.append(f1)
    acc_value_states.append(acc)

fig,ax = plt.subplots(2,1,sharex=True)

# Plot against the actual state counts; without an explicit x the curves
# would be drawn against the 0-based list index, one less than the state count.
ax[0].plot(states, F1_value_states)
ax[0].set_ylabel("F1 score (micro)")
ax[1].plot(states, acc_value_states)
ax[1].set_ylabel("Accuracy")
ax[1].set_xlabel("Number of hidden states")
fig.suptitle("Test performance vs. number of hidden states")

plt.show()

HMM has been trained for num_states= 1


NameError: name 'hmm' is not defined

In [25]:
# Sweep the number of Gaussian mixture components per state (1 to 5) with
# the number of hidden states held at 3, recording micro-F1 and accuracy.
mixture_counts = list(range(1,6))

f_test = []
acc_test = []

for i in mixture_counts:
    f1,acc1 = HMM_F1score(3,i,labels_true)
    f_test.append(f1)
    acc_test.append(acc1)

fig,ax = plt.subplots(2,1,sharex=True)

# Plot against the actual mixture counts; without an explicit x the curves
# would be drawn against the 0-based list index (0..4) instead of 1..5.
ax[0].plot(mixture_counts, f_test)
ax[0].set_ylabel("F1 score (micro)")
ax[1].plot(mixture_counts, acc_test)
ax[1].set_ylabel("Accuracy")
ax[1].set_xlabel("Number of mixture components per state")
ax[1].set_xticks(mixture_counts)
fig.suptitle("Test performance vs. number of mixture components")

plt.show()

NameError: name 'hmm' is not defined