# Principal Component Analysis 

In [1]:
%matplotlib widget

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# PCA lib
from sklearn.decomposition import PCA

In [37]:
# Load the aggregated sensor dataset and use the timestamp column as the
# index. set_index removes 'attr_time' from the columns, leaving the six
# sensor features followed by the five activity-label columns.
data = pd.read_csv(
    '../data/aggregated/aggregated_1s_outliers_imputations.csv'
).set_index('attr_time')
print(data.shape)
data.describe()

(40889, 11)


Unnamed: 0,attr_x,attr_y,attr_z,attr_azimuth,attr_pitch,attr_roll
count,40889.0,40889.0,40889.0,40889.0,40889.0,40889.0
mean,0.405864,2.506323,-7.418309,183.933622,-70.557582,2.303889
std,1.315252,4.130076,3.955022,61.696555,140.657229,7.975448
min,-6.295932,-8.167532,-10.198701,0.19636,-179.985784,-34.890335
25%,-0.195463,0.028519,-9.614575,151.067813,-174.46202,-1.133918
50%,0.52439,0.936528,-9.507519,176.028911,-159.471571,3.045493
75%,1.232643,3.324648,-8.692197,208.455981,-68.584006,7.316644
max,5.57776,12.572045,10.123882,359.922203,179.983899,42.920508


In [8]:
# Show the column layout. The plotting cells below select the label columns
# by position (index 6 onward), so the column order matters.
data.columns

Index(['attr_x', 'attr_y', 'attr_z', 'attr_azimuth', 'attr_pitch', 'attr_roll',
       'Label_ClimbingDown', 'Label_ClimbingUp', 'Label_Sitting',
       'Label_Standing', 'Label_Walking'],
      dtype='object')

## Accelerometer plot

In [12]:
# One 3-D scatter of the raw accelerometer axes per activity label, coloured
# by that label's column so the samples belonging to the activity stand out.
# Columns 6..10 of `data` are the five Label_* columns.
label_columns = data.columns[6:11]

fig = plt.figure(figsize=(10, 10))
for position, label in enumerate(label_columns, start=1):
    # 2x3 grid; the sixth slot stays empty because there are only five labels.
    ax = fig.add_subplot(2, 3, position, projection='3d')
    ax.scatter3D(data.attr_x, data.attr_y, data.attr_z, c=data[label])
    ax.set_title(label)
    ax.set_xlabel('attr_x')
    ax.set_ylabel('attr_y')
    # Must be set_zlabel: a second set_ylabel call would overwrite the y label
    # and leave the z axis unlabelled.
    ax.set_zlabel('attr_z')

plt.savefig('../figures/acc_per_label.pdf', format='pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
# NOTE(review): disabled exploratory cell - a single 3-D scatter of the
# accelerometer axes (optionally coloured by Label_Walking). Nothing here is
# executed; delete the cell if it is no longer needed.
# fig = plt.figure()
# ax = plt.axes(projection='3d')
# ax.scatter3D(data.attr_x, data.attr_y, data.attr_z) #, c=data.Label_Walking);

## Device Orientation Plot

In [11]:
# One 3-D scatter of the device-orientation angles per activity label,
# coloured by that label's column. Columns 6..10 of `data` are the five
# Label_* columns.
label_columns = data.columns[6:11]

fig = plt.figure(figsize=(10, 10))
for position, label in enumerate(label_columns, start=1):
    # 2x3 grid; the sixth slot stays empty because there are only five labels.
    ax = fig.add_subplot(2, 3, position, projection='3d')
    ax.scatter3D(data.attr_azimuth, data.attr_pitch, data.attr_roll, c=data[label])
    ax.set_title(label)
    # Axis labels name the columns that are actually plotted (the orientation
    # angles, not the accelerometer axes), and the third one goes on the z axis.
    ax.set_xlabel('attr_azimuth')
    ax.set_ylabel('attr_pitch')
    ax.set_zlabel('attr_roll')

plt.savefig('../figures/ori_per_label.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## PCA

In [13]:
# Names of the six sensor columns (the first six columns of `data`); the
# remaining columns are the activity labels.
features = data.columns[:6].values

In [14]:
# Min-max scale every sensor feature to [0, 1] so that no feature dominates
# the PCA purely because of its units. Label columns are left untouched.
feature_min = data[features].min()
feature_range = data[features].max() - feature_min

normalized_data = data.copy()
normalized_data[features] = (data[features] - feature_min) / feature_range
normalized_data.describe()

Unnamed: 0,attr_x,attr_y,attr_z,attr_azimuth,attr_pitch,attr_roll
count,40889.0,40889.0,40889.0,40889.0,40889.0,40889.0
mean,0.564424,0.514661,0.136813,0.51077,0.303993,0.478008
std,0.11077,0.19914,0.194612,0.17151,0.390747,0.102498
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.51378,0.395189,0.028743,0.419407,0.015345,0.433827
50%,0.574406,0.43897,0.034011,0.488796,0.056989,0.487539
75%,0.634055,0.554118,0.07413,0.57894,0.309475,0.542431
max,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
# Fit a PCA that keeps all six components so the explained variance of every
# component can be inspected before choosing how many to retain.
# fit() returns the estimator itself, so `pca` is the fitted model.
pca = PCA(n_components=6).fit(normalized_data[features])
pca

PCA(n_components=6)

In [22]:
# Running total of the explained-variance ratios: entry k is the share of
# variance captured by the first k+1 components.
# NOTE(review): `pca` is rebound by the 4-component fit further down, so the
# length of this result depends on which fit was executed last.
pca.explained_variance_ratio_.cumsum()

array([0.56518459, 0.8113317 , 0.91749492, 0.9926239 ])

In [16]:
# Cumulative explained variance against the number of components kept.
# A leading 0 is inserted so the curve starts at "zero components, zero
# variance" and the x position equals the number of components.
cumulative_variance = np.cumsum(np.insert(pca.explained_variance_ratio_, 0, 0))

fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(cumulative_variance)
ax.set_title('Principal Components Explained Variance')
ax.set_xlabel('Number of components')
# Trailing semicolon keeps the return value's repr out of the cell output.
ax.set_ylabel('Cumulative explained variance ratio');

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x11fd90640>]

In [31]:
# Number of principal components to retain. Defined once so the model and the
# column names of the score frame cannot drift apart.
N_COMPONENTS = 4

# random_state only matters when scikit-learn's 'auto' solver selects a
# randomized SVD (which depends on the library version and the data shape);
# fixing it keeps the projection reproducible across runs either way.
pca = PCA(n_components=N_COMPONENTS, random_state=0)
pca.fit(normalized_data[features])
pcas = pca.transform(normalized_data[features])
print(pcas.shape)

# One column per retained component, named pca_0 .. pca_{N_COMPONENTS-1}.
# The frame gets a fresh 0..n-1 index (not the attr_time index of `data`).
pcas_df = pd.DataFrame(pcas, columns=[f'pca_{i}' for i in range(N_COMPONENTS)])

(40889, 4)


In [32]:
# Preview the projected samples: one row per sample, one column per retained
# principal component.
pcas_df

Unnamed: 0,pca_0,pca_1,pca_2,pca_3
0,0.014538,0.677151,0.130312,0.222946
1,0.021735,0.647497,0.129872,0.220391
2,0.023366,0.641095,0.135996,0.244909
3,0.028611,0.639040,0.125829,0.208919
4,0.033823,0.647885,0.050111,0.169323
...,...,...,...,...
40884,-0.022980,0.671719,0.473244,0.040975
40885,-0.029189,0.663747,-0.030057,0.215565
40886,-0.051041,0.646623,0.318168,0.221181
40887,-0.039232,0.661887,0.500035,0.091138


In [18]:
# Component loadings: one row per principal component, one column per original
# feature, showing how strongly each feature contributes to that component.
# NOTE(review): uses whichever `pca` was fitted last - confirm it is the
# retained-components fit and not the exploratory six-component one.
pd.DataFrame(pca.components_, columns=features)

Unnamed: 0,attr_x,attr_y,attr_z,attr_azimuth,attr_pitch,attr_roll
0,0.002932,-0.239156,-0.104037,-0.010963,0.965319,0.003285
1,0.105736,0.649896,0.708242,0.003361,0.236739,0.093759
2,-0.18285,0.03289,0.008268,0.967093,0.021164,-0.172336
3,-0.702263,0.065025,0.122185,-0.252595,0.030756,-0.650323


## Analysis of labels in principal-component space

In [24]:
# All samples in the space of the first three principal components; the
# fourth component is encoded as colour intensity.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection='3d')
points = ax.scatter3D(
    pcas_df.iloc[:, 0],
    pcas_df.iloc[:, 1],
    pcas_df.iloc[:, 2],
    c=pcas_df.iloc[:, 3],
    cmap='Greens',
)
ax.set_title('Samples in principal-component space')
ax.set_xlabel('pca 1')
ax.set_ylabel('pca 2')
ax.set_zlabel('pca 3')
# The colour bar documents what the colour encodes; the trailing semicolon
# keeps the return value's repr out of the cell output.
fig.colorbar(points, ax=ax, shrink=0.6, label='pca 4');

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x121214310>

In [19]:
# One 3-D scatter of the first three principal components per activity label,
# coloured by that label's column. Columns 6..10 of `data` are the five
# Label_* columns; `pcas_df` rows are in the same order as `data` rows.
label_columns = data.columns[6:11]

fig = plt.figure(figsize=(10, 10))
for position, label in enumerate(label_columns, start=1):
    # 2x3 grid; the sixth slot stays empty because there are only five labels.
    ax = fig.add_subplot(2, 3, position, projection='3d')
    ax.scatter3D(pcas_df.iloc[:, 0], pcas_df.iloc[:, 1], pcas_df.iloc[:, 2], c=data[label])
    ax.set_title(label)
    ax.set_xlabel('pca 1')
    ax.set_ylabel('pca 2')
    # Must be set_zlabel: a second set_ylabel call would overwrite the y label
    # and leave the z axis unlabelled.
    ax.set_zlabel('pca 3')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
# Display all open figures.
# NOTE(review): presumably redundant with the interactive widget backend,
# which already renders each figure in its own cell - confirm before removing.
plt.show()

In [41]:
# Re-inspect the component scores before joining them onto the sensor data.
pcas_df

Unnamed: 0,pca_0,pca_1,pca_2,pca_3
0,0.014538,0.677151,0.130312,0.222946
1,0.021735,0.647497,0.129872,0.220391
2,0.023366,0.641095,0.135996,0.244909
3,0.028611,0.639040,0.125829,0.208919
4,0.033823,0.647885,0.050111,0.169323
...,...,...,...,...
40884,-0.022980,0.671719,0.473244,0.040975
40885,-0.029189,0.663747,-0.030057,0.215565
40886,-0.051041,0.646623,0.318168,0.221181
40887,-0.039232,0.661887,0.500035,0.091138


In [42]:
# Row count of `data`; `pcas_df` must have the same number of rows for the
# join in the next cell to line up.
data.shape

(40889, 11)

In [53]:
# `data` is indexed by attr_time while `pcas_df` has a plain 0..n-1 index, so
# move attr_time back into a column and join the two frames row by row.
data_rows = data.reset_index()
# A left join would silently fill the score columns with NaN if `pcas_df`
# came from a different (stale) version of `data`; fail loudly instead.
assert len(data_rows) == len(pcas_df), 'pcas_df is out of sync with data; re-run the PCA cell'
final = data_rows.join(pcas_df)

In [54]:
# Inspect the combined frame: timestamp, sensor features, activity labels and
# the principal-component scores side by side.
final

Unnamed: 0,attr_time,attr_x,attr_y,attr_z,attr_azimuth,attr_pitch,attr_roll,Label_ClimbingDown,Label_ClimbingUp,Label_Sitting,Label_Standing,Label_Walking,pca_0,pca_1,pca_2,pca_3
0,2015-03-11 07:58:34,-0.831786,4.875592,9.513959,214.588255,-22.053092,-4.623449,False,False,False,False,False,0.014538,0.677151,0.130312,0.222946
1,2015-03-11 07:58:35,-0.858429,4.459560,9.043941,214.588255,-22.053092,-4.623449,False,False,False,False,False,0.021735,0.647497,0.129872,0.220391
2,2015-03-11 07:58:36,-1.273464,4.266862,9.139566,214.588255,-22.053092,-4.623449,False,False,False,False,False,0.023366,0.641095,0.135996,0.244909
3,2015-03-11 07:58:37,-0.664606,3.690015,9.443701,214.588255,-22.053092,-4.623449,False,False,False,False,False,0.028611,0.639040,0.125829,0.208919
4,2015-03-11 07:58:38,0.006021,3.790752,9.341636,191.741831,-20.222385,-2.729107,False,False,False,False,False,0.033823,0.647885,0.050111,0.169323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40884,2015-03-11 19:19:58,0.012438,5.625895,8.510872,350.116275,-33.330454,-0.766879,False,False,False,False,False,-0.022980,0.671719,0.473244,0.040975
40885,2015-03-11 19:19:59,-0.358253,6.073435,8.164868,158.743362,-36.486600,-3.068146,False,False,False,False,False,-0.029189,0.663747,-0.030057,0.215565
40886,2015-03-11 19:20:00,-1.125275,6.750274,7.516219,279.058298,-41.436647,-8.683954,False,False,False,False,False,-0.051041,0.646623,0.318168,0.221181
40887,2015-03-11 19:20:01,-1.125275,6.750274,7.516219,354.252050,-36.324947,0.610465,False,False,False,False,False,-0.039232,0.661887,0.500035,0.091138


AttributeError: 'DataFrame' object has no attribute 'pcas_0'