In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering

In [2]:
# Load the pre-processed sensor CSVs, one file per device/sensor pair.
# Forward slashes keep these relative paths valid on Windows, Linux and macOS;
# the previous ".\\processed\\..." form only resolved on Windows.
df_phone_accel_orig = pd.read_csv("processed/phone/accel/data.csv")
df_watch_accel_orig = pd.read_csv("processed/watch/accel/data.csv")

df_phone_gyro_orig = pd.read_csv("processed/phone/gyro/data.csv")
df_watch_gyro_orig = pd.read_csv("processed/watch/gyro/data.csv")

# Approach 1: Using all data

## Watch

### Accelerometer

In [3]:
# Feature matrix for clustering: the columns at positions 3, 4 and 5 of the
# watch accelerometer frame (presumably the x/y/z axes — TODO confirm against the CSV header).
X = (
    df_watch_accel_orig
    .iloc[:, [3, 4, 5]]
    .values
)

In [4]:
# Partition the samples in X into three clusters. random_state is fixed so the
# k-means++ seeding, and therefore the labels, are the same on every run.
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    n_init=50,
    max_iter=1000,
    random_state=0,
)
y_kmeans = kmeans.fit_predict(X)
# Keep each sample's cluster id next to its activity in the same frame.
df_watch_accel_orig['Label'] = y_kmeans

In [5]:
column_names = ['Activity', 'Cluster']
# For every activity, record the cluster label assigned to most of its samples.
# Series.mode() returns its values sorted, so a tie resolves to the smallest label.
data = [
    {
        'Activity': act,
        'Cluster': df_watch_accel_orig.loc[
            df_watch_accel_orig['activity'] == act, 'Label'
        ].mode()[0],
    }
    for act in df_watch_accel_orig['activity'].unique()
]
# DataFrame.append was removed in pandas 2.0; build the table from the records
# in one step (this also lets pandas infer proper dtypes for the columns).
cluster = pd.DataFrame(data, columns=column_names)

In [6]:
# Show the activity -> dominant-cluster table, ordered by cluster and then activity.
print(cluster.sort_values(['Cluster', 'Activity']))

   Activity Cluster
1         B       0
7         H       0
9         J       0
13        O       0
15        Q       0
17        S       0
0         A       1
2         C       1
4         E       1
12        M       1
14        P       1
3         D       2
5         F       2
6         G       2
8         I       2
10        K       2
11        L       2
16        R       2


### Gyroscope

In [7]:
# Feature matrix for clustering: the columns at positions 3, 4 and 5 of the
# watch gyroscope frame (presumably the x/y/z axes — TODO confirm against the CSV header).
X = (
    df_watch_gyro_orig
    .iloc[:, [3, 4, 5]]
    .values
)

In [8]:
# Partition the samples in X into three clusters. random_state is fixed so the
# k-means++ seeding, and therefore the labels, are the same on every run.
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    n_init=20,
    max_iter=500,
    random_state=0,
)
y_kmeans = kmeans.fit_predict(X)
# Keep each sample's cluster id next to its activity in the same frame.
df_watch_gyro_orig['Label'] = y_kmeans

In [9]:
column_names = ['Activity', 'Cluster']
# For every activity, record the cluster label assigned to most of its samples.
# Series.mode() returns its values sorted, so a tie resolves to the smallest label.
data = [
    {
        'Activity': act,
        'Cluster': df_watch_gyro_orig.loc[
            df_watch_gyro_orig['activity'] == act, 'Label'
        ].mode()[0],
    }
    for act in df_watch_gyro_orig['activity'].unique()
]
# DataFrame.append was removed in pandas 2.0; build the table from the records
# in one step (this also lets pandas infer proper dtypes for the columns).
cluster1 = pd.DataFrame(data, columns=column_names)

In [10]:
# Show the dominant cluster per activity for the watch gyroscope.
# NOTE(review): in the recorded output every activity maps to the same cluster,
# so this clustering does not separate the activities — confirm whether that is expected.
print(cluster1)

   Activity Cluster
0         A       1
1         B       1
2         C       1
3         D       1
4         E       1
5         F       1
6         G       1
7         H       1
8         I       1
9         J       1
10        K       1
11        L       1
12        M       1
13        O       1
14        P       1
15        Q       1
16        R       1
17        S       1


## Phone

### Accelerometer

In [11]:
# Feature matrix for clustering: the columns at positions 3, 4 and 5 of the
# phone accelerometer frame (presumably the x/y/z axes — TODO confirm against the CSV header).
X = (
    df_phone_accel_orig
    .iloc[:, [3, 4, 5]]
    .values
)

In [12]:
# Partition the samples in X into three clusters. random_state is fixed so the
# k-means++ seeding, and therefore the labels, are the same on every run.
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    n_init=50,
    max_iter=1000,
    random_state=0,
)
y_kmeans = kmeans.fit_predict(X)
# Keep each sample's cluster id next to its activity in the same frame.
df_phone_accel_orig['Label'] = y_kmeans

In [13]:
column_names = ['Activity', 'Cluster']
# For every activity, record the cluster label assigned to most of its samples.
# Series.mode() returns its values sorted, so a tie resolves to the smallest label.
data = [
    {
        'Activity': act,
        'Cluster': df_phone_accel_orig.loc[
            df_phone_accel_orig['activity'] == act, 'Label'
        ].mode()[0],
    }
    for act in df_phone_accel_orig['activity'].unique()
]
# DataFrame.append was removed in pandas 2.0; build the table from the records
# in one step (this also lets pandas infer proper dtypes for the columns).
cluster = pd.DataFrame(data, columns=column_names)

In [14]:
# Show the activity -> dominant-cluster table, ordered by cluster and then activity.
print(cluster.sort_values(['Cluster', 'Activity']))

   Activity Cluster
3         D       0
5         F       0
6         G       0
8         I       0
16        R       0
0         A       1
1         B       1
2         C       1
4         E       1
9         J       1
10        K       1
11        L       1
12        M       1
13        O       1
14        P       1
15        Q       1
17        S       1
7         H       2


### Gyroscope

In [15]:
# Feature matrix for clustering: the columns at positions 3, 4 and 5 of the
# phone gyroscope frame (presumably the x/y/z axes — TODO confirm against the CSV header).
X = (
    df_phone_gyro_orig
    .iloc[:, [3, 4, 5]]
    .values
)

In [16]:
# Partition the samples in X into three clusters. random_state is fixed so the
# k-means++ seeding, and therefore the labels, are the same on every run.
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    n_init=20,
    max_iter=500,
    random_state=0,
)
y_kmeans = kmeans.fit_predict(X)
# Keep each sample's cluster id next to its activity in the same frame.
df_phone_gyro_orig['Label'] = y_kmeans

In [17]:
column_names = ['Activity', 'Cluster']
# For every activity, record the cluster label assigned to most of its samples.
# Series.mode() returns its values sorted, so a tie resolves to the smallest label.
data = [
    {
        'Activity': act,
        'Cluster': df_phone_gyro_orig.loc[
            df_phone_gyro_orig['activity'] == act, 'Label'
        ].mode()[0],
    }
    for act in df_phone_gyro_orig['activity'].unique()
]
# DataFrame.append was removed in pandas 2.0; build the table from the records
# in one step (this also lets pandas infer proper dtypes for the columns).
cluster1 = pd.DataFrame(data, columns=column_names)

In [18]:
# Show the dominant cluster per activity for the phone gyroscope.
# NOTE(review): in the recorded output every activity maps to the same cluster,
# so this clustering does not separate the activities — confirm whether that is expected.
print(cluster1)

   Activity Cluster
0         A       0
1         B       0
2         C       0
3         D       0
4         E       0
5         F       0
6         G       0
7         H       0
8         I       0
9         J       0
10        K       0
11        L       0
12        M       0
13        O       0
14        P       0
15        Q       0
16        R       0
17        S       0


# Approach 2: Clustering per-activity centroids

In [19]:
column_names = ['Activity', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2']


def _centroid_record(act, accel_df, gyro_df):
    """Return the mean accelerometer and gyroscope reading for one activity.

    Parameters
    ----------
    act
        Value of the 'activity' column selecting the rows to average.
    accel_df, gyro_df : pandas.DataFrame
        Frames with 'activity', 'x', 'y' and 'z' columns.

    Returns
    -------
    dict
        'Activity' plus the accelerometer means under 'x1', 'y1', 'z1' and the
        gyroscope means under 'x2', 'y2', 'z2'.
    """
    accel_rows = accel_df[accel_df['activity'] == act]
    gyro_rows = gyro_df[gyro_df['activity'] == act]
    record = {'Activity': act}
    for axis in ('x', 'y', 'z'):
        record[axis + '1'] = accel_rows[axis].mean()
        record[axis + '2'] = gyro_rows[axis].mean()
    return record


# The activity list is taken from the phone accelerometer frame for both devices;
# an activity with no rows in one of the other frames yields NaN means there.
activities = df_phone_accel_orig['activity'].unique()
data1 = [
    _centroid_record(act, df_phone_accel_orig, df_phone_gyro_orig)
    for act in activities
]
data2 = [
    _centroid_record(act, df_watch_accel_orig, df_watch_gyro_orig)
    for act in activities
]

# DataFrame.append was removed in pandas 2.0; build each table from its records
# in one step. `columns=` fixes the column order regardless of dict key order.
phone_df = pd.DataFrame(data1, columns=column_names)
watch_df = pd.DataFrame(data2, columns=column_names)

In [20]:
# Print the phone table, then the watch table. A blank line separates them:
# with print's default single-space separator the watch header would be glued
# onto the last row of the phone table.
print(phone_df, watch_df, sep='\n\n')

   Activity        x1        y1        z1        x2        y2        z2
0         A  0.949236 -2.321588 -0.159745 -0.002077  0.011853 -0.009068
1         B  0.222337 -2.818930  0.089083 -0.029753 -0.029171 -0.015954
2         C  0.204692 -3.350358 -0.608602  0.010929 -0.012997  0.000366
3         D -0.113426 -0.898146  0.428868 -0.002636 -0.001546 -0.000102
4         E -0.596092 -2.040649 -0.718897 -0.002129 -0.001898 -0.001759
5         F -0.395725 -1.705319  0.260869 -0.002463 -0.000818 -0.002609
6         G -0.119938 -1.273479 -0.096596 -0.003026 -0.000630 -0.002935
7         H -0.008747 -0.766019  1.091866 -0.002210 -0.002025 -0.003482
8         I -0.734535 -0.576693  1.879696 -0.002238 -0.002376 -0.002614
9         J  1.259955 -1.162879  1.303108 -0.002164 -0.002564 -0.002209
10        K  0.894594 -1.044495  0.746232 -0.001805 -0.001858 -0.002799
11        L  1.006376 -0.786184  1.468486 -0.002117 -0.000573 -0.002013
12        M  0.317484 -2.736209 -0.313712  0.002068  0.008468 -0

## Phone

In [21]:
# Feature matrix of per-activity centroids: accelerometer means (x1, y1, z1)
# followed by gyroscope means (x2, y2, z2). Selecting by name rather than by
# position keeps this correct if columns are later added or reordered, and
# dtype=float guarantees a numeric array for the clusterer.
# NOTE(review): in the printed table the gyroscope means are orders of magnitude
# smaller than the accelerometer means, so unscaled distances are dominated by
# x1/y1/z1 — consider standardising the columns first.
X = phone_df[['x1', 'y1', 'z1', 'x2', 'y2', 'z2']].to_numpy(dtype=float)

In [22]:
# Group the phone centroids in X into three clusters with agglomerative
# clustering and attach the resulting label to each activity row.
clustering = AgglomerativeClustering(n_clusters=3)
# Despite its name, y_kmeans holds the agglomerative labels here.
y_kmeans = clustering.fit_predict(X)
phone_df['agl'] = y_kmeans

In [23]:
# Show the phone centroids ordered by agglomerative cluster and then activity.
print(phone_df.sort_values(['agl', 'Activity']))

   Activity        x1        y1        z1        x2        y2        z2  agl
3         D -0.113426 -0.898146  0.428868 -0.002636 -0.001546 -0.000102    0
5         F -0.395725 -1.705319  0.260869 -0.002463 -0.000818 -0.002609    0
6         G -0.119938 -1.273479 -0.096596 -0.003026 -0.000630 -0.002935    0
7         H -0.008747 -0.766019  1.091866 -0.002210 -0.002025 -0.003482    0
8         I -0.734535 -0.576693  1.879696 -0.002238 -0.002376 -0.002614    0
15        Q -0.170456 -1.742381  0.995668 -0.002439 -0.001620 -0.003130    0
16        R  0.168946 -1.060364 -0.077128 -0.002410 -0.001928 -0.001709    0
17        S -0.683708 -1.757010  0.172157 -0.003149 -0.001202 -0.002127    0
0         A  0.949236 -2.321588 -0.159745 -0.002077  0.011853 -0.009068    1
1         B  0.222337 -2.818930  0.089083 -0.029753 -0.029171 -0.015954    1
2         C  0.204692 -3.350358 -0.608602  0.010929 -0.012997  0.000366    1
4         E -0.596092 -2.040649 -0.718897 -0.002129 -0.001898 -0.001759    1

## Watch

In [24]:
# Feature matrix of per-activity centroids: accelerometer means (x1, y1, z1)
# followed by gyroscope means (x2, y2, z2). Selecting by name rather than by
# position keeps this correct if columns are later added or reordered, and
# dtype=float guarantees a numeric array for the clusterer.
# NOTE(review): in the printed table the gyroscope means are much smaller than
# the accelerometer means, so unscaled distances are dominated by x1/y1/z1 —
# consider standardising the columns first.
X = watch_df[['x1', 'y1', 'z1', 'x2', 'y2', 'z2']].to_numpy(dtype=float)

In [25]:
# Group the watch centroids in X into three clusters with agglomerative
# clustering and attach the resulting label to each activity row.
clustering = AgglomerativeClustering(n_clusters=3)
# Despite its name, y_kmeans holds the agglomerative labels here.
y_kmeans = clustering.fit_predict(X)
watch_df['agl'] = y_kmeans

In [26]:
# Show the watch centroids ordered by agglomerative cluster and then activity.
print(watch_df.sort_values(['agl', 'Activity']))

   Activity        x1        y1        z1        x2        y2        z2  agl
3         D  0.627913 -2.918544  5.225599  0.002877  0.000373 -0.000040    0
5         F -0.333785 -4.283944  6.053603  0.000827 -0.000038  0.000090    0
6         G -4.639791 -2.512403  2.787416 -0.002586 -0.001934 -0.000691    0
7         H -2.156690 -6.030677 -0.207847  0.000765 -0.020316 -0.012325    0
8         I -2.637463 -4.133676  4.109641 -0.002360 -0.011246  0.000858    0
9         J -2.893483 -5.812299  2.058436 -0.002236 -0.012013 -0.003880    0
10        K -2.707773 -4.412336  3.336305  0.000910  0.000764  0.000800    0
11        L -2.511362 -4.506612  2.899997 -0.001241 -0.006017 -0.002199    0
16        R -3.206922 -3.644230  1.921715 -0.003328 -0.005151  0.000178    0
1         B  1.100125 -6.987424 -0.464980 -0.251163 -0.017893  0.025555    1
13        O  1.916532 -4.586169 -0.460407 -0.106061 -0.034674  0.037822    1
15        Q  0.005822 -6.687586  2.402811 -0.000498 -0.000077  0.000375    1