In [19]:
#Importing the important libraries
import pandas as pd
import glob
import os


In [20]:
#TASK 1: DATA CLEAINING & RETRIEVING

#Step 1: Data Retrieving

cwd = os.getcwd()
all_files = glob.glob(cwd+"/*.csv")
subs = []

#Appened each csv file in one list
#Change the index_col to 0 as the first column is already the index column
#Skip the starting 3000 Rows to each csv as they have redundant information

for filename in all_files:
    df = pd.read_csv(filename,header=None,index_col=0,skiprows=3000)
    subs.append(df)

#Combine all csv in the list into a datadrame
frame = pd.concat(subs, axis=0)

#Set the column header and reset the index column.
frame.columns=['x','y','z','label']
frame.reset_index(drop=True,inplace=True)
frame.head(10)

Unnamed: 0,x,y,z,label
0,1979,2377,2126,1
1,1976,2377,2131,1
2,1976,2375,2127,1
3,1973,2379,2132,1
4,1979,2375,2129,1
5,1980,2372,2132,1
6,1971,2370,2131,1
7,1975,2371,2130,1
8,1978,2367,2130,1
9,1979,2376,2126,1


In [21]:
#Step 2: Check for data types and replace them with correct values
frame.dtypes

x        int64
y        int64
z        int64
label    int64
dtype: object

In [22]:
#Step 3: Check for value types and missing values
for i in frame.columns:
    print(frame[i].value_counts(dropna=False))

2075    10208
2019     9914
2077     9311
2071     9248
2073     8985
        ...  
1444        1
2468        1
1454        1
2482        1
2375        1
Name: x, Length: 950, dtype: int64
2351    27102
2539    25297
2383    24140
2347    23471
2355    22829
        ...  
2962        1
2963        1
2960        1
2961        1
3453        1
Name: y, Length: 1211, dtype: int64
2031    16832
1999    16801
2003    15645
2002    13914
2001    13541
        ...  
2473        1
1450        1
1452        1
1454        1
2497        1
Name: z, Length: 947, dtype: int64
7    593563
1    563667
4    357064
3    216737
5     51498
2     47878
6     47770
0      3719
Name: label, dtype: int64


In [23]:
#Step 4: Remove labels with less count and partially repeated lables


indexLabel0 = frame[ frame['label'] == 0 ].index
indexLabel2 = frame[ frame['label'] == 2 ].index
indexLabel6 = frame[ frame['label'] == 6 ].index

# Delete these row indexes from dataFrame
frame.drop(indexLabel0 , inplace=True)
frame.drop(indexLabel2 , inplace=True)
frame.drop(indexLabel6 , inplace=True)
 #Get names of indexes for which column Age has value 30
frame['label'].value_counts()

7    593563
1    563667
4    357064
3    216737
5     51498
Name: label, dtype: int64

In [None]:
#TASK 2: DATA EXPLORATION
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

#Step 1: Explore Each Column

#Column X:
g=sns.catplot(x="label", y="x",jitter=False, data=frame)
g.set_ylabels("X axis")
g.set_xlabels("Labels")
plt.title("X axis range per activity")
plt.show(g)
print(frame['x'].describe())

In [None]:
#Column Y:
g=sns.catplot(x="label", y="y",jitter=False, data=frame)
g.set_ylabels("Y axis")
g.set_xlabels("Labels")
plt.title("Y axis range per activity")
plt.show(g)
print(frame['y'].describe())

In [None]:
#Column z:
g=sns.catplot(x="label", y="z",jitter=False, data=frame)
g.set_ylabels("Z axis")
g.set_xlabels("Labels")
plt.title("Z axis range per activity")
plt.show(g)
print(frame['z'].describe())

In [None]:
#Column label:
g=sns.countplot(x="label", data=frame, palette="Greens_d")
plt.ylabel("Value count per label")
plt.xlabel("Labels")
plt.title("Value counts of each activity")
plt.show(g)
print(frame['label'].describe())

In [None]:
#Step 2: Exploration between multiple columns

# X Y Z and label of 1st subject

# ax=subs[1].plot.line(subplots=True,figsize=(10,10),legend=False)
# plt.xlabel('Timestamp')
# plt.ylabel('Activity')
# ax[0].set_ylabel('X axis')
# ax[1].set_ylabel('Y axis')
# ax[2].set_ylabel('Z axis')
for i in range(5,10):
    df1=subs[i]
    df1.columns=['x','y','z','label']
    mask = df1['label'] == 4
    df1[mask].plot.line()
    plt.title("X,Y,Z axis of subject(%d) who is walking"% i)
    plt.ylabel("Acceleration")
    plt.xlabel("Timestamp")

In [None]:
#X Y Z of one activity and multiple sujects

df1=subs[10]
df1.columns=['x','y','z','label']
mask = df1['label'] == 1
df1[mask].plot.line()
plt.title("X,Y,Z axis of subject 11 for Working at Computer")
plt.ylabel("Acceleration")
plt.xlabel("Timestamp")

df1=subs[10]
df1.columns=['x','y','z','label']
mask = df1['label'] == 3
df1[mask].plot.line()
plt.title("X,Y,Z axis of subject 11 for Standing")
plt.ylabel("Acceleration")
plt.xlabel("Timestamp")

df1=subs[10]
df1.columns=['x','y','z','label']
mask = df1['label'] == 4
df1[mask].plot.line()
plt.title("X,Y,Z axis of subject 11 for Walking")    
plt.ylabel("Acceleration")
plt.xlabel("Timestamp")

df1=subs[10]
df1.columns=['x','y','z','label']
mask = df1['label'] == 5
df1[mask].plot.line()
plt.title("X,Y,Z axis of subject 11 for Going Up\Down Stairs")
plt.ylabel("Acceleration")
plt.xlabel("Timestamp")

df1=subs[10]
df1.columns=['x','y','z','label']
mask = df1['label'] == 7
df1[mask].plot.line()
plt.title("X,Y,Z axis of subject 11 for Talking while Standing")
plt.ylabel("Acceleration")
plt.xlabel("Timestamp")

In [24]:
#PART 3: DATA MODELING

#frame = frame.sample(frac=1).reset_index(drop=True)
activityLabel = frame['label']
activityLabel.head(10)

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: label, dtype: int64

In [25]:
activityScale=frame[['x','y','z']]
activityScale.head()

Unnamed: 0,x,y,z
0,1979,2377,2126
1,1976,2377,2131
2,1976,2375,2127
3,1973,2379,2132
4,1979,2375,2129


In [26]:
frame.head(20)

Unnamed: 0,x,y,z,label
0,1979,2377,2126,1
1,1976,2377,2131,1
2,1976,2375,2127,1
3,1973,2379,2132,1
4,1979,2375,2129,1
5,1980,2372,2132,1
6,1971,2370,2131,1
7,1975,2371,2130,1
8,1978,2367,2130,1
9,1979,2376,2126,1


In [27]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [31]:
X_train, X_test, y_train, y_test = train_test_split(activityScale, activityLabel , test_size = 0.20,random_state=0)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
clf3 = KNeighborsClassifier(15)
fit  = clf3.fit(X_train, y_train)
predicted = fit.predict(X_test)
accuracy = clf3.score(X_test, y_test)
array1=confusion_matrix(y_test, predicted)
print(confusion_matrix(y_test, predicted))
print(classification_report(y_test, predicted))
print(accuracy)


In [None]:
from sklearn.tree import DecisionTreeClassifier

clf4 = DecisionTreeClassifier(max_depth=20)
fit  = clf4.fit(X_train, y_train)
predicted = fit.predict(X_test)
accuracy = clf4.score(X_test, y_test)
array2=confusion_matrix(y_test, predicted)
print(confusion_matrix(y_test, predicted))
print(classification_report(y_test, predicted))
print(accuracy)

In [None]:
from sklearn.metrics import plot_confusion_matrix
plt.figure(figsize=(10,10))
plot_confusion_matrix(clf3, X_test, y_test,include_values=False)
plt.show()
plot_confusion_matrix(clf4, X_test, y_test,include_values=False)
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
clf5 = RandomForestClassifier()
fit  = clf5.fit(X_train, y_train)
predicted = fit.predict(X_test)
accuracy = clf4.score(X_test, y_test)
array2=confusion_matrix(y_test, predicted)
print(confusion_matrix(y_test, predicted))
print(classification_report(y_test, predicted))
print(accuracy)

In [None]:
import sys

import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf

print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

In [42]:
# import tensorflow as tf
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

get_available_gpus()

from tensorflow.python.client import device_lib 
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15504246544438275720
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 4001523786187394809
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4826005504
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11136135847854911763
physical_device_desc: "device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 12443854604198512990
physical_device_desc: "device: XLA_GPU device"
]


In [40]:
import pandas as pd
import numpy as np
import tensorflow as tf

from keras import backend as K
from keras.models import Sequential
from keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, BatchNormalization
from keras.layers.core import Dense, Dropout
from keras.regularizers import l1, l2, l1_l2
from sklearn.metrics import accuracy_score

epochs = 30
batch_size = 16
n_hidden = 32


timesteps = 128
input_dim = 3
n_classes = 5

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='sigmoid'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
    model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

# Evaluate
print(confusion_matrix(Y_test, model.predict(X_test)))

Epoch 1/30


ValueError: in user code:

    D:\Software\Python\lib\site-packages\tensorflow\python\keras\engine\training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    D:\Software\Python\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    D:\Software\Python\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    D:\Software\Python\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    D:\Software\Python\lib\site-packages\tensorflow\python\keras\engine\training.py:531 train_step  **
        y_pred = self(x, training=True)
    D:\Software\Python\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:886 __call__
        self.name)
    D:\Software\Python\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:180 assert_input_compatibility
        str(x.shape.as_list()))

    ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 3]
