### Imports

In [1]:
%pip install seaborn sklearn keras_tuner lightgbm plotly &> /dev/null

Note: you may need to restart the kernel to use updated packages.


In [2]:
# To store data
# Logging
import logging
import warnings

import matplotlib.pyplot as plt

# To do linear algebra
import numpy as np

# To get new datatypes and functions
from cycler import cycler
from matplotlib.cm import get_cmap

# To create plots
from matplotlib.colors import rgb2hex
from numpy import pi
from scipy.optimize import curve_fit

import pandas as pd
import plotly.graph_objs as go

# To create nicer plots
import seaborn as sns
from keras.layers import Dense, Dropout

# To build models
from keras.models import Sequential
from keras_tuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
from kerastuner.tuners import RandomSearch

# To gbm light
from lightgbm import LGBMClassifier

# To create interactive plots
from plotly.offline import init_notebook_mode, iplot

# To process data
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers

# To investigate distributions
# from scipy.stats import norm, probplot, skew
# from sklearn.decomposition import PCA
# from sklearn.manifold import TSNE
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

logging.getLogger("tensorflow").setLevel(logging.ERROR)
warnings.filterwarnings("ignore")
init_notebook_mode(connected=True)


2023-05-30 19:28:05.366887: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-30 19:28:05.663750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-30 19:28:05.663771: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-05-30 19:28:06.986192: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

### Uncompressing data

In [3]:
!./unzip.sh test.csv.zip train.csv.zip

Archive:  test.csv.zip
checkdir:  cannot create extraction directory: test.csv
           File exists
File test.csv.zip unzipped successfully to test.csv
Archive:  train.csv.zip
checkdir:  cannot create extraction directory: train.csv
           File exists
File train.csv.zip unzipped successfully to train.csv


In [4]:
# Load datasets
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Combine boths dataframes
train_data["Data"] = "Train"
test_data["Data"] = "Test"
both_data = pd.concat([train_data, test_data], axis=0).reset_index(drop=True)
both_data["subject"] = "#" + both_data["subject"].astype(str)

# Create label
label = both_data.pop("Activity")

print("Shape Train:\t{}".format(train_data.shape))
print("Shape Test:\t{}\n".format(test_data.shape))

train_data.head()


Shape Train:	(7352, 564)
Shape Test:	(2947, 564)



Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity,Data
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING,Train
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING,Train
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING,Train
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING,Train
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING,Train


### Exploratory Data Analysis

In [5]:
train_data.shape


(7352, 564)

In [6]:
train_data.head()


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity,Data
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING,Train
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING,Train
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING,Train
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING,Train
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING,Train


In [7]:
train_data.describe()


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
count,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,...,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0,7352.0
mean,0.274488,-0.017695,-0.109141,-0.605438,-0.510938,-0.604754,-0.630512,-0.526907,-0.60615,-0.468604,...,-0.307009,-0.625294,0.008684,0.002186,0.008726,-0.005981,-0.489547,0.058593,-0.056515,17.413085
std,0.070261,0.040811,0.056635,0.448734,0.502645,0.418687,0.424073,0.485942,0.414122,0.544547,...,0.321011,0.307584,0.336787,0.448306,0.608303,0.477975,0.511807,0.29748,0.279122,8.975143
min,-1.0,-1.0,-1.0,-1.0,-0.999873,-1.0,-1.0,-1.0,-1.0,-1.0,...,-0.995357,-0.999765,-0.97658,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0
25%,0.262975,-0.024863,-0.120993,-0.992754,-0.978129,-0.980233,-0.993591,-0.978162,-0.980251,-0.936219,...,-0.542602,-0.845573,-0.121527,-0.289549,-0.482273,-0.376341,-0.812065,-0.017885,-0.143414,8.0
50%,0.277193,-0.017219,-0.108676,-0.946196,-0.851897,-0.859365,-0.950709,-0.857328,-0.857143,-0.881637,...,-0.343685,-0.711692,0.009509,0.008943,0.008735,-0.000368,-0.709417,0.182071,0.003181,19.0
75%,0.288461,-0.010783,-0.097794,-0.242813,-0.034231,-0.262415,-0.29268,-0.066701,-0.265671,-0.017129,...,-0.126979,-0.503878,0.150865,0.292861,0.506187,0.359368,-0.509079,0.248353,0.107659,26.0
max,1.0,1.0,1.0,1.0,0.916238,1.0,1.0,0.967664,1.0,1.0,...,0.989538,0.956845,1.0,1.0,0.998702,0.996078,1.0,0.478157,1.0,30.0


### Checking categorical imbalance

In [8]:
import plotly.express as px

# dummy change
fig = px.pie(train_data, names="Activity", width=700)
fig.update_layout(
    title={
        "text": "Activities distribution in the data",
        "y": 0.95,
        "x": 0.45,
        "xanchor": "center",
        "yanchor": "top",
    }
)
fig.show()


In [9]:
# Plotting data
label_counts = label.value_counts()

# Get colors
n = label_counts.shape[0]
colormap = get_cmap("viridis")
colors = [rgb2hex(colormap(col)) for col in np.arange(0, 1.01, 1 / (n - 1))]

# Create plot
data = go.Bar(x=label_counts.index, y=label_counts, marker=dict(color=colors))

layout = go.Layout(
    title="Smartphone Activity Label Distribution",
    xaxis=dict(title="Activity"),
    yaxis=dict(title="Count"),
)

fig = go.Figure(data=[data], layout=layout)
iplot(fig)


## Prepare Train And Test Data

In [10]:
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")
x_train, y_train = train_data.iloc[:, :-2], train_data.iloc[:, -1:]
x_test, y_test = test_data.iloc[:, :-2], test_data.iloc[:, -1:]
x_train.shape, y_train.shape


((7352, 561), (7352, 1))

In [11]:
x_test, y_test = test_data.iloc[:, :-2], test_data.iloc[:, -1:]
x_test.shape, y_test.shape


((2947, 561), (2947, 1))

### One-hot Encoding

In [12]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)


In [13]:
x_test.shape, y_test.shape, x_train.shape, y_train.shape


((2947, 561), (2947,), (7352, 561), (7352,))

### Scaling

In [14]:
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


## Base Model

In [15]:
model = Sequential()
model.add(
    Dense(
        units=64,
        kernel_initializer="normal",
        activation="sigmoid",
        input_dim=x_train.shape[1],
    )
)
model.add(Dropout(0.2))
model.add(Dense(units=6, kernel_initializer="normal", activation="softmax"))
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
history = model.fit(
    x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test)
)
model.summary()


2023-05-30 19:28:18.198549: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-30 19:28:18.198890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-30 19:28:18.199077: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-05-30 19:28:18.199130: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-05-30 19:28:18.199543: W tensorflow/c

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                35968     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 6)                 390       
                                                                 
Total params: 36,358
Trainable params: 36,358
Non-trainable params: 0
_________________________________________________________________


## Tuning the model

In [20]:
import tensorflow as tf

def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int("num_layers", 2, 25)):
        model.add(
            layers.Dense(
                units=hp.Int("units" + str(i), min_value=32, max_value=512, step=32),
                kernel_initializer=hp.Choice("initializer", ["uniform", "normal"]),
                activation=hp.Choice("activation", ["relu", "sigmoid", "tanh"]),
            )
        )
    model.add(
        layers.Dense(
            6,
            kernel_initializer=hp.Choice("initializer", ["uniform", "normal"]),
            activation="softmax",
        )
    )
    model.add(Dropout(0.2))
    
    model.compile(
        optimizer=tf.keras.optimizers.legacy.Adam(),  # changed from "adam"
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model



tuner = RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=5,
    executions_per_trial=3,
    directory="project",
    project_name="Human_activity_recognition",
)

tuner.search_space_summary()


Search space summary
Default search space size: 27
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 25, 'step': 1, 'sampling': 'linear'}
units0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
initializer (Choice)
{'default': 'uniform', 'conditions': [], 'values': ['uniform', 'normal'], 'ordered': False}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'sigmoid', 'tanh'], 'ordered': False}
units1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units3 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units4 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units5 (Int)

In [21]:
tuner.search(x_train, y_train, epochs=10, validation_data=(x_test, y_test))


In [22]:
tuner.results_summary()


Results summary
Results in project/Human_activity_recognition
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7fd33b27a680>
Trial summary
Hyperparameters:
num_layers: 10
units0: 160
initializer: normal
activation: relu
units1: 352
units2: 384
units3: 160
units4: 512
units5: 224
units6: 352
units7: 416
units8: 128
units9: 128
units10: 64
units11: 192
units12: 160
Score: 0.6795611381530762
Trial summary
Hyperparameters:
num_layers: 24
units0: 128
initializer: normal
activation: relu
units1: 448
units2: 128
units3: 416
units4: 480
units5: 160
units6: 32
units7: 384
units8: 448
units9: 224
units10: 96
units11: 480
units12: 384
units13: 32
units14: 32
units15: 32
units16: 32
units17: 32
units18: 32
units19: 32
units20: 32
units21: 32
units22: 32
units23: 32
Score: 0.5578554471333822
Trial summary
Hyperparameters:
num_layers: 6
units0: 320
initializer: uniform
activation: tanh
units1: 352
units2: 32
units3: 32
units4: 32
units5: 32
Score: 0.3504128356774648
Trial s

In [23]:
model = tuner.get_best_models(num_models=1)[0]
history = model.fit(x_train, y_train, epochs=51, validation_data=(x_test, y_test))


Epoch 1/51


2023-05-30 19:34:02.499895: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 16497888 exceeds 10% of free system memory.


Epoch 2/51
Epoch 3/51
Epoch 4/51
Epoch 5/51
Epoch 6/51
Epoch 7/51
Epoch 8/51
Epoch 9/51
Epoch 10/51
Epoch 11/51
Epoch 12/51
Epoch 13/51
Epoch 14/51
Epoch 15/51
Epoch 16/51
Epoch 17/51
Epoch 18/51
Epoch 19/51
Epoch 20/51
Epoch 21/51
Epoch 22/51
Epoch 23/51
Epoch 24/51
Epoch 25/51
Epoch 26/51
Epoch 27/51
Epoch 28/51
Epoch 29/51
Epoch 30/51
Epoch 31/51
Epoch 32/51
Epoch 33/51
Epoch 34/51
Epoch 35/51
Epoch 36/51
Epoch 37/51
Epoch 38/51
Epoch 39/51
Epoch 40/51
Epoch 41/51
Epoch 42/51
Epoch 43/51
Epoch 44/51
Epoch 45/51
Epoch 46/51
Epoch 47/51
Epoch 48/51
Epoch 49/51
Epoch 50/51
Epoch 51/51


In [25]:
model.summary()

import tensorflow as tf
from tensorflow import keras

Callback = tf.keras.callbacks.EarlyStopping(monitor="accuracy", patience=3)
mo_fitt = model.fit(
    x_train, y_train, epochs=200, validation_data=(x_test, y_test), callbacks=Callback
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 160)               89920     
                                                                 
 dense_1 (Dense)             (None, 352)               56672     
                                                                 
 dense_2 (Dense)             (None, 384)               135552    
                                                                 
 dense_3 (Dense)             (None, 160)               61600     
                                                                 
 dense_4 (Dense)             (None, 512)               82432     
                                                                 
 dense_5 (Dense)             (None, 224)               114912    
                                                                 
 dense_6 (Dense)             (None, 352)               7

2023-05-30 19:38:28.345893: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 16497888 exceeds 10% of free system memory.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200


In [None]:
accuracy = mo_fitt.history["accuracy"]
loss = mo_fitt.history["loss"]
validation_loss = mo_fitt.history["val_loss"]
validation_accuracy = mo_fitt.history["val_accuracy"]

# dynamically set x based on y
x = range(len(validation_loss))

plt.figure(figsize=(15, 7))

plt.subplot(2, 2, 1)
plt.plot(x, accuracy, label="Training Accuracy", color="blue", linewidth=2)
plt.plot(
    x, validation_accuracy, label="Validation Accuracy", color="green", linewidth=2
)
plt.legend(loc="lower right", fontsize=14)
plt.title("Accuracy: Training vs Validation", fontsize=16)
plt.xlabel("Epoch", fontsize=14)
plt.ylabel("Accuracy", fontsize=14)
plt.grid(True)

plt.subplot(2, 2, 2)
plt.plot(x, loss, label="Training Loss", color="blue", linewidth=2)
plt.plot(x, validation_loss, label="Validation Loss", color="green", linewidth=2)
plt.legend(loc="upper right", fontsize=14)
plt.title("Loss: Training vs Validation", fontsize=16)
plt.xlabel("Epoch", fontsize=14)
plt.ylabel("Loss", fontsize=14)
plt.grid(True)

plt.tight_layout()
plt.show()


In [None]:
# Save model(s) for development purposes
import tensorflow as tf

# create a TFLiteConverter object
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# convert the model to TFLite format
tflite_model = converter.convert()

# save the TFLite model to a file
with open("./assets/model2.tflite", "wb") as f:
    f.write(tflite_model)
