## Unzip The Raw Data

In [1]:
!./unzip.sh UCI_HAR_Dataset.zip 2>&1 > /dev/null
!pip install tflite-model-maker

Collecting tflite-model-maker
  Using cached tflite_model_maker-0.4.2-py3-none-any.whl (577 kB)
Collecting tensorflowjs<3.19.0,>=2.4.0
  Downloading tensorflowjs-3.18.0-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.5/77.5 kB[0m [31m306.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tflite-model-maker
  Using cached tflite_model_maker-0.4.1-py3-none-any.whl (642 kB)
  Using cached tflite_model_maker-0.4.0-py3-none-any.whl (642 kB)
  Using cached tflite_model_maker-0.3.4-py3-none-any.whl (616 kB)
Collecting matplotlib<3.5.0,>=3.0.3
  Using cached matplotlib-3.4.3.tar.gz (37.9 MB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting tf-models-official==2.3.0
  Using cached tf_models_official-2.3.0-py2.py3-none-any.whl (840 kB)
Collecting tensorflowjs>=2.4.0
  Using cached tensorflowjs-4.4.0-py3-none-any.whl (85 kB)
Collecting neural-structured-learning>=1.3.1
  Using cached neural_structured_learning-1.4.0-py2.py3-non

In [4]:
import os
import warnings

# TF_CPP_MIN_LOG_LEVEL must be exported before TensorFlow is first imported in
# this kernel; setting it after the import does not silence the native
# INFO/WARNING log lines. "2" hides INFO and WARNING messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
# Silence Python-level warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

import tensorflow as tf  # noqa: E402  (deliberately imported after the env var is set)

print(tf.__version__)


2.11.0


## Building The Dataset

In [5]:
import glob
import os

import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.layers import LSTM, BatchNormalization, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import L1L2


def get_one_hot(targets, nb_classes):
    """One-hot encode integer class indices.

    Args:
        targets: Integer class indices in ``[0, nb_classes)``. May be a NumPy
            array of any shape or any array-like (e.g. a plain list).
        nb_classes: Width of the one-hot axis.

    Returns:
        A float array of shape ``targets.shape + (nb_classes,)`` in which
        ``result[..., k] == 1`` exactly where ``targets == k``.
    """
    # Normalise once so that list/tuple inputs work too; the original converted
    # for the lookup but then read ``.shape`` from the raw argument.
    targets = np.asarray(targets)
    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot = np.eye(nb_classes)[targets.reshape(-1)]
    return one_hot.reshape(list(targets.shape) + [nb_classes])


def load_y(subset):
    """Load the label file of ``subset`` and return the labels one-hot encoded.

    Args:
        subset: Name of the dataset split; used to build the path
            ``UCI_HAR_Dataset/UCI_HAR_Dataset/{subset}/y_{subset}.txt``.

    Returns:
        A 2-D one-hot array with one row per label and one column per distinct
        label value found in the file.

    Raises:
        AssertionError: If the encoded shape is not ``(7352, 6)`` for "train" /
            ``(2947, 6)`` for "test", or if the first row does not encode the
            first label.
    """
    path = f"UCI_HAR_Dataset/UCI_HAR_Dataset/{subset}/y_{subset}.txt"

    # Read the labels as integers.
    y = np.loadtxt(path, delimiter=",", dtype=int)

    # Shift labels down by one so they can be used as column indices
    # (labels are presumably 1-based -- verify against the dataset README).
    one_hot_labels = get_one_hot(y - 1, len(np.unique(y)))
    if subset == "train":
        assert one_hot_labels.shape == (
            7352,
            6,
        ), f"Wrong dimensions: {one_hot_labels.shape} should be (7352, 6)"
    if subset == "test":
        assert one_hot_labels.shape == (
            2947,
            6,
        ), f"Wrong dimensions: {one_hot_labels.shape} should be (2947, 6)"

    # Sanity check: the hot column of the first row must match the first label.
    # The message is built from plain values so that a failing check reports the
    # mismatch instead of raising while formatting the message.
    first_hot_index = int(np.argmax(one_hot_labels[0]))
    assert (
        y[0] - 1 == first_hot_index
    ), f"Value mismatch {first_hot_index} vs {y[0] - 1}"
    return one_hot_labels


def build_data(subset):
    """Load the nine inertial signals of ``subset`` into a single 3-D array.

    Args:
        subset: One of "train", "val" or "test". Note that any value other than
            "train" is validated against the test-split sample count below.

    Returns:
        Array of shape ``(samples, 128, 9)`` whose last axis follows the order
        listed in ``signal_order``.

    Raises:
        ValueError: If ``subset`` is not an accepted name.
        AssertionError: If the folder does not hold exactly nine ``*.txt`` files
            or the stacked array does not have the expected shape.
    """
    if subset not in ["train", "val", "test"]:
        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError(f"Invalid subset: {subset}")

    folder_path = f"UCI_HAR_Dataset/UCI_HAR_Dataset/{subset}/Inertial Signals/"

    # The glob is only used to verify that the folder is complete; the load
    # order is fixed explicitly below because glob order is not deterministic.
    found_files = glob.glob(os.path.join(folder_path, "*.txt"))
    assert (
        len(found_files) == 9
    ), f"Expected 9 signal files in {folder_path}, found {len(found_files)}"

    # Fixed channel order of the last axis of the returned array.
    signal_order = [
        "body_acc_x_",
        "body_acc_y_",
        "body_acc_z_",
        "body_gyro_x_",
        "body_gyro_y_",
        "body_gyro_z_",
        "total_acc_x_",
        "total_acc_y_",
        "total_acc_z_",
    ]
    signal_files = [f"{folder_path}{name}{subset}.txt" for name in signal_order]

    # Each file is parsed exactly once; stacking gives (signals, samples, timesteps).
    per_signal = [np.loadtxt(path) for path in signal_files]

    # Reorder the axes to (samples, timesteps, signals).
    signals_data = np.transpose(per_signal, (1, 2, 0))

    # 7352 samples for train, 2947 otherwise; 128 timesteps; one channel per file.
    expected_samples = 7352 if subset == "train" else 2947
    expected_shape = (expected_samples, 128, len(signal_files))
    assert (
        signals_data.shape == expected_shape
    ), f"Wrong dimensions: {signals_data.shape} should be {expected_shape}"
    return signals_data


def load_data():
    """Return ``(X_train, y_train, X_test, y_test)`` built from the raw files.

    Signals come from ``build_data`` and one-hot labels from ``load_y``; the
    train split is loaded before the test split.
    """
    train_signals = build_data("train")
    train_labels = load_y("train")
    test_signals = build_data("test")
    test_labels = load_y("test")
    return train_signals, train_labels, test_signals, test_labels


# Materialise both splits; later cells read these four names.
X_train, y_train, X_test, y_test = load_data()


In [6]:
# Spot-check the first training window against hard-coded reference values.
# Messages are f-strings: `print(...)` as an assert message evaluates to None,
# which leaves the AssertionError without any detail.
first_sample = X_train[0]
first_timestep = first_sample[0]
assert len(first_sample) == 128, f"Expected 128 timesteps, got {len(first_sample)}"
assert (
    first_timestep[0] == 1.8085150e-004
), f"Unexpected first value of signal 0: {first_timestep[0]!r}"
assert (
    first_timestep[1] == 1.0766810e-002
), f"Unexpected first value of signal 1: {first_timestep[1]!r}"


In [7]:
# Shape sanity checks. Messages are f-strings so that the observed shape ends
# up in the AssertionError itself (a `print(...)` message evaluates to None).
assert X_train.shape == (7352, 128, 9), f"Expected shape: (7352, 128, 9) get {X_train.shape}"
assert X_test.shape == (2947, 128, 9), f"Expected: (2947, 128, 9) get {X_test.shape}"
assert y_train.shape == (7352, 6), f"Expected: (7352, 6) get {y_train.shape}"
assert y_test.shape == (2947, 6), f"Expected: (2947, 6) get {y_test.shape}"
assert len(X_train[0][0]) == 9, f"Signals numbers not match: {len(X_train[0][0])}"

In [11]:
import os
import pickle

# Create "assets/data" together with its parent "assets" folder when missing.
# makedirs(..., exist_ok=True) replaces the separate exists()/mkdir() pairs and
# does not fail if the folder appears between the check and the creation.
data_folder = os.path.join("assets", "data")
os.makedirs(data_folder, exist_ok=True)

def save_data_to_pickle_shards(data, data_name, data_folder, chunk_size=50 * 1024 * 1024):
    """Pickle ``data`` and write it as numbered shard files.

    Shards are named ``{data_name}_{i}.pickle`` with ``i`` starting at 0 and
    hold consecutive slices of the pickled byte stream.

    Args:
        data: Any picklable object.
        data_name: Prefix of the shard file names.
        data_folder: Target directory; created if it does not exist.
        chunk_size: Maximum size of one shard in bytes (default 50 MiB).

    Returns:
        None. If ``{data_name}_0.pickle`` already exists nothing is written and
        a notice is printed instead.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    # Only the first shard is probed: its presence is taken to mean the data
    # has been saved before.
    first_shard = os.path.join(data_folder, f"{data_name}_0.pickle")
    if os.path.exists(first_shard):
        print(f"{data_name} already exists in {data_folder}. Skipping data saving.")
        return

    os.makedirs(data_folder, exist_ok=True)

    # A memoryview lets each shard be sliced out without copying the payload.
    serialized_data = memoryview(pickle.dumps(data))

    for shard_index, start in enumerate(range(0, len(serialized_data), chunk_size)):
        filename = os.path.join(data_folder, f"{data_name}_{shard_index}.pickle")
        with open(filename, "wb") as f:
            f.write(serialized_data[start : start + chunk_size])


# Persist the four arrays as pickle shards inside `data_folder`, in the same
# order as before: X_train, y_train, X_test, y_test.
for shard_name, shard_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    save_data_to_pickle_shards(shard_array, shard_name, data_folder)


In [12]:
import os
import pickle

def load_data_from_pickle_shards(data_name, data_folder):
    """Reassemble and unpickle an object stored as ``{data_name}_{i}.pickle`` shards.

    Args:
        data_name: Prefix of the shard file names.
        data_folder: Directory that contains the shards.

    Returns:
        The object obtained by unpickling the concatenated shard contents.

    Raises:
        FileNotFoundError: If ``data_folder`` holds no shard for ``data_name``.
    """
    import re  # local import so the function does not depend on another cell

    # Accept only "<data_name>_<digits>.pickle"; a bare prefix test would also
    # pick up unrelated files that merely start with the same characters.
    shard_pattern = re.compile(rf"{re.escape(data_name)}_(\d+)\.pickle")
    shards = []
    for entry in os.listdir(data_folder):
        match = shard_pattern.fullmatch(entry)
        if match:
            shards.append((int(match.group(1)), entry))

    if not shards:
        raise FileNotFoundError(
            f"No pickle shards for {data_name!r} found in {data_folder}"
        )

    # Sort by the numeric index: a plain string sort would place "_10" before
    # "_2" and corrupt the byte stream once there are more than ten shards.
    pieces = []
    for _, entry in sorted(shards):
        with open(os.path.join(data_folder, entry), "rb") as f:
            pieces.append(f.read())

    # NOTE(security): unpickling can execute arbitrary code; only load shards
    # that were written by a trusted source.
    return pickle.loads(b"".join(pieces))

# Read every array back from its pickle shards.
loaded_X_train = load_data_from_pickle_shards("X_train", data_folder)
loaded_y_train = load_data_from_pickle_shards("y_train", data_folder)
loaded_X_test = load_data_from_pickle_shards("X_test", data_folder)
loaded_y_test = load_data_from_pickle_shards("y_test", data_folder)

# Round-trip verification: shapes are compared first for all four arrays,
# then the element values.
roundtrip_pairs = (
    (loaded_X_train, X_train),
    (loaded_y_train, y_train),
    (loaded_X_test, X_test),
    (loaded_y_test, y_test),
)

for restored, original in roundtrip_pairs:
    assert restored.shape == original.shape

for restored, original in roundtrip_pairs:
    assert (restored == original).all()


In [6]:
def count_classes(y):
    """Return how many distinct rows ``y`` contains.

    Every row is converted to a tuple so that rows can be compared and
    de-duplicated in a set.
    """
    distinct_rows = {tuple(row) for row in y}
    return len(distinct_rows)


# Input geometry taken from the training array: axis 1 is the number of
# timesteps per sample, axis 2 the number of signals per timestep.
timesteps = X_train.shape[1]
input_dim = X_train.shape[2]
n_classes = count_classes(y_train)

# Training hyper-parameters
n_epochs, n_batch = 30, 16

# Combined L1 + L2 (elastic-net) penalty, used as a bias regularizer
regularizer = L1L2(l1=0.01, l2=0.01)

print(
    f"Timesteps: {timesteps}",
    f"Input dimention: {input_dim}",
    f"Total samples: {X_train.shape[0]}",
    sep="\n",
)


Timesteps: 128
Input dimention: 9
Total samples: 7352


In [7]:
# Model definition: two stacked LSTM layers followed by a classification head.
model = Sequential()
model.add(
    LSTM(
        64,
        input_shape=(timesteps, input_dim),
        # Emit the full sequence so the second LSTM receives one vector per timestep.
        return_sequences=True,
        bias_regularizer=regularizer,
    )
)
model.add(BatchNormalization())
model.add(Dropout(0.50))
model.add(LSTM(48))
model.add(Dropout(0.50))
# Softmax, not sigmoid: the labels are one-hot (exactly one class per sample)
# and the model is trained with categorical cross-entropy, which expects the
# outputs to form a probability distribution over the classes.
model.add(Dense(n_classes, activation="softmax"))
model.summary()


2023-04-26 04:20:02.994140: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-26 04:20:02.994233: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-04-26 04:20:02.994299: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-04-26 04:20:02.994514: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2023-04-26 04:20:03.117029: W tensorfl

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128, 64)           18944     
                                                                 
 batch_normalization (BatchN  (None, 128, 64)          256       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 128, 64)           0         
                                                                 
 lstm_1 (LSTM)               (None, 48)                21696     
                                                                 
 dropout_1 (Dropout)         (None, 48)                0         
                                                                 
 dense (Dense)               (None, 6)                 294       
                                                        

In [8]:
# Adam optimiser with categorical cross-entropy on the one-hot labels;
# accuracy is tracked as the only metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train the model. NOTE: the test split doubles as validation data here, so
# the reported validation metrics are computed on the test set.
model.fit(
    x=X_train,
    y=y_train,
    epochs=n_epochs,
    batch_size=n_batch,
    validation_data=(X_test, y_test),
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30

In [None]:
# Save model(s) for development purposes
import os

import tensorflow as tf

# Build a converter from the trained Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Allow both TFLite built-in ops and selected TensorFlow ops in the output.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

# Disable lowering of tensor list operations (presumably required for the
# LSTM layers to convert -- confirm against the TFLite RNN conversion guide).
converter._experimental_lower_tensor_list_ops = False

# Convert the model to the TFLite flatbuffer format.
tflite_model = converter.convert()

# Make sure the target folder exists so that this cell does not depend on an
# earlier cell having created it, then write the model to disk.
tflite_path = "./assets/model_lstm.tflite"
os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
with open(tflite_path, "wb") as f:
    f.write(tflite_model)
