# Import Libraries

In [1]:
import os
import warnings
warnings.filterwarnings('ignore')
from typing import List, Dict

# Import Required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# sklearn libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import (classification_report, 
                             confusion_matrix)
from sklearn.base import (BaseEstimator, 
                         TransformerMixin)


# tensorflow packages
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import (Adam, 
                                         AdamW)
from tensorflow.keras.losses import (SparseCategoricalCrossentropy,
                                    CategoricalCrossentropy)

# Load Dataset

The experiments have been carried out with a group of 30 volunteers within an age bracket of 19-48 years. Each person performed six activities (WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING) wearing a smartphone (Samsung Galaxy S II) on the waist. Using its embedded accelerometer and gyroscope, we captured 3-axial linear acceleration and 3-axial angular velocity at a constant rate of 50Hz.

Basically,our objective is to create this dataset

- Triaxial acceleration from the accelerometer (total acceleration) and the estimated body acceleration.
- Triaxial Angular velocity from the gyroscope. 
- A 561-feature vector with time and frequency domain variables. 
- Its activity label. 




#### Step 1 : Load data captured from accelerometer and gyroscope
- Triaxial acceleration from the accelerometer (total acceleration) and the estimated body acceleration 
- Triaxial Angular velocity from the gyroscope


##### Train

In [2]:
def load_data(filename: str) -> np.ndarray:
    data = pd.read_csv(filename, header = None, delim_whitespace=True)
    return data.to_numpy()


def merge_triaxial_into_sarray(path:str, prefix_filename:str = None, type_fil:str = None) -> np.ndarray:
    filenames = [os.path.join(path, prefix_filename + "_" + axial + "_"+ type_fil +".txt") for axial in ['x', 'y', 'z']]
    
    concated_array = np.array([])
    for filename in filenames:
        if concated_array.shape[0] == 0:
            concated_array  = load_data(filename)
        else:
            concated_array = np.dstack((concated_array, load_data(filename)))

    return concated_array


train_path = "../data/UCI_HAR_Dataset/train/InertialSignals/"

# Body Acceleration
body_acc = merge_triaxial_into_sarray(path = train_path, 
                           prefix_filename= "body_acc",
                           type_fil="train")

# Body Gyroscope
body_gyro = merge_triaxial_into_sarray(path = train_path, 
                                       prefix_filename= "body_gyro",
                                       type_fil="train")

# Total Acceleration
total_acc = merge_triaxial_into_sarray(path = train_path, 
                                       prefix_filename= "total_acc",
                                       type_fil="train")



# The numpy array looks like this: (samples, timesteps = 128 readings/window, features = [x, y, z])

print(f"Shape of body_acc : {body_acc.shape}")
print(f"Shape of body_gyro : {body_gyro.shape}")
print(f"Shape of total_acc : {total_acc.shape}")

Shape of body_acc : (7352, 128, 3)
Shape of body_gyro : (7352, 128, 3)
Shape of total_acc : (7352, 128, 3)


##### Test

In [3]:
test_path = "../data/UCI_HAR_Dataset/test/InertialSignals/"

# Body Acceleration
body_acc_test = merge_triaxial_into_sarray(path = test_path, 
                           prefix_filename= "body_acc",
                           type_fil="test")

# Body Gyroscope
body_gyro_test = merge_triaxial_into_sarray(path = test_path, 
                                       prefix_filename= "body_gyro",
                                       type_fil="test")

# Total Acceleration
total_acc_test = merge_triaxial_into_sarray(path = test_path, 
                                       prefix_filename= "total_acc",
                                       type_fil="test")


print(f"Shape of body_acc_test : {body_acc_test.shape}")
print(f"Shape of body_gyro_test : {body_gyro_test.shape}")
print(f"Shape of total_acc_test : {total_acc_test.shape}")

Shape of body_acc_test : (2947, 128, 3)
Shape of body_gyro_test : (2947, 128, 3)
Shape of total_acc_test : (2947, 128, 3)


#### Step 2: Merge body_acc, body_gyro and total_acc

In [4]:
X_train = np.concatenate((body_acc, body_gyro, total_acc), axis = 2)
X_test = np.concatenate((body_acc_test, body_gyro_test, total_acc_test), axis = 2)

print(f"Shape of the X_train: {X_train.shape}")
print(f"Shape of the X_test: {X_test.shape}")

Shape of the X_train: (7352, 128, 9)
Shape of the X_test: (2947, 128, 9)


##### Explanation Train Shape: (7352, 128, 9)

- Number of Samples : 7352
- Number of TimeSteps/Readings : 128 
- Information recorded across body acceleration, body gyroscope and Total acceleration

#### Step 3: Prepare label Dataset

In [5]:
y_train = load_data(filename= "../data/UCI_HAR_Dataset/train/y_train.txt")
y_test = load_data(filename= "../data/UCI_HAR_Dataset/test/y_test.txt" )

print(f"Shape of y_train: {y_train.shape}")
print(f"Shaoe of y_test: {y_test.shape}")

# For zero offset
y_train = y_train-1
y_test = y_test-1  

# Perform One Hot Encoding on label Dataset
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print(f"Shape of y_train (After Encoding): {y_train.shape}")
print(f"Shaoe of y_test (After Encoding): {y_test.shape}")

Shape of y_train: (7352, 1)
Shaoe of y_test: (2947, 1)
Shape of y_train (After Encoding): (7352, 6)
Shaoe of y_test (After Encoding): (2947, 6)


#### Step 4: Train Test Split

In [6]:
X_train, X_val, y_train, y_val =  train_test_split(X_train, y_train, train_size=0.8, random_state = 42)
print(f"Shape of X_train : {X_train.shape}")
print(f"Shape of X_val : {X_val.shape}")
print(f"Shape of y_train : {y_train.shape}")
print(f"Shape of y_test : {y_test.shape}")

Shape of X_train : (5881, 128, 9)
Shape of X_val : (1471, 128, 9)
Shape of y_train : (5881, 6)
Shape of y_test : (2947, 6)


#### Step 5: Save the numpy arrays

In [None]:
np.savez_compressed("../data/processed/processed_array.npz", 
         x_train = X_train, 
         x_test = X_test,
         x_val = X_val,
         y_train = y_train,
         y_test = y_test,
         y_val =  y_val        
    )

#### Step 6: Load Saved numpy arrays

In [9]:
data = np.load("../data/processed/processed_array.npz")