In [2]:
! pip install pandas numpy matplotlib seaborn

^C


---
UTD-MHAD

In [1]:
import os 
import scipy

In [None]:
data_path = '../../../Data/CZU-MHAD/sensor_mat'

In [None]:
sample_file_path = os.path.join(data_path, 'cx_a1_t2.mat')
data_dict = scipy.io.loadmat(sample_file_path)
data_dict

---
KU-HAR

In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 

In [2]:
data_path = "../../../Data/KU-HAR_time_domain_subsamples_20750x300.csv"
data_df = pd.read_csv(data_path)
data_df.head()

Unnamed: 0,0.0042305,-0.00028038,-0.013906,-0.0025436,0.027433,0.0583,0.05167,0.071857,0.080653,0.047917,...,-0.016692,-0.018566,-0.017361,-0.017919,-0.01668,-0.01665,-0.013097,0,300,1
0,0.012482,0.032496,0.067856,0.071552,0.078103,0.040353,-0.001059,-0.01858,-0.026813,-0.01128,...,-0.00821,-0.010408,-0.011459,-0.011747,-0.010394,-0.00807,-0.004354,0,300,2
1,0.012127,0.028458,0.062075,0.052611,0.022942,-0.010017,-0.023151,-0.028515,0.005036,0.00845,...,0.000915,0.000771,-0.00256,-0.00302,-0.00419,0.000215,0.00085,0,300,3
2,0.052964,0.074319,0.11067,0.10849,0.091825,0.056989,0.029337,-0.016724,-0.042265,-0.053983,...,-0.001209,0.002878,0.000663,0.000982,-0.002148,-0.0092,-0.010653,0,300,4
3,-0.020462,-0.030787,-0.008617,0.008906,0.045046,0.042136,0.045037,0.045182,0.025113,0.02173,...,-0.006369,-0.006614,-0.004698,-0.007279,-0.006861,-0.006161,-0.006113,0,300,5
4,0.039138,0.037687,0.022782,0.009661,-0.000885,-0.00063,-0.019837,-0.016246,-0.00758,0.001479,...,0.00193,0.0045,0.008865,0.01129,0.010965,0.010364,0.009872,0,300,6


In [3]:
data_df.isnull().sum().sum()

0

In [4]:
# preprocessing of the original dataset to make compatible with model input format
subdf_list = []

for i, r in data_df.iterrows():
    r = r.values 
    acx, acy, acz, gyx, gyy, gyz, label, _, ID = r[:300], r[300:600], r[600:900], r[900:1200], r[1200:1500], r[1500:1800], r[1800], r[1801], r[1802]
    sub_df = pd.DataFrame({'accelX': acx, 'accelY': acy, 'accelZ': acz, 'GyroX': gyx, 'GyroY': gyy, 'GyroZ': gyz}, index=range(300))
    sub_df['label'] = int(label)
    sub_df['ID'] = int(ID)
    subdf_list.append(sub_df)


In [5]:
full_df = pd.concat(subdf_list)
print(full_df.shape)

(6224700, 8)


In [6]:
full_df.head()

Unnamed: 0,accelX,accelY,accelZ,GyroX,GyroY,GyroZ,label,ID
0,0.012482,-0.081862,0.007547,-0.024319,-0.010539,-0.007933,0,2
1,0.032496,-0.087494,0.042496,-0.025171,-0.003985,-0.006765,0,2
2,0.067856,-0.054918,0.071386,-0.023936,0.001559,-0.003346,0,2
3,0.071552,-0.030374,0.062229,-0.022035,0.011073,-0.003037,0,2
4,0.078103,-0.012147,0.066126,-0.021341,0.020339,-0.005582,0,2


In [7]:
sub_df.reset_index(inplace=True, drop=True)
sub_df.head()

Unnamed: 0,accelX,accelY,accelZ,GyroX,GyroY,GyroZ,label,ID
0,0.8381,-0.33077,0.57978,0.38153,-0.83939,-0.52606,9,20750
1,-1.2116,-1.8595,-0.50821,0.36311,-0.86485,-0.52302,9,20750
2,-0.56718,-1.6048,0.019184,0.28757,-0.93565,-0.50314,9,20750
3,-0.55532,-0.071769,-0.94576,0.23723,-0.92913,-0.46137,9,20750
4,-0.64496,-0.07378,-0.96658,-0.005537,-0.77373,-0.19465,9,20750


In [13]:
class KUHARData(object):
    """KU-HAR dataset implementation"""

    def __init__(self, data_dir, n_proc=1, config=None):
        self.all_df, self.labels_df = self.load_all(data_dir)
        self.all_IDs = self.all_df.ID.unique()
        self.feature_names = self.all_df.columns[:-2]
        self.feature_df = self.all_df[self.feature_names]
        self.class_names = self.labels_df.labels.unique()
    
    def load_data(self, data_dir):
        df = pd.read_csv(data_dir)
        return df

    def load_all(self, data_dir):
        main_df = self.load_data(data_dir)
        subdf_list = []
        label_dict = {'ID': [], 'label': []}

        for i, r in main_df.iterrows():
            r = r.values 
            acx, acy, acz, gyx, gyy, gyz, label, _, ID = r[:300], r[300:600], r[600:900], r[900:1200], r[1200:1500], r[1500:1800], r[1800], r[1801], r[1802]
            sub_df = pd.DataFrame({'accelX': acx, 'accelY': acy, 'accelZ': acz, 'GyroX': gyx, 'GyroY': gyy, 'GyroZ': gyz}, index=[int(ID),]*300)
            label_dict['label'].append(int(label))
            label_dict['ID'].append(int(ID))
            subdf_list.append(sub_df)
            label_df = pd.DataFrame(label_dict)
            label_df.set_index('ID')

        full_df = pd.concat(subdf_list)
        # full_df.reset_index(inplace=True, drop=True)
        return full_df, label_df

In [14]:
dt = KUHARData(data_path)

In [15]:
dt.all_df.head()

Unnamed: 0,accelX,accelY,accelZ,GyroX,GyroY,GyroZ,label,ID
2,0.012482,-0.081862,0.007547,-0.024319,-0.010539,-0.007933,0,2
2,0.032496,-0.087494,0.042496,-0.025171,-0.003985,-0.006765,0,2
2,0.067856,-0.054918,0.071386,-0.023936,0.001559,-0.003346,0,2
2,0.071552,-0.030374,0.062229,-0.022035,0.011073,-0.003037,0,2
2,0.078103,-0.012147,0.066126,-0.021341,0.020339,-0.005582,0,2


In [21]:
dt.feature_df.mean()

accelX    -54.780617
accelY    127.582697
accelZ   -264.334243
GyroX       0.492749
GyroY      75.552991
GyroZ     140.385796
dtype: float64

In [18]:
dt.feature_df.loc[2].values.shape

(300, 6)

In [33]:
dt.feature_names

Index(['accelX', 'accelY', 'accelZ', 'GyroX', 'GyroY', 'GyroZ'], dtype='object')

In [34]:
dt.all_IDs

array([    2,     3,     4, ..., 20748, 20749, 20750], dtype=int64)

In [67]:
class KUHARData(object):
    """KU-HAR dataset implementation"""

    def __init__(self, data_dir, n_proc=1, config=None):
        self.all_df, self.label_df = self.load_all(data_dir)
        self.all_IDs = self.all_df.ID.unique()
        self.feature_names = self.all_df.columns[:-2]
        self.feature_df = self.all_df[self.feature_names]
    
    def load_data(self, data_dir):
        df = pd.read_csv(data_dir)
        return df

    def load_all(self, data_dir):
        main_df = self.load_data(data_dir)
        df_dict = {'accelX': [], 'accelY': [], 'accelZ': [], 'GyroX': [], 'GyroY': [], 'GyroZ': [], 'label': [], 'ID': []}

        for i, r in main_df.iterrows():
            r = r.values 
            acx, acy, acz, gyx, gyy, gyz, label, _, ID = r[:300], r[300:600], r[600:900], r[900:1200], r[1200:1500], r[1500:1800], r[1800], r[1801], r[1802]
            # sub_df = pd.DataFrame({'accelX': acx, 'accelY': acy, 'accelZ': acz, 'GyroX': gyx, 'GyroY': gyy, 'GyroZ': gyz}, index=range(300))
            df_dict['accelX'].append(acx)
            df_dict['accelY'].append(acy)
            df_dict['accelZ'].append(acz)
            df_dict['GyroX'].append(gyx)
            df_dict['GyroY'].append(gyy)
            df_dict['GyroZ'].append(gyz)
            df_dict['label'].append(int(label))
            df_dict['ID'].append(int(ID))
            # subdf_list.append(sub_df)

        full_df = pd.DataFrame(df_dict, index=df_dict['ID'])
        label_df = pd.DataFrame({'ID': df_dict['ID'], 'label': df_dict['label']})
        label_df.set_index('ID', inplace=True)
        # full_df.reset_index(inplace=True, drop=True)
        return full_df, label_df

In [68]:
dt = KUHARData(data_path)

In [65]:
dt.all_df.head()

Unnamed: 0,accelX,accelY,accelZ,GyroX,GyroY,GyroZ,label,ID
2,"[0.012482, 0.032496, 0.067856, 0.071552, 0.078...","[-0.081862, -0.087494, -0.054918, -0.030374, -...","[0.0075474, 0.042496, 0.071386, 0.062229, 0.06...","[-0.024319, -0.025171, -0.023936, -0.022035, -...","[-0.010539, -0.0039851, 0.0015593, 0.011073, 0...","[-0.0079325, -0.0067652, -0.0033457, -0.003037...",0,2
3,"[0.012127, 0.028458, 0.062075, 0.052611, 0.022...","[-0.014245, 0.013939, 0.021417, 0.035381, 0.03...","[0.059104, 0.077302, 0.071605, 0.040671, 0.011...","[-0.031197, -0.030023, -0.028208, -0.030385, -...","[0.0069761, 0.012883, 0.017098, 0.020648, 0.01...","[-0.003934, -0.0027187, -0.0052953, -0.0097515...",0,3
4,"[0.052964, 0.074319, 0.11067, 0.10849, 0.09182...","[-0.043185, -0.016662, -0.0053775, 0.010687, 0...","[-0.019286, 0.0090208, 0.062509, 0.088431, 0.0...","[0.0011719, 0.0023993, -0.0012175, 0.0012269, ...","[-0.026508, -0.026477, -0.020948, -0.015568, -...","[0.0043762, 0.0048727, 0.007712, 0.0095564, 0....",0,4
5,"[-0.020462, -0.030787, -0.0086174, 0.0089065, ...","[0.054955, 0.039589, 0.010204, -0.0080236, -0....","[-0.091741, -0.074799, -0.055399, -0.011008, 0...","[-0.02631, -0.026255, -0.027553, -0.031039, -0...","[0.00086743, -0.015774, -0.023115, -0.023046, ...","[-0.011875, -0.01436, -0.012757, -0.010605, -0...",0,5
6,"[0.039138, 0.037687, 0.022782, 0.0096612, -0.0...","[0.011263, 0.015195, 0.020231, 0.022397, 0.001...","[0.032535, 0.034359, 0.033884, 0.033403, 0.041...","[-0.020749, -0.021244, -0.025445, -0.027144, -...","[-0.0020231, 0.002309, 0.0049787, 0.0065309, 0...","[-0.009616, -0.0094463, -0.010087, -0.010154, ...",0,6


In [60]:
dt.feature_names

Index(['accelX', 'accelY', 'accelZ', 'GyroX', 'GyroY', 'GyroZ'], dtype='object')

In [52]:
u = np.array([i for i in dt.feature_df.loc[10].values])

In [53]:
u.T.shape

(300, 6)

In [69]:
dt.label_df.head()

Unnamed: 0_level_0,label
ID,Unnamed: 1_level_1
2,0
3,0
4,0
5,0
6,0
