In [1]:
# Mount Google Drive at /content/drive (the dataset paths used below live under this mount point).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Importing Libraries

In [3]:
# Importing numpy 
import numpy as np
# Importing Scipy 
import scipy as sp
# Importing Pandas Library 
import pandas as pd
# import the glob function to collect file paths
from glob import glob
# import display() for better visualization of DataFrames and arrays
from IPython.display import display
# import pyplot for plotting
import matplotlib.pyplot as plt
plt.style.use('bmh') # for better plots

## Importing Data

In [4]:
# Every path inside the HAPT Raw-Data folder, sorted by name so the positional slicing done below is stable.
Raw_data_paths = sorted(glob("/content/drive/MyDrive/Data/Original-Data/HAPT-Dataset/Raw-Data/*"))

In [5]:
################# Just to verify if all paths were scraped #################
# The sorted listing is sliced positionally: 61 acc paths, then 61 gyro paths, then the labels file.
Raw_acc_paths=Raw_data_paths[:61]       # acc file paths only
Raw_gyro_paths=Raw_data_paths[61:122]   # gyro file paths only
# one summary line per group of files
for message, count in (
        ("RawData folder contains in total {:d} file ", len(Raw_data_paths)),
        ("The first {:d} are Acceleration files:", len(Raw_acc_paths)),
        ("The second {:d} are Gyroscope files:", len(Raw_gyro_paths))):
    print (message.format(count))
print ("The last file is a labels file")
# the path at position 122 is reported as the labels file
print ("labels file path is:",Raw_data_paths[122])

RawData folder contains in total 123 file 
The first 61 are Acceleration files:
The second 61 are Gyroscope files:
The last file is a labels file
labels file path is: /content/drive/MyDrive/Data/Original-Data/HAPT-Dataset/Raw-Data/labels.txt


In [6]:
#    FUNCTION: import_raw_signals(path,columns)
def import_raw_signals(file_path,columns):
    """Load a whitespace-separated text file of numbers into a DataFrame of floats.

    Parameters
    ----------
    file_path : str
        Path of the text file; each non-blank line holds one row of values.
    columns : list of str
        Column names of the returned frame (one name per value in a row).

    Returns
    -------
    pandas.DataFrame
        One row per non-blank line of the file, every value parsed with float().

    Raises
    ------
    ValueError
        If a token of the file cannot be parsed as a float.
    """
    # the context manager closes the file handle even when parsing fails
    with open(file_path,'r') as opened_file:
        rows=[[float(element) for element in line.split()]
              for line in opened_file
              if line.strip()]  # blank lines carry no sample and would produce a ragged row
    # convert the list of rows into a 2D numpy array
    data=np.array(rows)
    # wrap the array in a DataFrame carrying the requested column names
    return pd.DataFrame(data=data,columns=columns)

In [7]:
########################################### RAWDATA DICTIONARY ##############################################################
# creating an empty dictionary where all dataframes will be stored
raw_dic={}
# column names given to the three axes of an acc file and of a gyro file
raw_acc_columns=['acc_X','acc_Y','acc_Z']
raw_gyro_columns=['gyro_X','gyro_Y','gyro_Z']
# Positions 0..60 of Raw_data_paths are read as acc files; the gyro file of the
# same experiment is taken from 61 positions further down the list.
for path_index in range(61):
    acc_path=Raw_data_paths[path_index]
    # dictionary key [expXX_userXX]: the 12 characters that precede the 4-character extension
    key=acc_path[-16:-4]
    # acc file -> DataFrame of floats
    raw_acc_data_frame=import_raw_signals(acc_path,raw_acc_columns)
    # matching gyro file -> DataFrame of floats
    gyro_path=Raw_data_paths[path_index+61]
    raw_gyro_data_frame=import_raw_signals(gyro_path,raw_gyro_columns)
    # place the acc and gyro columns side by side in one DataFrame
    raw_signals_data_frame=pd.concat([raw_acc_data_frame, raw_gyro_data_frame], axis=1)
    # store the combined DataFrame under the experiment key
    raw_dic[key]=raw_signals_data_frame

In [8]:
# raw_dic is a dictionary of combined DataFrames (acc and gyro columns side by side), one per experiment
print('raw_dic contains %d DataFrame' % len(raw_dic))

# show the first 3 rows of the DataFrame stored under 'exp01_user01'
display(raw_dic['exp01_user01'].head(3))

raw_dic contains 61 DataFrame


Unnamed: 0,acc_X,acc_Y,acc_Z,gyro_X,gyro_Y,gyro_Z
0,0.918056,-0.1125,0.509722,-0.054978,-0.069639,-0.030849
1,0.911111,-0.093056,0.5375,-0.012523,0.019242,-0.038485
2,0.881944,-0.086111,0.513889,-0.023518,0.276417,0.006414


In [9]:
#    FUNCTION: import_labels_file(path,columns)
def import_labels_file(path,columns):
    """Load a whitespace-separated text file of integers into a DataFrame.

    Parameters
    ----------
    path : str
        Path of the labels text file; each non-blank line holds one row of integers.
    columns : list of str
        Column names of the returned frame (one name per value in a row).

    Returns
    -------
    pandas.DataFrame
        One row per non-blank line of the file, every value parsed with int().

    Raises
    ------
    ValueError
        If a token of the file cannot be parsed as an int.
    """
    # the context manager closes the file handle even when parsing fails
    with open(path,'r') as labels_file:
        rows=[[int(element) for element in line.split()]
              for line in labels_file
              if line.strip()]  # blank lines carry no label and would produce a ragged row
    # convert the list of rows into a 2D numpy array
    data=np.array(rows)
    # wrap the array in a DataFrame carrying the requested column names
    return pd.DataFrame(data=data,columns=columns)

In [10]:
#################################
# column names of "labels.txt", in the order the values appear on each line
raw_labels_columns=['experiment_number_ID','user_number_ID','activity_number_ID','Label_start_point','Label_end_point']
# the path of "labels.txt" is the last element of the sorted list "Raw_data_paths"
labels_path=Raw_data_paths[-1]
# parse labels.txt with import_labels_file
# and keep the result as a DataFrame
Labels_Data_Frame=import_labels_file(labels_path,raw_labels_columns)

In [11]:
# Dictionary mapping the basic-activity identifiers (1..6) to their names.
# Label 6 is spelled 'LAYING', as in the dataset's activity label list (was the typo 'LIYING').
# `Acitivity_labels` (historical misspelling) and `AL` are kept so existing references still work;
# `Activity_labels` is a correctly spelled alias of the same dictionary.
Acitivity_labels=AL=Activity_labels={ 1: 'WALKING', 2: 'WALKING_UPSTAIRS', 3: 'WALKING_DOWNSTAIRS', 4: 'SITTING', 5: 'STANDING', 6: 'LAYING'}

In [12]:
def visualize_signal(signal,x_labels,y_labels,title,legend):
    """Plot one signal against time on a wide (20x4) figure and show it.

    signal   : sequence of samples; sample i is drawn at i / sampling_freq
    x_labels : text of the x axis label
    y_labels : text of the y axis label
    title    : figure title
    legend   : label of the plotted curve, shown in the upper-left legend

    Reads the notebook-level `sampling_freq`, which must be defined before the call.
    """
    plt.figure(figsize=(20,4))
    # duration of one sample; row numbers are turned into time stamps with it
    sample_period=1/float(sampling_freq)
    time_axis=[sample_period*row for row in range(len(signal))]
    plt.plot(time_axis,signal,label=legend)
    plt.xlabel(x_labels)
    plt.ylabel(y_labels)
    plt.title(title)
    plt.legend(loc="upper left")
    plt.show()

## Filtering

In [13]:
#define a third order median filter 
from scipy.signal import medfilt # import the median filter functio
def median(signal,kernel_size=3):# input: numpy array 1D (one column)
    """Apply a median filter to a signal.

    Parameters
    ----------
    signal : array-like
        The samples to filter (converted with np.array).
    kernel_size : int, optional
        Size of the median window passed to scipy's medfilt; defaults to 3
        (the third-order filter used by this notebook).

    Returns
    -------
    numpy.ndarray
        The median-filtered signal, same length as the input.
    """
    array=np.array(signal)
    # Use the directly imported `medfilt`: `sp.signal` only resolves as an attribute
    # once the scipy.signal submodule has been imported somewhere.
    med_filtered=medfilt(array, kernel_size=kernel_size)
    return med_filtered

In [14]:
# necessary functions
# Sampling frequency of the signals in Hz (read by the plotting and frequency-selection helpers)
sampling_freq=50.0
from scipy.fftpack import fft   # import fft (Fast Fourier Transform) to convert a signal from the time domain to the
#                               frequency domain (output: a numpy array holding the complex amplitude of each frequency component)
from scipy.fftpack import fftfreq # import fftfreq to generate the frequencies associated with the frequency components
#                                   mentioned above
from scipy.fftpack import ifft # import ifft (inverse fft) to convert a frequency-domain signal back to the time domain
import math # import math library
############################## Constants #############################
nyq=sampling_freq/float(2) # nyq is the Nyquist frequency, equal to half of the sampling frequency [50/2 = 25 Hz]
freq1 = 0.3 # freq1 = 0.3 hertz [Hz]: the cutoff frequency between the DC components [0,0.3] Hz
#           and the body components [0.3,20] Hz
freq2 = 20  # freq2 = 20 Hz: the cutoff frequency between the body components [0.3,20] Hz
#             and the high-frequency noise components [20,25] Hz
# Function name: components_selection_one_signal
# If t_signal is an acceleration signal, the DC component is the gravity component [Grav_acc]
# and the body component is the body's acceleration [Body_acc].
# If t_signal is a gyro signal, the body component is the body's angular velocity [Body_gyro]
# and the DC component is not useful. The noise component is not useful in either case.
def components_selection_one_signal(t_signal,freq1,freq2,sampling_rate=None):
    """Split a time-domain signal into DC, body and noise components by frequency band.

    The signal is transformed with fft, its frequency bins are partitioned into
    three bands, and each band is transformed back with ifft:

    * DC component   : |f| <= freq1
    * body component : freq1 < |f| <= freq2
    * noise          : |f| > freq2

    Parameters
    ----------
    t_signal : array-like, 1D
        Time-domain signal.
    freq1 : float
        Cutoff (Hz) between the DC band and the body band; freq1 itself is in the DC band.
    freq2 : float
        Cutoff (Hz) between the body band and the noise band; freq2 itself is in the body band.
    sampling_rate : float, optional
        Sampling frequency in Hz. When omitted, the notebook-level `sampling_freq` is used.

    Returns
    -------
    tuple of four 1D numpy arrays
        (total_component, t_DC_component, t_body_component, t_noise), where
        total_component is t_signal with the noise component subtracted.
    """
    if sampling_rate is None:
        sampling_rate=sampling_freq  # fall back to the notebook-level constant
    t_signal=np.array(t_signal)
    # the signal in the frequency domain: 1D array of complex values
    f_signal=fft(t_signal)
    # frequency (Hz) associated with each entry of f_signal, negative frequencies included
    freqs=fftfreq(len(t_signal), d=1/float(sampling_rate))
    abs_freqs=np.abs(freqs)
    # Band membership is driven by the freq1/freq2 arguments (they used to be ignored in
    # favour of hard-coded 0.3 and 20) and computed as masks instead of a per-bin loop.
    dc_mask=abs_freqs<=freq1
    noise_mask=abs_freqs>freq2
    body_mask=~(dc_mask|noise_mask)
    # zero every bin outside the band, go back to the time domain and keep the real part
    t_DC_component=ifft(np.where(dc_mask,f_signal,0)).real
    t_body_component=ifft(np.where(body_mask,f_signal,0)).real
    t_noise=ifft(np.where(noise_mask,f_signal,0)).real
    # the total component is the original signal with the noise removed
    total_component=t_signal-t_noise
    return (total_component,t_DC_component,t_body_component,t_noise)

In [15]:
import math
def mag_3_signals(x,y,z): # Euclidian magnitude
    """Sample-by-sample Euclidean magnitude of a 3-axial signal.

    Parameters
    ----------
    x, y, z : array-like, 1D, same length
        The three axial components. Values are taken by position, so a pandas
        Series is handled the same way whatever its index labels are.

    Returns
    -------
    list of float
        sqrt(x[i]**2 + y[i]**2 + z[i]**2) for every sample i.

    Raises
    ------
    ValueError
        If the three inputs do not have the same shape.
    """
    x_values=np.asarray(x,dtype=float)
    y_values=np.asarray(y,dtype=float)
    z_values=np.asarray(z,dtype=float)
    if not (x_values.shape==y_values.shape==z_values.shape):
        raise ValueError("x, y and z must have the same length")
    # one vectorised pass instead of a Python-level loop over the samples
    return np.sqrt(x_values**2+y_values**2+z_values**2).tolist()

In [16]:
time_sig_dic={} # dictionary that will hold one DataFrame of time-domain signals per experiment
raw_dic_keys=sorted(raw_dic.keys()) # sorted experiment keys
for key in raw_dic_keys: # iterate over each key in raw_dic
    raw_df=raw_dic[key] # the raw DataFrame stored under 'expXX_userYY' in raw_dic
    time_sig_df=pd.DataFrame() # DataFrame that will receive the time-domain signals
    for column in raw_df.columns: # iterate over each column in raw_df 
        t_signal=np.array(raw_df[column]) # copy the signal values into a 1D numpy array
        med_filtred=median(t_signal) # apply the median filter and keep the filtered signal in med_filtred
        if 'acc' in column: # the column name marks an acceleration signal 
            # the 2nd output (DC component) is the gravity acceleration
            # the 3rd output (body component) is the body acceleration
            _,grav_acc,body_acc,_=components_selection_one_signal(med_filtred,freq1,freq2) # apply components selection
            # store both signals in time_sig_df, dropping the last value of each one
            time_sig_df['t_body_'+column]=body_acc[:-1] # body acceleration; the axis comes from
            #                                             the column name
            time_sig_df['t_grav_'+column]= grav_acc[:-1] # gravity acceleration; the axis comes from
            #                                              the column name
        elif 'gyro' in column: # the column name marks a gyro signal
            # the 3rd output (body component) is the body angular velocity
            _,_,body_gyro,_=components_selection_one_signal(med_filtred,freq1,freq2)  # apply components selection
            # store the signal in time_sig_df, dropping its last value
            time_sig_df['t_body_gyro_'+column[-1]]=body_gyro[:-1] # body gyro; the axis is the last character
            #                                                       of the column name
    # the 9 axial signals generated above are reordered so that each X,Y,Z triple is contiguous,
    # which makes the magnitude computation below a simple stride-3 loop
    new_columns_ordered=['t_body_acc_X','t_body_acc_Y','t_body_acc_Z',
                          't_grav_acc_X','t_grav_acc_Y','t_grav_acc_Z','t_body_gyro_X','t_body_gyro_Y','t_body_gyro_Z']  
    # new DataFrame that receives the columns in the wanted order
    ordered_time_sig_df=pd.DataFrame()
    
    for col in new_columns_ordered: # iterate over each column in the new order
        ordered_time_sig_df[col]=time_sig_df[col] # copy the column into the ordered DataFrame
    
    # Generating magnitude signals
    for i in range(0,9,3): # i is the position of the X column of each 3-axial signal
        
        mag_col_name=new_columns_ordered[i][:-1]+'mag'# replace the trailing axis letter by 'mag', e.g. 't_body_acc_mag'
        
        col0=np.array(ordered_time_sig_df[new_columns_ordered[i]]) # X component (as a numpy array)
        col1=ordered_time_sig_df[new_columns_ordered[i+1]] # Y component
        col2=ordered_time_sig_df[new_columns_ordered[i+2]] # Z component
        
        mag_signal=mag_3_signals(col0,col1,col2) # Euclidean magnitude of the [X,Y,Z] triple
        ordered_time_sig_df[mag_col_name]=mag_signal # store the magnitude under its column name
    
    time_sig_dic[key]=ordered_time_sig_df # store the ordered DataFrame in time_sig_dic under the experiment key

In [17]:

time_sig_dic['exp01_user01'].head(3) # display the first three rows

Unnamed: 0,t_body_acc_X,t_body_acc_Y,t_body_acc_Z,t_grav_acc_X,t_grav_acc_Y,t_grav_acc_Z,t_body_gyro_X,t_body_gyro_Y,t_body_gyro_Z,t_body_acc_mag,t_grav_acc_mag,t_body_gyro_mag
0,0.382899,-0.206562,-0.169117,0.431759,0.175976,0.720859,-0.025624,-0.210093,-0.361419,0.466776,0.8585,0.418831
1,0.55098,-0.317339,-0.239649,0.443087,0.169277,0.715098,-0.014575,-0.146141,-0.377275,0.679496,0.858106,0.404853
2,0.368165,-0.207616,-0.165561,0.454376,0.162556,0.709293,-0.018281,0.079766,-0.329582,0.453938,0.857892,0.33959


In [18]:
# normalize5: left-pad with '0' up to 5 characters
# example: 679 ==> '00679'; 50 ==> '00050'
def normalize5(number):
    """Return str(number) left-padded with '0' characters to a length of 5.

    Text that is already 5 characters or longer is returned unchanged.
    """
    return str(number).rjust(5,"0")
# normalize2: put a single '0' in front of inputs whose text is shorter than 2 characters
def normalize2(number):
    """Return str(number), prefixed with one '0' when it is shorter than 2 characters.

    example: 3 ==> '03'; 12 ==> '12'
    """
    text=str(number)
    return text if len(text)>=2 else "0"+text

## Windowing

In [19]:
#Define Windowing function
def Windowing_type_1(time_sig_dic,Labels_Data_Frame):
    """Cut the basic-activity segments of every experiment into overlapping windows.

    time_sig_dic      : dictionary 'expXX_userYY' -> DataFrame of time-domain signals;
                        the key 'exp01_user01' must exist (its columns name every window's columns)
    Labels_Data_Frame : labels DataFrame; rows with "activity_number_ID" < 7 are used and are
                        read by position as (experiment id, user id, activity id, start, end)

    Returns a dictionary 't_WNNNNN_expXX_userYY_actZZ' -> DataFrame of 128 rows, where
    NNNNN is a running window id. Window starts advance by 64 rows (50% overlap) from the
    segment start while they stay below (end - 127).
    """
    WINDOW_ROWS=128 # rows per window
    CURSOR_STEP=64  # shift between two consecutive windows

    columns=time_sig_dic['exp01_user01'].columns
    # keep basic activities only
    is_basic_activity=Labels_Data_Frame["activity_number_ID"] <7
    BA_array=np.array(Labels_Data_Frame[is_basic_activity])

    t_dic_win_type_I={}
    window_ID=0 # running, unique window id
    for activity_row in BA_array:
        # key of the DataFrame holding the rows of this activity
        file_key='exp%s_user%s' % (normalize2(int(activity_row[0])),normalize2(int(activity_row[1])))
        act_ID=activity_row[2]
        first_cursor=activity_row[3]
        cursor_limit=activity_row[4]-127

        for cursor in range(first_cursor,cursor_limit,CURSOR_STEP):
            # rows cursor .. cursor+127, converted to a numpy array to drop the row index
            window_values=np.array(time_sig_dic[file_key].iloc[cursor:cursor+WINDOW_ROWS])
            window=pd.DataFrame(data=window_values,columns=columns)

            key='t_W'+normalize5(window_ID)+'_'+file_key+'_act'+normalize2(act_ID)
            t_dic_win_type_I[key]=window
            window_ID+=1

    return t_dic_win_type_I

In [20]:
# apply the type 1 sliding window to "time_sig_dic", using the labels to locate each activity segment
t_dic_win_type_I  = Windowing_type_1(time_sig_dic,Labels_Data_Frame)

## Feature Extraction

In [21]:
# df is dataframe contains 3 columns (3 axial signals X,Y,Z)
# mean
def mean_axial(df):
    """Column-wise mean of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the mean of each column, in column order.
    """
    return list(np.mean(np.array(df),axis=0))
# std
def std_axial(df):
    """Column-wise standard deviation (numpy default, ddof=0) of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the standard deviation of each column, in column order.
    """
    return list(np.std(np.array(df),axis=0))
# mad
from statsmodels.robust import mad as median_deviation 
def mad_axial(df):
    """Column-wise median deviation of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the value computed by `median_deviation`
    (statsmodels.robust.mad) along axis 0 for each column.
    """
    values=np.array(df)
    return list(median_deviation(values,axis=0))
# max
def max_axial(df):
    """Column-wise maximum of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the largest value of each column, in column order.
    """
    return list(np.array(df).max(axis=0))
# min
def min_axial(df):
    """Column-wise minimum of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the smallest value of each column, in column order.
    """
    return list(np.array(df).min(axis=0))
# IQR
from scipy.stats import iqr as IQR 
def IQR_axial(df):
    """Column-wise interquartile range of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with scipy's iqr of each column, in column order.
    """
    values=np.array(df)
    per_column_iqr=np.apply_along_axis(IQR,0,values)
    return list(per_column_iqr)
# Entropy
from scipy.stats import entropy 
def entropy_axial(df):
    """Column-wise entropy of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    The absolute values of each column are handed to scipy.stats.entropy.
    Returns a list with one entropy value per column, in column order.
    """
    magnitudes=abs(np.array(df))
    return list(np.apply_along_axis(entropy,0,magnitudes))
# energy
def t_energy_axial(df):
    """Column-wise energy (sum of squared values) of a group of axial signals.

    df: DataFrame (or 2D array-like) with one column per axis.
    Returns a list with the energy of each column, in column order.
    """
    squared=np.array(df)**2
    return list(squared.sum(axis=0))
# mean
def mean_mag(mag_column):
    """Mean of one magnitude column, returned as a Python float."""
    return float(np.mean(np.array(mag_column)))
# std: standard deviation of mag column
def std_mag(mag_column):
    """Standard deviation (numpy default, ddof=0) of one magnitude column, as a Python float."""
    return float(np.std(np.array(mag_column)))
# mad: median deviation
def mad_mag(mag_column):
    """Median deviation of one magnitude column, as a Python float.

    The value is computed by `median_deviation` (statsmodels.robust.mad).
    """
    values=np.array(mag_column)
    return float(median_deviation(values))
# max
def max_mag(mag_column):
    """Largest value of one magnitude column, as a Python float."""
    return float(np.array(mag_column).max())
# min
def min_mag(mag_column):
    """Smallest value of one magnitude column, as a Python float."""
    return float(np.array(mag_column).min())
# IQR
def IQR_mag(mag_column):
    """Interquartile range (Q3 - Q1) of one magnitude column, as a Python float."""
    values=np.array(mag_column)
    return float(IQR(values))
# Entropy
def entropy_mag(mag_column):
    """Entropy of one magnitude column (scipy.stats.entropy), as a Python float."""
    values=np.array(mag_column)
    return float(entropy(values))
def t_energy_mag(mag_column):
    """Energy (sum of squared values) of one magnitude column, as a Python float."""
    squared=np.array(mag_column)**2
    return float(squared.sum())

  import pandas.util.testing as tm


In [22]:
#Time Axial Features PipeLine
def t_axial_features_generation(t_window):
    """Compute the time-domain features of the axial signals of one window.

    t_window: DataFrame whose first 9 columns are three consecutive [X,Y,Z] triples.

    For each triple, eight feature functions are applied in the order
    mean, std, mad, max, min, energy, IQR, entropy; each returns one value per axis.
    Returns a flat list of 3 triples x 8 features x 3 axes values.
    """
    axial_columns=t_window.columns[0:9]
    axial_df=t_window[axial_columns]
    # applied in this order to every [X,Y,Z] triple
    axial_feature_functions=(mean_axial,std_axial,mad_axial,max_axial,
                             min_axial,t_energy_axial,IQR_axial,entropy_axial)
    t_axial_features=[]
    for first in range(0,9,3):
        triple_df=axial_df[axial_columns[first:first+3]] # one 3-axial signal: signal_name[X,Y,Z]
        for feature_function in axial_feature_functions:
            t_axial_features.extend(feature_function(triple_df)) # 3 values per feature
    return t_axial_features

In [23]:
def t_mag_features_generation(t_window):
    """Compute the time-domain features of the magnitude signals of one window.

    t_window: DataFrame whose columns from position 9 onwards are magnitude signals.

    For each magnitude column, eight feature functions are applied in the order
    mean, std, mad, max, min, energy, IQR, entropy; each returns one value.
    Returns a flat list of 8 values per magnitude column, in column order.
    """
    mag_df=t_window[t_window.columns[9:]] # magnitude columns only
    # applied in this order to every magnitude column
    mag_feature_functions=(mean_mag,std_mag,mad_mag,max_mag,
                           min_mag,t_energy_mag,IQR_mag,entropy_mag)
    t_mag_features=[]
    for mag_name in mag_df: # iterating a DataFrame yields its column names
        mag_signal=mag_df[mag_name]
        for feature_function in mag_feature_functions:
            t_mag_features.append(feature_function(mag_signal)) # 1 value per feature
    return t_mag_features

In [24]:
def time_features_names():
    """Build the ordered list of time-domain feature names.

    Axial names come first: for every 3-axial signal and every feature, one name per axis,
    with the feature inserted before the axis suffix (e.g. 't_body_acc_mean()_X').
    Magnitude names follow: signal name + feature (e.g. 't_body_acc_Mag_mean()').
    """
    # time domain axial signals, grouped per 3-axial signal
    axial_groups=(('t_body_acc_X','t_body_acc_Y','t_body_acc_Z'),
                  ('t_grav_acc_X','t_grav_acc_Y','t_grav_acc_Z'),
                  ('t_body_gyro_X','t_body_gyro_Y','t_body_gyro_Z'))
    # time domain magnitude signals
    magnitude_signals=('t_body_acc_Mag','t_grav_acc_Mag','t_body_gyro_Mag')
    # feature suffixes, in the order the feature values are generated
    feature_suffixes=('_mean()','_std()','_mad()','_max()','_min()',
                      '_energy()','_iqr()','_entropy()')

    # the last two characters of an axial name are its axis suffix ('_X', '_Y' or '_Z')
    axial_names=[axis_name[:-2]+suffix+axis_name[-2:]
                 for group in axial_groups
                 for suffix in feature_suffixes
                 for axis_name in group]
    magnitude_names=[mag_name+suffix
                     for mag_name in magnitude_signals
                     for suffix in feature_suffixes]
    return axial_names+magnitude_names

In [25]:
# concatenate all feature names and add two trailing columns: the activity id and the user id of each row
all_columns=time_features_names()+['activity_Id','user_Id']
def Dataset_Generation_PipeLine(t_dic):
    """Turn a dictionary of time-domain windows into the final feature dataset.

    Parameters
    ----------
    t_dic : dict
        Maps window keys of the form 't_WNNNNN_expXX_userYY_actZZ' to window DataFrames.
        The user id is read from characters [-8:-6] of the key and the activity id
        from its last two characters.

    Returns
    -------
    pandas.DataFrame
        One row per window, in sorted-key order, with the columns of `all_columns`:
        the axial features, the magnitude features, then 'activity_Id' and 'user_Id'.
        An empty `t_dic` yields an empty frame that still carries these columns.
    """
    rows=[]
    # sort the keys once instead of re-sorting them on every iteration
    for t_key in sorted(t_dic.keys()):
        t_window=t_dic[t_key]
        window_user_id=int(t_key[-8:-6])   # user id from the window key
        window_activity_id=int(t_key[-2:]) # activity id from the window key
        # all time features of this window: axial ones first, magnitude ones next
        time_features=t_axial_features_generation(t_window)+t_mag_features_generation(t_window)
        rows.append(time_features+[window_activity_id,window_user_id])
    # build the frame in one shot: growing a DataFrame one .loc row at a time
    # re-allocates it on every insertion
    final_Dataset=pd.DataFrame(data=rows,columns=all_columns)
    return final_Dataset

In [29]:
# Run the feature-generation pipeline on the windows stored in t_dic_win_type_I
# and keep the returned dataframe as `Dataset`.
Dataset= Dataset_Generation_PipeLine(t_dic_win_type_I)

In [30]:
# Quick inspection of the generated dataset.
print('The shape of Dataset is :',Dataset.shape) # (number of rows, number of columns)
# statistical description, followed by the first three rows
display(Dataset.describe(), Dataset.head(3))

The shape of Dataset is : (10399, 98)


Unnamed: 0,t_body_acc_mean()_X,t_body_acc_mean()_Y,t_body_acc_mean()_Z,t_body_acc_std()_X,t_body_acc_std()_Y,t_body_acc_std()_Z,t_body_acc_mad()_X,t_body_acc_mad()_Y,t_body_acc_mad()_Z,t_body_acc_max()_X,t_body_acc_max()_Y,t_body_acc_max()_Z,t_body_acc_min()_X,t_body_acc_min()_Y,t_body_acc_min()_Z,t_body_acc_energy()_X,t_body_acc_energy()_Y,t_body_acc_energy()_Z,t_body_acc_iqr()_X,t_body_acc_iqr()_Y,t_body_acc_iqr()_Z,t_body_acc_entropy()_X,t_body_acc_entropy()_Y,t_body_acc_entropy()_Z,t_grav_acc_mean()_X,t_grav_acc_mean()_Y,t_grav_acc_mean()_Z,t_grav_acc_std()_X,t_grav_acc_std()_Y,t_grav_acc_std()_Z,t_grav_acc_mad()_X,t_grav_acc_mad()_Y,t_grav_acc_mad()_Z,t_grav_acc_max()_X,t_grav_acc_max()_Y,t_grav_acc_max()_Z,t_grav_acc_min()_X,t_grav_acc_min()_Y,t_grav_acc_min()_Z,t_grav_acc_energy()_X,...,t_body_gyro_max()_Y,t_body_gyro_max()_Z,t_body_gyro_min()_X,t_body_gyro_min()_Y,t_body_gyro_min()_Z,t_body_gyro_energy()_X,t_body_gyro_energy()_Y,t_body_gyro_energy()_Z,t_body_gyro_iqr()_X,t_body_gyro_iqr()_Y,t_body_gyro_iqr()_Z,t_body_gyro_entropy()_X,t_body_gyro_entropy()_Y,t_body_gyro_entropy()_Z,t_body_acc_Mag_mean(),t_body_acc_Mag_std(),t_body_acc_Mag_mad(),t_body_acc_Mag_max(),t_body_acc_Mag_min(),t_body_acc_Mag_energy(),t_body_acc_Mag_iqr(),t_body_acc_Mag_entropy(),t_grav_acc_Mag_mean(),t_grav_acc_Mag_std(),t_grav_acc_Mag_mad(),t_grav_acc_Mag_max(),t_grav_acc_Mag_min(),t_grav_acc_Mag_energy(),t_grav_acc_Mag_iqr(),t_grav_acc_Mag_entropy(),t_body_gyro_Mag_mean(),t_body_gyro_Mag_std(),t_body_gyro_Mag_mad(),t_body_gyro_Mag_max(),t_body_gyro_Mag_min(),t_body_gyro_Mag_energy(),t_body_gyro_Mag_iqr(),t_body_gyro_Mag_entropy(),activity_Id,user_Id
count,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,...,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0,10399.0
mean,0.000181,-0.000253,4.3e-05,0.128598,0.085825,0.071912,0.120879,0.077916,0.065929,0.31653,0.168586,0.156443,-0.214188,-0.238018,-0.184606,4.624136,1.798429,1.273071,0.180825,0.109636,0.093348,4.61532,4.594737,4.594336,0.808852,0.021112,0.0844,0.008457,0.012042,0.012337,0.010097,0.01417,0.014466,0.821547,0.039303,0.102925,0.796202,0.002809,0.06557,100.783474,...,0.625681,0.3733,-0.528908,-0.556874,-0.409445,14.257902,16.736559,7.618272,0.336498,0.331412,0.240865,4.613313,4.591869,4.6089,0.157755,0.07496,0.065252,0.392702,0.030762,7.695637,0.090528,4.7538,1.023167,0.007541,0.009214,1.034431,1.012165,134.03008,0.014066,4.851987,0.36007,0.166096,0.157184,0.856137,0.068808,38.612732,0.21833,4.752912,3.638523,16.174825
std,0.0091,0.006948,0.00699,0.13967,0.081466,0.068746,0.132093,0.073289,0.060767,0.34582,0.161567,0.153674,0.225625,0.250289,0.200703,6.626804,2.298622,1.818871,0.201775,0.102247,0.085888,0.078387,0.085743,0.07777,0.364705,0.378825,0.328751,0.007821,0.013156,0.011774,0.009784,0.016108,0.014257,0.360722,0.380206,0.329063,0.369249,0.378264,0.329131,48.536681,...,0.733071,0.383657,0.539176,0.623292,0.424905,20.891745,28.02317,11.409359,0.330347,0.353797,0.239619,0.086411,0.092255,0.086843,0.153709,0.077413,0.067661,0.399595,0.032104,9.887746,0.093722,0.04556,0.012227,0.005749,0.007406,0.012587,0.016188,3.181447,0.011215,7.8e-05,0.341499,0.166781,0.159392,0.866937,0.068325,52.963519,0.22005,0.056658,1.740531,8.668025
min,-0.047488,-0.038424,-0.047545,0.001815,0.002281,0.003507,0.001536,0.002089,0.002521,0.003151,0.003597,0.005995,-1.2466,-1.269651,-1.317814,0.00044,0.000774,0.001603,0.002131,0.002848,0.003692,3.484882,3.872086,3.87858,-0.359334,-0.985258,-1.003729,0.000201,0.00036,0.000155,4.5e-05,0.000198,9.3e-05,-0.324047,-0.975361,-0.979422,-0.396664,-1.035763,-1.068411,0.000838,...,0.007585,0.00669,-3.591233,-5.174843,-2.39803,0.000967,0.00295,0.001588,0.003095,0.005602,0.00426,4.020393,3.766105,3.818188,0.006015,0.002119,0.002126,0.013594,0.000286,0.005619,0.002908,4.097321,0.93934,0.000123,0.000111,0.969603,0.893021,113.035315,0.000142,4.850901,0.013054,0.002681,0.002594,0.024641,0.000394,0.025425,0.003517,4.206943,1.0,1.0
25%,-0.003148,-0.003659,-0.003376,0.009211,0.012132,0.011688,0.009886,0.012923,0.012555,0.018161,0.023344,0.02392,-0.411811,-0.438738,-0.319694,0.011672,0.020155,0.018712,0.014259,0.018746,0.017753,4.577216,4.541463,4.547751,0.907282,-0.22492,-0.120237,0.003702,0.005644,0.00581,0.004178,0.00641,0.006601,0.920679,-0.209896,-0.103711,0.896048,-0.242273,-0.139485,105.399955,...,0.056294,0.047684,-0.941848,-0.932766,-0.742795,0.153293,0.119455,0.100291,0.053048,0.044398,0.043316,4.566845,4.542876,4.556798,0.020276,0.007321,0.007491,0.038508,0.004567,0.060086,0.010295,4.730449,1.015664,0.003457,0.003966,1.027304,1.003323,132.056925,0.006121,4.851982,0.055607,0.020091,0.020183,0.102964,0.015481,0.460251,0.028437,4.730764,2.0,9.0
50%,8e-05,-0.000127,-5.2e-05,0.023508,0.031908,0.029514,0.024822,0.032945,0.031158,0.05355,0.068076,0.061223,-0.049383,-0.068953,-0.062088,0.074057,0.136669,0.117034,0.036778,0.048428,0.046238,4.623239,4.597062,4.597167,0.987594,-0.12487,0.030209,0.006484,0.009441,0.00971,0.007664,0.011013,0.011401,0.997308,-0.108495,0.048091,0.976454,-0.14175,0.011858,124.851901,...,0.207554,0.144275,-0.23422,-0.195975,-0.14428,1.53294,0.969331,0.713132,0.149079,0.116457,0.111886,4.620803,4.597565,4.615643,0.044713,0.022292,0.019679,0.113253,0.012419,0.319431,0.027681,4.758765,1.026102,0.006119,0.007271,1.035068,1.016036,134.778125,0.011193,4.852012,0.139923,0.078814,0.062179,0.397857,0.036126,3.349792,0.092791,4.760039,4.0,17.0
75%,0.003343,0.003268,0.003301,0.242569,0.159502,0.128384,0.229137,0.141543,0.116171,0.615332,0.307083,0.274988,-0.017967,-0.023442,-0.024059,7.544237,3.262471,2.118133,0.336558,0.198894,0.162466,4.666354,4.656957,4.649706,1.01087,0.136304,0.204481,0.010642,0.014943,0.015245,0.012827,0.017806,0.018088,1.020139,0.166289,0.221539,1.000061,0.113981,0.182336,130.806148,...,1.048899,0.657206,-0.059823,-0.057175,-0.048025,23.491374,22.474328,12.918238,0.589803,0.553939,0.419297,4.673257,4.654275,4.672445,0.298912,0.139272,0.119653,0.738812,0.055096,13.970278,0.165655,4.783085,1.031909,0.010022,0.012306,1.041967,1.024048,136.304119,0.018668,4.852025,0.650133,0.288784,0.276397,1.498642,0.111868,65.084295,0.381458,4.786821,5.0,24.0
max,0.046679,0.036071,0.045893,0.63572,0.341042,0.353751,0.655178,0.339054,0.364321,1.226526,0.988145,0.952303,-0.003894,-0.003914,-0.006811,51.837475,14.88781,16.072799,1.186664,0.509251,0.491147,4.773317,4.795022,4.787373,1.039117,1.009679,0.97991,0.131437,0.376279,0.283017,0.180856,0.495334,0.384324,1.10719,1.08048,1.010566,1.028532,1.002374,0.963519,138.280806,...,5.536562,2.16651,-0.006543,-0.007144,-0.003255,249.69135,301.03932,106.301331,2.024602,2.319988,1.643768,4.799382,4.789489,4.795514,0.670178,0.344218,0.362017,1.756127,0.181109,70.012708,0.470965,4.843321,1.051791,0.049066,0.073582,1.135491,1.042329,141.73737,0.103849,4.85203,1.83892,1.026705,0.903696,5.627566,0.589381,508.74116,1.270658,4.848889,6.0,30.0


Unnamed: 0,t_body_acc_mean()_X,t_body_acc_mean()_Y,t_body_acc_mean()_Z,t_body_acc_std()_X,t_body_acc_std()_Y,t_body_acc_std()_Z,t_body_acc_mad()_X,t_body_acc_mad()_Y,t_body_acc_mad()_Z,t_body_acc_max()_X,t_body_acc_max()_Y,t_body_acc_max()_Z,t_body_acc_min()_X,t_body_acc_min()_Y,t_body_acc_min()_Z,t_body_acc_energy()_X,t_body_acc_energy()_Y,t_body_acc_energy()_Z,t_body_acc_iqr()_X,t_body_acc_iqr()_Y,t_body_acc_iqr()_Z,t_body_acc_entropy()_X,t_body_acc_entropy()_Y,t_body_acc_entropy()_Z,t_grav_acc_mean()_X,t_grav_acc_mean()_Y,t_grav_acc_mean()_Z,t_grav_acc_std()_X,t_grav_acc_std()_Y,t_grav_acc_std()_Z,t_grav_acc_mad()_X,t_grav_acc_mad()_Y,t_grav_acc_mad()_Z,t_grav_acc_max()_X,t_grav_acc_max()_Y,t_grav_acc_max()_Z,t_grav_acc_min()_X,t_grav_acc_min()_Y,t_grav_acc_min()_Z,t_grav_acc_energy()_X,...,t_body_gyro_max()_Y,t_body_gyro_max()_Z,t_body_gyro_min()_X,t_body_gyro_min()_Y,t_body_gyro_min()_Z,t_body_gyro_energy()_X,t_body_gyro_energy()_Y,t_body_gyro_energy()_Z,t_body_gyro_iqr()_X,t_body_gyro_iqr()_Y,t_body_gyro_iqr()_Z,t_body_gyro_entropy()_X,t_body_gyro_entropy()_Y,t_body_gyro_entropy()_Z,t_body_acc_Mag_mean(),t_body_acc_Mag_std(),t_body_acc_Mag_mad(),t_body_acc_Mag_max(),t_body_acc_Mag_min(),t_body_acc_Mag_energy(),t_body_acc_Mag_iqr(),t_body_acc_Mag_entropy(),t_grav_acc_Mag_mean(),t_grav_acc_Mag_std(),t_grav_acc_Mag_mad(),t_grav_acc_Mag_max(),t_grav_acc_Mag_min(),t_grav_acc_Mag_energy(),t_grav_acc_Mag_iqr(),t_grav_acc_Mag_entropy(),t_body_gyro_Mag_mean(),t_body_gyro_Mag_std(),t_body_gyro_Mag_mad(),t_body_gyro_Mag_max(),t_body_gyro_Mag_min(),t_body_gyro_Mag_energy(),t_body_gyro_Mag_iqr(),t_body_gyro_Mag_entropy(),activity_Id,user_Id
0,0.002012,0.000431,0.004441,0.004025,0.013983,0.027372,0.004725,0.019132,0.02528,0.010209,0.022011,0.037486,-0.008388,-0.023711,-0.052407,0.002592,0.02505,0.098423,0.00639,0.026772,0.040179,4.627741,4.713095,4.663487,1.017372,-0.124632,0.095094,0.003962,0.01262,0.027628,0.005248,0.020037,0.026857,1.025297,-0.108409,0.153468,1.012324,-0.140894,0.064611,132.48787,...,0.167604,0.060159,-0.052091,-0.122326,-0.047952,0.105876,1.423854,0.171604,0.043029,0.208989,0.075254,4.608497,4.743109,4.750815,0.029616,0.010382,0.011036,0.054894,0.01155,0.126065,0.015087,4.791611,1.02981,0.007104,0.007535,1.045479,1.02179,135.75168,0.010564,4.852007,0.109753,0.035299,0.035964,0.173727,0.036504,1.701334,0.045374,4.795787,5.0,1.0
1,-0.000713,-0.003098,0.000823,0.004491,0.012449,0.02266,0.004168,0.014039,0.022765,0.00903,0.023948,0.037486,-0.010594,-0.023711,-0.033805,0.002647,0.021066,0.065811,0.007073,0.017922,0.042414,4.643639,4.648556,4.73191,1.02012,-0.11997,0.096058,0.004214,0.009689,0.021082,0.004342,0.01126,0.024016,1.024829,-0.108409,0.120894,1.012324,-0.137701,0.064611,133.20473,...,0.09834,0.052728,-0.045783,-0.122326,-0.047952,0.146662,0.714776,0.126087,0.059168,0.111162,0.056,4.714757,4.685009,4.714091,0.025372,0.007461,0.008141,0.040453,0.0088,0.089523,0.010641,4.807681,1.031881,0.006512,0.006984,1.03939,1.02179,136.297,0.012982,4.85201,0.083092,0.028474,0.035679,0.128132,0.030368,0.987526,0.053077,4.788878,5.0,1.0
2,-0.000301,0.004025,-0.00428,0.004866,0.009352,0.016821,0.005255,0.010157,0.020681,0.011261,0.023948,0.026609,-0.010594,-0.014186,-0.033805,0.003042,0.013269,0.038562,0.007601,0.013088,0.031358,4.649658,4.570595,4.711468,1.020183,-0.129146,0.096976,0.003381,0.007155,0.019628,0.004877,0.008684,0.025198,1.024829,-0.112648,0.120894,1.016018,-0.137812,0.06895,133.220565,...,0.098349,0.052728,-0.049518,-0.0853,-0.031499,0.154745,0.475872,0.085617,0.061101,0.100769,0.03892,4.746211,4.683337,4.606529,0.019706,0.006355,0.00677,0.037318,0.005407,0.054873,0.008902,4.797875,1.033093,0.004878,0.007153,1.03939,1.026569,136.615047,0.010379,4.852019,0.07028,0.025618,0.033895,0.112194,0.017656,0.716233,0.045185,4.781886,5.0,1.0


## Exporting the new data

In [28]:

# Destination of the exported CSV file on the mounted Google Drive.
path1="/content/drive/MyDrive/Sam/Dataset.csv" 
# Export `Dataset` (the frame returned by Dataset_Generation_PipeLine above).
# The cell previously referenced `Dataset_type_I`, a name that is not defined
# anywhere in the visible notebook flow.
Dataset.to_csv(path_or_buf=path1, na_rep='NaN',  # missing values are written as 'NaN'
             columns=None, header=True,  # all columns, with a header row
             index=False, mode='w',  # no index column; overwrite any existing file
             encoding='utf-8',  
             lineterminator='\n',  # spelled `line_terminator` before pandas 1.5; old name removed in 2.0
             )
