In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import re

In [27]:
# Collect every CSV that sits directly inside a dataset/User* directory.
# NOTE(review): glob returns matches in arbitrary order, so row order may differ between runs/machines.
filelist = glob.glob("dataset/User*/*.csv")

# Number of files matched by the pattern.
print(len(filelist))

848


In [28]:
# Peek at the first ten matched paths to see how the file names are laid out.
print(filelist[:10])

['dataset/User22/272_Werable1_2020-11-07T10.25.22.955_F1E55E2FE95F_Pressure_7.330Hz_1.5.0.csv', 'dataset/User22/277_Werable1_2020-11-07T10.58.42.895_F1E55E2FE95F_Accelerometer_100.000Hz_1.5.0.csv', 'dataset/User22/273_Werable1_2020-11-07T10.29.47.662_F1E55E2FE95F_Accelerometer_100.000Hz_1.5.0.csv', 'dataset/User22/280_Werable1_2020-11-07T10.44.47.493_F1E55E2FE95F_Gyroscope_100.000Hz_1.5.0.csv', 'dataset/User22/276_Werable1_2020-11-07T10.55.27.209_F1E55E2FE95F_Accelerometer_100.000Hz_1.5.0.csv', 'dataset/User22/270_Werable1_2020-11-07T10.17.08.139_F1E55E2FE95F_Accelerometer_100.000Hz_1.5.0.csv', 'dataset/User22/268_Werable1_2020-11-07T10.07.24.833_F1E55E2FE95F_Magnetometer_20.000Hz_1.5.0.csv', 'dataset/User22/271_Werable1_2020-11-07T10.21.01.512_F1E55E2FE95F_Accelerometer_100.000Hz_1.5.0.csv', 'dataset/User22/271_Werable1_2020-11-07T10.21.01.512_F1E55E2FE95F_Pressure_7.330Hz_1.5.0.csv', 'dataset/User22/269_Werable1_2020-11-07T10.13.21.344_F1E55E2FE95F_Pressure_7.330Hz_1.5.0.csv']


In [29]:
# Show only the file name of the first match. os.path.basename honours the
# platform's path separator, whereas splitting on "/" only works for
# POSIX-style paths (glob yields backslash-joined paths on Windows).
print(os.path.basename(filelist[0]))

272_Werable1_2020-11-07T10.25.22.955_F1E55E2FE95F_Pressure_7.330Hz_1.5.0.csv


In [30]:
# One row per discovered CSV path. Built from a plain list, so the single
# column gets the default integer label 0.
df = pd.DataFrame(filelist)

In [32]:
# Give the first (path) column a descriptive label. The renamed frame is
# rebound to `df` rather than mutated with inplace=True, which keeps the step
# chainable and safe to re-run.
df = df.rename(columns={df.columns[0]: "full_path"})
df.head()

Unnamed: 0,full_path
0,dataset/User22/272_Werable1_2020-11-07T10.25.2...
1,dataset/User22/277_Werable1_2020-11-07T10.58.4...
2,dataset/User22/273_Werable1_2020-11-07T10.29.4...
3,dataset/User22/280_Werable1_2020-11-07T10.44.4...
4,dataset/User22/276_Werable1_2020-11-07T10.55.2...


In [34]:
# Keep only the last path component of every entry; the directory part is dropped.
df['file_name'] = df['full_path'].apply(os.path.basename)
df.head()

Unnamed: 0,full_path,file_name
0,dataset/User22/272_Werable1_2020-11-07T10.25.2...,272_Werable1_2020-11-07T10.25.22.955_F1E55E2FE...
1,dataset/User22/277_Werable1_2020-11-07T10.58.4...,277_Werable1_2020-11-07T10.58.42.895_F1E55E2FE...
2,dataset/User22/273_Werable1_2020-11-07T10.29.4...,273_Werable1_2020-11-07T10.29.47.662_F1E55E2FE...
3,dataset/User22/280_Werable1_2020-11-07T10.44.4...,280_Werable1_2020-11-07T10.44.47.493_F1E55E2FE...
4,dataset/User22/276_Werable1_2020-11-07T10.55.2...,276_Werable1_2020-11-07T10.55.27.209_F1E55E2FE...


In [59]:
def extract_experiment_id(filename):
    """
    Return the run of digits at the very start of ``filename`` as a string.

    For example, "123Datafile_Gyroscope" yields "123". If the name does not
    begin with a digit, None is returned.
    """
    leading_digits = re.match(r'(\d+)', filename)
    return leading_digits.group(1) if leading_digits else None


def get_activity_id_from_experiment_id(
    experiment_id: int,
    first_experiment: int = 133,
    no_of_activities: int = 15,
) -> int:
    """
    Map an integer experiment id to its 1-based activity id.

    Activity ids are assigned cyclically: the experiment numbered
    ``first_experiment`` is activity 1, the next one is activity 2, and after
    ``no_of_activities`` experiments the numbering wraps back to 1.

    In dataset2 (the defaults), the mapping is:
    133 -> 1
    134 -> 2
    ....
    147 -> 15
    148 -> 1   (the cycle starts over)

    Args:
        experiment_id: numeric experiment id (an int, not a string).
        first_experiment: experiment id that corresponds to activity 1.
        no_of_activities: number of activities in one cycle; must be >= 1.

    Returns:
        The activity id, an integer in the range 1..no_of_activities.

    Raises:
        ValueError: if ``no_of_activities`` is smaller than 1.
    """
    if no_of_activities < 1:
        raise ValueError("no_of_activities must be a positive integer")

    # Python's % always returns a non-negative result for a positive modulus,
    # so ids below first_experiment still land in 1..no_of_activities.
    return (experiment_id - first_experiment) % no_of_activities + 1

    (133 - 133) % 15 + 1 = 0 + 1 = 1

In [62]:
# The file name is "_"-separated: field 4 is the sensor name and field 5 the
# sampling frequency (e.g. "Pressure" and "7.330Hz").
df['sensor_type'] = df['file_name'].apply(lambda name: name.split("_")[4])
df['frequency'] = df['file_name'].apply(lambda name: name.split("_")[5])
# The leading digits of the file name are the experiment id, stored as an int.
df['experiment_id'] = df['file_name'].apply(lambda name: int(extract_experiment_id(name)))
# Translate each experiment id into its activity number.
df['activity_no'] = df['experiment_id'].apply(get_activity_id_from_experiment_id)

In [63]:
# Display the first rows, now including the columns derived from the file name.
df.head()

Unnamed: 0,full_path,file_name,sensor_type,frequency,experiment_id,activity_no
0,dataset/User22/272_Werable1_2020-11-07T10.25.2...,272_Werable1_2020-11-07T10.25.22.955_F1E55E2FE...,Pressure,7.330Hz,272,5
1,dataset/User22/277_Werable1_2020-11-07T10.58.4...,277_Werable1_2020-11-07T10.58.42.895_F1E55E2FE...,Accelerometer,100.000Hz,277,10
2,dataset/User22/273_Werable1_2020-11-07T10.29.4...,273_Werable1_2020-11-07T10.29.47.662_F1E55E2FE...,Accelerometer,100.000Hz,273,6
3,dataset/User22/280_Werable1_2020-11-07T10.44.4...,280_Werable1_2020-11-07T10.44.47.493_F1E55E2FE...,Gyroscope,100.000Hz,280,13
4,dataset/User22/276_Werable1_2020-11-07T10.55.2...,276_Werable1_2020-11-07T10.55.27.209_F1E55E2FE...,Accelerometer,100.000Hz,276,9
