In [1]:
import pandas as pd
import os

In [2]:
# Recording directories, grouped by activity. The activity keyword in each
# path ('Walking' / 'Sitting' / 'Cycling') is what merge_data_by_filename
# uses to label the rows it loads, so keep it in the directory name.
walking_paths = [
    'archive/Walking-2023-09-16_18-14-40',
    'archive/Walking-2023-09-14_21-51-59',
]

sitting_paths = [
    'archive/Sitting-2023-10-18_09-05-37',
    'archive/Sitting-2023-09-14_09-11-15',
    'archive/Sitting-2023-09-14_08-37-45',
]

cycling_paths = [
    'archive/Cycling-2023-10-18_06-51-26',
    'archive/Cycling-2023-10-18_06-36-17',
    'archive/Cycling-2023-09-16_09-25-09',
    'archive/Cycling-2023-09-16_07-43-07',
    'archive/Cycling-2023-09-14_06-47-00',
    'archive/Cycling-2023-09-14_06-33-47',
    'archive/Cycling-2023-09-14_06-22-31',
]

In [13]:
def merge_data_by_filename(file_name, paths):
    """Collect every CSV called ``file_name`` under ``paths`` into one labelled frame.

    Each directory in ``paths`` is searched recursively. Every file whose name
    equals ``file_name`` is read with ``pd.read_csv`` and gets an ``Activity``
    column whose value is derived from the directory path it was found under.

    Parameters
    ----------
    file_name : str
        Exact file name to look for, e.g. ``'Orientation.csv'``.
    paths : iterable of str
        Directories to search. A path must contain ``'Walking'``, ``'Sitting'``
        or ``'Cycling'`` (case-sensitive) to be labelled; any other path is
        skipped without being searched.

    Returns
    -------
    pandas.DataFrame or None
        Row-wise concatenation of all matching files, in the order of
        ``paths``. Each file keeps its own row index, so index labels repeat
        across files. ``None`` when no matching file was found.
    """
    # Checked in this order; the first marker contained in the path wins.
    activity_by_marker = {
        'Walking': 'walking',
        'Sitting': 'sitting',
        'Cycling': 'cycling',
    }

    frames = []
    for path in paths:
        # The label depends only on the path, so resolve it once per path
        # instead of once per matching file.
        activity = next(
            (label for marker, label in activity_by_marker.items() if marker in path),
            None,
        )
        if activity is None:
            # Unlabelled data would be discarded anyway; do not read it.
            continue

        for root, dirs, files in os.walk(path):
            # Sorting in place fixes the traversal order of sub-directories,
            # which makes the row order of the result reproducible.
            dirs.sort()
            if file_name in files:
                df = pd.read_csv(os.path.join(root, file_name))
                df['Activity'] = activity
                frames.append(df)

    if not frames:
        return None
    # A single concat avoids copying the data twice.
    return pd.concat(frames)


In [35]:
# Every recording directory, ordered walking -> sitting -> cycling.
paths_combined = [*walking_paths, *sitting_paths, *cycling_paths]

# One merged table per sensor export file; merge_data_by_filename adds an
# 'Activity' column to each of them.
orientation = merge_data_by_filename('Orientation.csv', paths_combined)
location = merge_data_by_filename('Location.csv', paths_combined)
total_acceleration = merge_data_by_filename('TotalAcceleration.csv', paths_combined)
magnetometer = merge_data_by_filename('Magnetometer.csv', paths_combined)
accelerometer = merge_data_by_filename('Accelerometer.csv', paths_combined)
location_gps = merge_data_by_filename('LocationGps.csv', paths_combined)
gyroscope = merge_data_by_filename('Gyroscope.csv', paths_combined)
pedometer = merge_data_by_filename('Pedometer.csv', paths_combined)
location_network = merge_data_by_filename('LocationNetwork.csv', paths_combined)
gravity = merge_data_by_filename('Gravity.csv', paths_combined)

In [37]:
# Sensor tables and their display names, kept in the same order so they can be
# zipped together. Each table is listed exactly once.
data_frames = [
    orientation, location, total_acceleration, magnetometer, accelerometer,
    location_gps, gyroscope, pedometer, location_network, gravity,
]

parameter_names = [
    'Orientation', 'Location', 'Total Acceleration', 'Magnetometer', 'Accelerometer',
    'Location GPS', 'Gyroscope', 'Pedometer', 'Location Network', 'Gravity',
]

# zip() silently truncates to the shorter list, so guard the pairing explicitly.
assert len(data_frames) == len(parameter_names)

In [38]:
# Info about data and searching for missing values
for parameter, parameter_name in zip(data_frames, parameter_names):
    print("Info for parameter:", parameter_name)
    if parameter is None:
        # merge_data_by_filename returns None when no matching CSV was found.
        print("No data loaded for this parameter.")
        print("\n")
        continue
    # DataFrame.info() writes its report to stdout and returns None, so
    # wrapping it in print() would emit a stray "None" line.
    parameter.info()
    print("\nMissing values:")
    print(parameter.isnull().sum())
    print("\n")

Info for parameter: Orientation
<class 'pandas.core.frame.DataFrame'>
Index: 2775510 entries, 0 to 71460
Data columns (total 10 columns):
 #   Column           Dtype  
---  ------           -----  
 0   time             int64  
 1   seconds_elapsed  float64
 2   qz               float64
 3   qy               float64
 4   qx               float64
 5   qw               float64
 6   roll             float64
 7   pitch            float64
 8   yaw              float64
 9   Activity         object 
dtypes: float64(8), int64(1), object(1)
memory usage: 232.9+ MB
None

Missing values:
time               0
seconds_elapsed    0
qz                 0
qy                 0
qx                 0
qw                 0
roll               0
pitch              2
yaw                0
Activity           0
dtype: int64


Info for parameter: Location
<class 'pandas.core.frame.DataFrame'>
Index: 8980 entries, 0 to 229
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------ 

In [42]:
# Preview the first rows of every sensor table under its display name.
for parameter_name, parameter in zip(parameter_names, data_frames):
    print(parameter_name, parameter.head(), sep='\n')


Orientation
                  time  seconds_elapsed        qz        qy        qx  \
0  1694888080184831500         0.129832 -0.495947 -0.129112  0.153800   
1  1694888080187351000         0.132351 -0.496316 -0.129446  0.153994   
2  1694888080189870300         0.134870 -0.496670 -0.129838  0.154196   
3  1694888080192389600         0.137390 -0.497005 -0.130284  0.154409   
4  1694888080194909200         0.139909 -0.497314 -0.130776  0.154643   

         qw      roll     pitch       yaw Activity  
0  0.844815 -0.071233 -0.398385  1.047297  walking  
1  0.844512 -0.071450 -0.399103  1.048190  walking  
2  0.844206 -0.071771 -0.399894  1.049035  walking  
3  0.843902 -0.072187 -0.400758  1.049822  walking  
4  0.843601 -0.072679 -0.401704  1.050542  walking  
Location
                  time  seconds_elapsed  bearingAccuracy  speedAccuracy  \
0  1694888080756000000            0.701       179.899994            4.5   
1  1694888081256000000            1.201       126.300003            8.4 