<a href="https://colab.research.google.com/github/mrushad/ml4qs-G39/blob/main/notebooks/mlqs_lydia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import os
import pandas as pd
import numpy as np

# Cloning github repo to access data

In [2]:
!git clone https://github.com/mrushad/ml4qs-G39

fatal: destination path 'ml4qs-G39' already exists and is not an empty directory.


# Functions

## read_csv_files()

In [3]:
def read_csv_files():
  csv_files = [
      'Accelerometer.csv',
      'Linear Accelerometer.csv',
      'Barometer.csv',
      'Location.csv',
      'Gyroscope.csv',
      'Magnetometer.csv',
      'Proximity.csv'
  ]

  # Dictionary to store DataFrames
  dfs = {}

  for filename in csv_files:
      try:
          # Create a clean DataFrame name (e.g., 'Accelerometer_df')
          df_name = filename.replace('.csv', '') + '_df'

          # Read the CSV file
          df = pd.read_csv(filename)

          # Store it in the dictionary
          dfs[df_name] = df

          print(f"Loaded '{filename}' into DataFrame '{df_name}'. Shape: {df.shape}")
          # Optional: Display the first few rows to verify
          # print(dfs[df_name].head())

      except FileNotFoundError:
          print(f"Error: '{filename}' not found in the current directory ({os.getcwd()}).")
      except Exception as e:
          print(f"Error loading '{filename}': {e}")

  # --- Accessing your DataFrames ---

  # You can now access your DataFrames like this:
  if 'Accelerometer_df' in dfs:
      print("\nAccelerometer_df head:")
      print(dfs['Accelerometer_df'].head())

  if 'Gyroscope_df' in dfs:
      print("\nGyroscope_df head:")
      print(dfs['Gyroscope_df'].head())

  # Or iterate through them
  print("\n--- All loaded DataFrames ---")
  for name, df in dfs.items():
      print(f"DataFrame: {name}, Columns: {df.columns.tolist()}, Shape: {df.shape}")
  return dfs

## combine_dfs()

In [4]:
def combine_dfs(sensor_dfs):
    # Initialize an empty list to store processed DataFrames
    processed_dfs = []

    # Process each DataFrame
    for sensor_name, df in sensor_dfs.items(): # Iterate using the 'sensor_dfs' for cleaner prefixes
        # Create a copy to avoid modifying the original DataFrame directly
        df_copy = df.copy()

        # Round the 'Time (s)' column to three decimal places
        df_copy['Time (s)'] = df_copy['Time (s)'].round(2)

        # Rename columns to include the sensor name as a prefix
        new_columns = {}
        for col in df_copy.columns:
            if col != 'Time (s)':
                # Replace spaces in column names with underscores for consistency
                # Also remove parentheses from the unit part of the column name
                new_col_name = f"{sensor_name}_{col.replace(' ', '_').replace('(', '').replace(')', '')}"
                new_columns[col] = new_col_name
        df_copy = df_copy.rename(columns=new_columns)
        processed_dfs.append(df_copy) # Correctly append to 'processed_dfs'

    # Merge all processed DataFrames based on the 'Time (s)' column
    # Start with the first processed DataFrame
    if processed_dfs:
        combined_df = processed_dfs[0] # Use 'processed_dfs' for merging
        for i in range(1, len(processed_dfs)):
            combined_df = pd.merge(combined_df, processed_dfs[i], on='Time (s)', how='outer')

        # Display the combined DataFrame information
        print("Combined DataFrame Head:")
        print(combined_df.head())
        print("\nCombined DataFrame Info:")
        combined_df.info()
    else:
        print("No DataFrames to combine.")
    return combined_df

## identify_correlation()

In [5]:
def identify_correlation(df, correlation_method='spearman', correlation_threshold=0.6):

    correlation = biking_df.corr(method=correlation_method)
    correlation_threshold = 0.60

    print(f"Pairs with Strong Spearman Correlation (absolute value >= {correlation_threshold}):\n")
    num_cols = len(correlation.columns)
    found_strong_correlation = False

    for i in range(num_cols):
        for j in range(i + 1, num_cols):
            var1 = correlation.columns[i]
            var2 = correlation.columns[j]
            correlation_value = correlation.iloc[i, j]

            if abs(correlation_value) >= correlation_threshold:
                print(f"  {var1} vs {var2}: {correlation_value:.4f}")
                found_strong_correlation = True

    if not found_strong_correlation:
        print("  No pairs found with strong Spearman correlation above the threshold.")

# CSV to DF

## Biking Data

In [6]:
os.chdir('ml4qs-G39')
!ls -F

data/  LICENSE	notebooks/  README.md  reports/  requirements.txt  src/


In [7]:
%cd data/raw

/content/ml4qs-G39/data/raw


In [8]:
!ls -F

'BikingData1 2025-06-06 10-48-07'/  'WalkingData1 2025-06-06 15-30-08'/


In [9]:
%cd BikingData1 2025-06-06 10-48-07

/content/ml4qs-G39/data/raw/BikingData1 2025-06-06 10-48-07


In [10]:
!ls -F

 Accelerometer.csv  'Linear Accelerometer.csv'	 meta/
 Barometer.csv	     Location.csv		 Proximity.csv
 Gyroscope.csv	     Magnetometer.csv


In [11]:
biking_dfs = read_csv_files()
bkining_accelerometer_df = biking_dfs['Accelerometer_df']
bkining_linear_accelerometer_df = biking_dfs['Linear Accelerometer_df']
bkining_barometer_df = biking_dfs['Barometer_df']
bkining_location_df = biking_dfs['Location_df']
bkining_gyroscope_df = biking_dfs['Gyroscope_df']
bkining_magnetometer_df = biking_dfs['Magnetometer_df']
bkining_proximity_df = biking_dfs['Proximity_df']

Loaded 'Accelerometer.csv' into DataFrame 'Accelerometer_df'. Shape: (90430, 4)
Loaded 'Linear Accelerometer.csv' into DataFrame 'Linear Accelerometer_df'. Shape: (90430, 4)
Loaded 'Barometer.csv' into DataFrame 'Barometer_df'. Shape: (847, 2)
Loaded 'Location.csv' into DataFrame 'Location_df'. Shape: (900, 8)
Loaded 'Gyroscope.csv' into DataFrame 'Gyroscope_df'. Shape: (90430, 4)
Loaded 'Magnetometer.csv' into DataFrame 'Magnetometer_df'. Shape: (90430, 4)
Loaded 'Proximity.csv' into DataFrame 'Proximity_df'. Shape: (66, 2)

Accelerometer_df head:
   Time (s)  X (m/s^2)  Y (m/s^2)  Z (m/s^2)
0  0.008161  -1.333277   8.060289   8.314460
1  0.018114  -1.519939   8.037236   6.289621
2  0.028068  -1.010099   7.053333   4.551286
3  0.038021  -0.723446   6.263126   5.793253
4  0.047974  -0.928819   6.392906   6.570437

Gyroscope_df head:
   Time (s)  X (rad/s)  Y (rad/s)  Z (rad/s)
0  0.010650  -0.048301   0.043927   0.077801
1  0.020603  -0.013787  -0.115126   0.007624
2  0.030556   0.0621

In [12]:
biking_df = combine_dfs(biking_dfs)
biking_df.head()

Combined DataFrame Head:
   Time (s)  Accelerometer_df_X_m/s^2  Accelerometer_df_Y_m/s^2  \
0     -0.54                       NaN                       NaN   
1      0.01                 -1.333277                  8.060289   
2      0.02                 -1.519939                  8.037236   
3      0.03                 -1.010099                  7.053333   
4      0.04                 -0.723446                  6.263126   

   Accelerometer_df_Z_m/s^2  Linear Accelerometer_df_X_m/s^2  \
0                       NaN                              NaN   
1                  8.314460                        -0.571053   
2                  6.289621                        -0.663435   
3                  4.551286                        -0.093932   
4                  5.793253                         0.074273   

   Linear Accelerometer_df_Y_m/s^2  Linear Accelerometer_df_Z_m/s^2  \
0                              NaN                              NaN   
1                         0.853277           

Unnamed: 0,Time (s),Accelerometer_df_X_m/s^2,Accelerometer_df_Y_m/s^2,Accelerometer_df_Z_m/s^2,Linear Accelerometer_df_X_m/s^2,Linear Accelerometer_df_Y_m/s^2,Linear Accelerometer_df_Z_m/s^2,Barometer_df_X_hPa,Location_df_Latitude_°,Location_df_Longitude_°,...,Location_df_Direction_°,Location_df_Horizontal_Accuracy_m,Location_df_Vertical_Accuracy_°,Gyroscope_df_X_rad/s,Gyroscope_df_Y_rad/s,Gyroscope_df_Z_rad/s,Magnetometer_df_X_µT,Magnetometer_df_Y_µT,Magnetometer_df_Z_µT,Proximity_df_Distance_cm
0,-0.54,,,,,,,1006.42868,,,...,,,,,,,,,,
1,0.01,-1.333277,8.060289,8.31446,-0.571053,0.853277,1.42341,,,,...,,,,-0.048301,0.043927,0.077801,-3.454794,-47.541077,-15.692932,
2,0.02,-1.519939,8.037236,6.289621,-0.663435,0.548666,-0.828132,,,,...,,,,-0.013787,-0.115126,0.007624,-3.275377,-47.394699,-15.088928,5.0
3,0.03,-1.010099,7.053333,4.551286,-0.093932,-0.504608,-1.8618,,,,...,,,,0.062199,-0.121075,0.037894,-3.161579,-47.449356,-15.146851,
4,0.04,-0.723446,6.263126,5.793253,0.074273,-1.115019,-0.295147,,,,...,,,,0.162161,-0.012193,0.098252,-3.133875,-47.220291,-15.232971,


## Walking Data

In [13]:
%cd ../
!ls -F

/content/ml4qs-G39/data/raw
'BikingData1 2025-06-06 10-48-07'/  'WalkingData1 2025-06-06 15-30-08'/


In [14]:
%cd WalkingData1 2025-06-06 15-30-08

/content/ml4qs-G39/data/raw/WalkingData1 2025-06-06 15-30-08


In [15]:
!ls -F

 Accelerometer.csv  'Linear Accelerometer.csv'	 meta/
 Barometer.csv	     Location.csv		 Proximity.csv
 Gyroscope.csv	     Magnetometer.csv


In [16]:
walking_dfs = read_csv_files()
walking_accelerometer_df = walking_dfs['Accelerometer_df']
walking_linear_accelerometer_df = walking_dfs['Linear Accelerometer_df']
walking_barometer_df = walking_dfs['Barometer_df']
walking_location_df = walking_dfs['Location_df']
walking_gyroscope_df = walking_dfs['Gyroscope_df']
walking_magnetometer_df = walking_dfs['Magnetometer_df']
walking_proximity_df = walking_dfs['Proximity_df']

Loaded 'Accelerometer.csv' into DataFrame 'Accelerometer_df'. Shape: (90423, 4)
Loaded 'Linear Accelerometer.csv' into DataFrame 'Linear Accelerometer_df'. Shape: (90423, 4)
Loaded 'Barometer.csv' into DataFrame 'Barometer_df'. Shape: (847, 2)
Loaded 'Location.csv' into DataFrame 'Location_df'. Shape: (899, 8)
Loaded 'Gyroscope.csv' into DataFrame 'Gyroscope_df'. Shape: (90423, 4)
Loaded 'Magnetometer.csv' into DataFrame 'Magnetometer_df'. Shape: (90423, 4)
Loaded 'Proximity.csv' into DataFrame 'Proximity_df'. Shape: (1, 2)

Accelerometer_df head:
   Time (s)  X (m/s^2)  Y (m/s^2)  Z (m/s^2)
0  0.002582  -2.956801   3.204536  12.733271
1  0.012535  -1.568887   2.797832  11.682157
2  0.022488  -0.466879   3.101850  10.308014
3  0.032441  -1.001867   3.238216   8.759784
4  0.042394  -1.313219   2.810705   8.288564

Gyroscope_df head:
   Time (s)  X (rad/s)  Y (rad/s)  Z (rad/s)
0  0.000094  -0.010970   0.069035   0.648594
1  0.010047   0.057596   0.260039   0.495670
2  0.020000  -0.00142

In [17]:
walking_df = combine_dfs(walking_dfs)
walking_df.head()

Combined DataFrame Head:
   Time (s)  Accelerometer_df_X_m/s^2  Accelerometer_df_Y_m/s^2  \
0     -0.53                       NaN                       NaN   
1      0.00                 -2.956801                  3.204536   
2      0.01                 -1.568887                  2.797832   
3      0.02                 -0.466879                  3.101850   
4      0.03                 -1.001867                  3.238216   

   Accelerometer_df_Z_m/s^2  Linear Accelerometer_df_X_m/s^2  \
0                       NaN                              NaN   
1                 12.733271                        -2.192563   
2                 11.682157                        -0.712324   
3                 10.308014                         0.602520   
4                  8.759784                         0.422396   

   Linear Accelerometer_df_Y_m/s^2  Linear Accelerometer_df_Z_m/s^2  \
0                              NaN                              NaN   
1                        -0.171700           

Unnamed: 0,Time (s),Accelerometer_df_X_m/s^2,Accelerometer_df_Y_m/s^2,Accelerometer_df_Z_m/s^2,Linear Accelerometer_df_X_m/s^2,Linear Accelerometer_df_Y_m/s^2,Linear Accelerometer_df_Z_m/s^2,Barometer_df_X_hPa,Location_df_Latitude_°,Location_df_Longitude_°,...,Location_df_Direction_°,Location_df_Horizontal_Accuracy_m,Location_df_Vertical_Accuracy_°,Gyroscope_df_X_rad/s,Gyroscope_df_Y_rad/s,Gyroscope_df_Z_rad/s,Magnetometer_df_X_µT,Magnetometer_df_Y_µT,Magnetometer_df_Z_µT,Proximity_df_Distance_cm
0,-0.53,,,,,,,1008.348923,,,...,,,,,,,,,,
1,0.0,-2.956801,3.204536,12.733271,-2.192563,-0.1717,4.255854,,,,...,,,,-0.01097,0.069035,0.648594,-21.498955,-16.158371,-37.33429,5.0
2,0.01,-1.568887,2.797832,11.682157,-0.712324,-0.519467,2.735299,,,,...,,,,0.057596,0.260039,0.49567,-21.380814,-16.132431,-36.897217,
3,0.02,-0.466879,3.10185,10.308014,0.60252,-0.465167,1.63435,,,,...,,,,-0.001422,0.377463,0.281415,-21.289383,-16.068405,-37.18573,
4,0.03,-1.001867,3.238216,8.759784,0.422396,-0.109258,-0.077208,,,,...,,,,-0.137912,0.269675,0.173701,-21.026146,-15.975693,-37.676147,


# Cleaning

## Filling Nulls using interpolaration

In [18]:
biking_df.isnull().sum()

Unnamed: 0,0
Time (s),0
Accelerometer_df_X_m/s^2,1
Accelerometer_df_Y_m/s^2,1
Accelerometer_df_Z_m/s^2,1
Linear Accelerometer_df_X_m/s^2,1
Linear Accelerometer_df_Y_m/s^2,1
Linear Accelerometer_df_Z_m/s^2,1
Barometer_df_X_hPa,92560
Location_df_Latitude_°,92523
Location_df_Longitude_°,92523


In [19]:
walking_df.isnull().sum()

Unnamed: 0,0
Time (s),0
Accelerometer_df_X_m/s^2,1
Accelerometer_df_Y_m/s^2,1
Accelerometer_df_Z_m/s^2,1
Linear Accelerometer_df_X_m/s^2,1
Linear Accelerometer_df_Y_m/s^2,1
Linear Accelerometer_df_Z_m/s^2,1
Barometer_df_X_hPa,92479
Location_df_Latitude_°,92439
Location_df_Longitude_°,92439


In [21]:
# Interpolate all columns in biking_df
for col in biking_df.columns:
    if biking_df[col].isnull().any():
        biking_df[col].interpolate(method='linear', inplace=True)

# Interpolate all columns in walking_df
for col in walking_df.columns:
    if walking_df[col].isnull().any():
        walking_df[col].interpolate(method='linear', inplace=True)

print("Interpolation complete for biking_df:")
print(biking_df.isnull().sum())

print("\nInterpolation complete for walking_df:")
print(walking_df.isnull().sum())

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  biking_df[col].interpolate(method='linear', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  walking_df[col].interpolate(method='linear', inplace=True)


Interpolation complete for biking_df:
Time (s)                              0
Accelerometer_df_X_m/s^2              1
Accelerometer_df_Y_m/s^2              1
Accelerometer_df_Z_m/s^2              1
Linear Accelerometer_df_X_m/s^2       1
Linear Accelerometer_df_Y_m/s^2       1
Linear Accelerometer_df_Z_m/s^2       1
Barometer_df_X_hPa                    0
Location_df_Latitude_°               81
Location_df_Longitude_°              81
Location_df_Height_m                 81
Location_df_Velocity_m/s             81
Location_df_Direction_°              81
Location_df_Horizontal_Accuracy_m    81
Location_df_Vertical_Accuracy_°      81
Gyroscope_df_X_rad/s                  1
Gyroscope_df_Y_rad/s                  1
Gyroscope_df_Z_rad/s                  1
Magnetometer_df_X_µT                  1
Magnetometer_df_Y_µT                  1
Magnetometer_df_Z_µT                  1
Proximity_df_Distance_cm              2
dtype: int64

Interpolation complete for walking_df:
Time (s)                     

# Analyze Correlation

In [22]:
print("Biking Data:")
identify_correlation(biking_df)

Biking Data:
Pairs with Strong Spearman Correlation (absolute value >= 0.6):

  Time (s) vs Location_df_Latitude_°: -0.9995
  Time (s) vs Location_df_Longitude_°: -0.9851
  Accelerometer_df_X_m/s^2 vs Linear Accelerometer_df_X_m/s^2: 0.8169
  Accelerometer_df_Y_m/s^2 vs Linear Accelerometer_df_Y_m/s^2: 0.8133
  Accelerometer_df_Z_m/s^2 vs Linear Accelerometer_df_Z_m/s^2: 0.8319
  Location_df_Latitude_° vs Location_df_Longitude_°: 0.9845


In [24]:
print("Walking Data:")
identify_correlation(walking_df)

Walking Data:
Pairs with Strong Spearman Correlation (absolute value >= 0.6):

  Time (s) vs Location_df_Latitude_°: -0.9995
  Time (s) vs Location_df_Longitude_°: -0.9851
  Accelerometer_df_X_m/s^2 vs Linear Accelerometer_df_X_m/s^2: 0.8169
  Accelerometer_df_Y_m/s^2 vs Linear Accelerometer_df_Y_m/s^2: 0.8133
  Accelerometer_df_Z_m/s^2 vs Linear Accelerometer_df_Z_m/s^2: 0.8319
  Location_df_Latitude_° vs Location_df_Longitude_°: 0.9845


In [25]:
biking_df.columns

Index(['Time (s)', 'Accelerometer_df_X_m/s^2', 'Accelerometer_df_Y_m/s^2',
       'Accelerometer_df_Z_m/s^2', 'Linear Accelerometer_df_X_m/s^2',
       'Linear Accelerometer_df_Y_m/s^2', 'Linear Accelerometer_df_Z_m/s^2',
       'Barometer_df_X_hPa', 'Location_df_Latitude_°',
       'Location_df_Longitude_°', 'Location_df_Height_m',
       'Location_df_Velocity_m/s', 'Location_df_Direction_°',
       'Location_df_Horizontal_Accuracy_m', 'Location_df_Vertical_Accuracy_°',
       'Gyroscope_df_X_rad/s', 'Gyroscope_df_Y_rad/s', 'Gyroscope_df_Z_rad/s',
       'Magnetometer_df_X_µT', 'Magnetometer_df_Y_µT', 'Magnetometer_df_Z_µT',
       'Proximity_df_Distance_cm'],
      dtype='object')

# Feature Engineering

## Time Based

In [26]:
## TODO using meta data (in meta folder) calculate time of day

## Linear Acceleration


In [31]:
def calculate_linear_acceleration_magnitude(df):
  # Total Linear Acceleration Magnitude
  df['linear_acceleration_magnitude_m/s^2'] = np.sqrt(
      df['Linear Accelerometer_df_X_m/s^2']**2 +
      df['Linear Accelerometer_df_Y_m/s^2']**2 +
      df['Linear Accelerometer_df_Z_m/s^2']**2
  )
  return df

# Biking df
biking_df = calculate_linear_acceleration_magnitude(biking_df)

# Walking df
walking_df = calculate_linear_acceleration_magnitude(walking_df)
walking_df.head()

Unnamed: 0,Time (s),Accelerometer_df_X_m/s^2,Accelerometer_df_Y_m/s^2,Accelerometer_df_Z_m/s^2,Linear Accelerometer_df_X_m/s^2,Linear Accelerometer_df_Y_m/s^2,Linear Accelerometer_df_Z_m/s^2,Barometer_df_X_hPa,Location_df_Latitude_°,Location_df_Longitude_°,...,Location_df_Horizontal_Accuracy_m,Location_df_Vertical_Accuracy_°,Gyroscope_df_X_rad/s,Gyroscope_df_Y_rad/s,Gyroscope_df_Z_rad/s,Magnetometer_df_X_µT,Magnetometer_df_Y_µT,Magnetometer_df_Z_µT,Proximity_df_Distance_cm,linear_acceleration_magnitude_m/s^2
0,-0.53,,,,,,,1008.348923,,,...,,,,,,,,,,
1,0.0,-2.956801,3.204536,12.733271,-2.192563,-0.1717,4.255854,1008.348929,,,...,,,-0.01097,0.069035,0.648594,-21.498955,-16.158371,-37.33429,5.0,4.790523
2,0.01,-1.568887,2.797832,11.682157,-0.712324,-0.519467,2.735299,1008.348935,,,...,,,0.057596,0.260039,0.49567,-21.380814,-16.132431,-36.897217,5.0,2.873867
3,0.02,-0.466879,3.10185,10.308014,0.60252,-0.465167,1.63435,1008.348941,,,...,,,-0.001422,0.377463,0.281415,-21.289383,-16.068405,-37.18573,5.0,1.802917
4,0.03,-1.001867,3.238216,8.759784,0.422396,-0.109258,-0.077208,1008.348947,,,...,,,-0.137912,0.269675,0.173701,-21.026146,-15.975693,-37.676147,5.0,0.443077


## Turning Rate / Change in Direction

In [33]:
def calculate_turning_rate(df):
  # Turning Rate / Change in Direction
  # Handle circular nature of degrees (0-360)
  df['direction_diff_deg'] = df['Location_df_Direction_°'].diff().apply(
      lambda x: (x + 180) % 360 - 180 if pd.notna(x) else x
  )
  df['dt_s'] = df['Time (s)'].diff()
  df['turning_rate_deg/s'] = df['direction_diff_deg'] / df['dt_s']
  df['absolute_turning_rate_deg/s'] = df['turning_rate_deg/s'].abs()
  return df

# Biking df
biking_df = calculate_turning_rate(biking_df)

# Walking df
walking_df = calculate_turning_rate(walking_df)
walking_df.head()

Unnamed: 0,Time (s),Accelerometer_df_X_m/s^2,Accelerometer_df_Y_m/s^2,Accelerometer_df_Z_m/s^2,Linear Accelerometer_df_X_m/s^2,Linear Accelerometer_df_Y_m/s^2,Linear Accelerometer_df_Z_m/s^2,Barometer_df_X_hPa,Location_df_Latitude_°,Location_df_Longitude_°,...,Gyroscope_df_Z_rad/s,Magnetometer_df_X_µT,Magnetometer_df_Y_µT,Magnetometer_df_Z_µT,Proximity_df_Distance_cm,linear_acceleration_magnitude_m/s^2,direction_diff_deg,dt_s,turning_rate_deg/s,absolute_turning_rate_deg/s
0,-0.53,,,,,,,1008.348923,,,...,,,,,,,,,,
1,0.0,-2.956801,3.204536,12.733271,-2.192563,-0.1717,4.255854,1008.348929,,,...,0.648594,-21.498955,-16.158371,-37.33429,5.0,4.790523,,0.53,,
2,0.01,-1.568887,2.797832,11.682157,-0.712324,-0.519467,2.735299,1008.348935,,,...,0.49567,-21.380814,-16.132431,-36.897217,5.0,2.873867,,0.01,,
3,0.02,-0.466879,3.10185,10.308014,0.60252,-0.465167,1.63435,1008.348941,,,...,0.281415,-21.289383,-16.068405,-37.18573,5.0,1.802917,,0.01,,
4,0.03,-1.001867,3.238216,8.759784,0.422396,-0.109258,-0.077208,1008.348947,,,...,0.173701,-21.026146,-15.975693,-37.676147,5.0,0.443077,,0.01,,


## Acceleration from GPS

In [34]:
def calculate_gps_acceleration(df):
  # Acceleration from GPS
  df['gps_acceleration_m/s^2'] = df['Location_df_Velocity_m/s'].diff() / df['dt_s']
  return df

# Biking df
biking_df = calculate_gps_acceleration(biking_df)

# Walking df
walking_df = calculate_gps_acceleration(walking_df)
walking_df.head()

Unnamed: 0,Time (s),Accelerometer_df_X_m/s^2,Accelerometer_df_Y_m/s^2,Accelerometer_df_Z_m/s^2,Linear Accelerometer_df_X_m/s^2,Linear Accelerometer_df_Y_m/s^2,Linear Accelerometer_df_Z_m/s^2,Barometer_df_X_hPa,Location_df_Latitude_°,Location_df_Longitude_°,...,Magnetometer_df_X_µT,Magnetometer_df_Y_µT,Magnetometer_df_Z_µT,Proximity_df_Distance_cm,linear_acceleration_magnitude_m/s^2,direction_diff_deg,dt_s,turning_rate_deg/s,absolute_turning_rate_deg/s,gps_acceleration_m/s^2
0,-0.53,,,,,,,1008.348923,,,...,,,,,,,,,,
1,0.0,-2.956801,3.204536,12.733271,-2.192563,-0.1717,4.255854,1008.348929,,,...,-21.498955,-16.158371,-37.33429,5.0,4.790523,,0.53,,,
2,0.01,-1.568887,2.797832,11.682157,-0.712324,-0.519467,2.735299,1008.348935,,,...,-21.380814,-16.132431,-36.897217,5.0,2.873867,,0.01,,,
3,0.02,-0.466879,3.10185,10.308014,0.60252,-0.465167,1.63435,1008.348941,,,...,-21.289383,-16.068405,-37.18573,5.0,1.802917,,0.01,,,
4,0.03,-1.001867,3.238216,8.759784,0.422396,-0.109258,-0.077208,1008.348947,,,...,-21.026146,-15.975693,-37.676147,5.0,0.443077,,0.01,,,
