<a href="https://colab.research.google.com/github/swetzel1/introduction_to_ml/blob/main/project/final_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Physical Activity Monitoring

In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt


from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load Data

In [2]:
folder_structure = '/content/drive/My Drive/Intro_to_ML/final_project/' #change this to your correspoding folder

file_names = ['subject101.dat',
              'subject102.dat',
              'subject103.dat',
              'subject104.dat',
              'subject105.dat',
              'subject106.dat',
              'subject107.dat',
              'subject108.dat',
              'subject109.dat' ]

In [3]:
file_names = [folder_structure + name for name in file_names ]

In [4]:
subject_id = [1,2,3,4,5,6,7,8,9]

activity_id_dict = {0: 'transient',
                    1: 'lying',
                    2: 'sitting',
                    3: 'standing',
                    4: 'walking',
                    5: 'running',
                    6: 'cycling',
                    7: 'Nordic_walking',
                    9: 'watching_TV',
                    10: 'computer_work',
                    11: 'car driving',
                    12: 'ascending_stairs',
                    13: 'descending_stairs',
                    16: 'vacuum_cleaning',
                    17: 'ironing',
                    18: 'folding_laundry',
                    19: 'house_cleaning',
                    20: 'playing_soccer',
                    24: 'rope_jumping' }

special_columns = ["timestamp", "activity_id","heartrate"]


imu_hand = ['hand_temperature',
            'hand_Acc6_1',       'hand_Acc6_2',        'hand_Acc6_3',
            'hand_Acc16_1',      'hand_Acc16_2',       'hand_Acc16_3',
            'hand_Gyro1',        'hand_Gyro2',         'hand_Gyro3',
            'hand_Magne1',       'hand_Magne2',        'hand_Magne3',
            'hand_Orientation1', 'hand_Orientation2',  'hand_Orientation3', 'hand_Orientation4']

imu_chest = ['chest_temperature',
             'chest_Acc16_1',       'chest_Acc16_2',      'chest_Acc16_3',
             'chest_Acc6_1',        'chest_Acc6_2',       'chest_Acc6_3',
             'chest_Gyro1',         'chest_Gyro2',        'chest_Gyro3',
             'chest_Magne1',        'chest_Magne2',       'chest_Magne3',
             'chest_Orientation1',  'chest_Orientation2', 'chest_Orientation3', 'chest_Orientation4']

imu_ankle = ['ankle_temperature',
           'ankle_Acc16_1',       'ankle_Acc16_2',      'ankle_Acc16_3',
           'ankle_Acc6_1',        'ankle_Acc6_2',       'ankle_Acc6_3',
           'ankle_Gyro1',         'ankle_Gyro2',        'ankle_Gyro3',
           'ankle_Magne1',        'ankle_Magne2',       'ankle_Magne3',
           'ankle_Orientation1',  'ankle_Orientation2', 'ankle_Orientation3', 'ankle_Orientation4']

columns = special_columns + imu_hand + imu_chest + imu_ankle  #put columns in one list

len(columns)

54

In [5]:
raw_data = pd.DataFrame()
for file in file_names:
    buffer = pd.read_table(file, header=None, sep='\s+')
    buffer.columns = columns #name columns
    buffer['subject_id'] = int(file[-5]) #add subject number to table (from filename)
    #dataCollection = dataCollection.append(procData, ignore_index=True) #does concat have anothery syntax than append?
    raw_data = pd.concat([raw_data, buffer], ignore_index=True)

raw_data.reset_index(drop=True, inplace=True)
raw_data.head()

Unnamed: 0,timestamp,activity_id,heartrate,hand_temperature,hand_Acc6_1,hand_Acc6_2,hand_Acc6_3,hand_Acc16_1,hand_Acc16_2,hand_Acc16_3,...,ankle_Gyro2,ankle_Gyro3,ankle_Magne1,ankle_Magne2,ankle_Magne3,ankle_Orientation1,ankle_Orientation2,ankle_Orientation3,ankle_Orientation4,subject_id
0,8.38,0,104.0,30.0,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,0.00925,-0.01758,-61.1888,-38.9599,-58.1438,1.0,0.0,0.0,0.0,1
1,8.39,0,,30.0,2.18837,8.5656,3.66179,2.39494,8.55081,3.64207,...,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1.0,0.0,0.0,0.0,1
2,8.4,0,,30.0,2.37357,8.60107,3.54898,2.30514,8.53644,3.7328,...,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1.0,0.0,0.0,0.0,1
3,8.41,0,,30.0,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1.0,0.0,0.0,0.0,1
4,8.42,0,,30.0,2.22936,8.83122,3.7,2.23055,8.59741,3.76295,...,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1.0,0.0,0.0,0.0,1


## Data Cleaning

Additional files that come with the dataset explain how the data was gathered and give information on which parts of the data can be used.

* remove transient activity(id == 0), no usefull data
* remove columns 'Orientation' --> not used in information gathering
* remove columns 'accelerometer 6g' --> was not precicly calibrated during the data aquisition

* remove subject 9

* remove timestamp, since it doesnt contain any meaningful information correlated to the activity performed at the time

* remove subject id

* interpolate missing values due to:
  * different frequencies (heartrate sensor - 9Hz, IMUs 100Hz)
  * connection lost, package drops in wireless transmission



In [18]:
data = raw_data.drop(['hand_Orientation1',   'hand_Orientation2',  'hand_Orientation3',  'hand_Orientation4',
                      'chest_Orientation1',  'chest_Orientation2', 'chest_Orientation3', 'chest_Orientation4',
                      'ankle_Orientation1',  'ankle_Orientation2', 'ankle_Orientation3', 'ankle_Orientation4'],
                    axis = 1)  # removal of orientation columns

data = data.drop(['hand_Acc6_1',  'hand_Acc6_2',   'hand_Acc6_3',
                  'chest_Acc6_1', 'chest_Acc6_2',  'chest_Acc6_3',
                  'ankle_Acc6_1', 'ankle_Acc6_2',  'ankle_Acc6_3',],
                    axis = 1) # remove columns of accelerometer6g

data = data.drop(data[data.subject_id == 9].index) #(maybe not even load in first place ...)

In [19]:
#linear interpolation - think about other/better methods
data = data.interpolate() #interpolate before transient activity gets removed

In [None]:
data = data.drop(data[data.activity_id == 0].index) #remove any row of activity 0 (transient)

In [23]:
data = data.drop(['timestamp', 'subject_id'], axis = 1)

In [24]:
data.reset_index(drop = True, inplace = True)
data.head()

Unnamed: 0,activity_id,heartrate,hand_temperature,hand_Acc16_1,hand_Acc16_2,hand_Acc16_3,hand_Gyro1,hand_Gyro2,hand_Gyro3,hand_Magne1,...,ankle_temperature,ankle_Acc16_1,ankle_Acc16_2,ankle_Acc16_3,ankle_Gyro1,ankle_Gyro2,ankle_Gyro3,ankle_Magne1,ankle_Magne2,ankle_Magne3
0,1,100.0,30.375,2.24689,8.55387,5.77143,-0.00475,0.037579,-0.011145,8.932,...,30.75,9.73855,-1.84761,0.095156,0.002908,-0.027714,0.001752,-61.1081,-36.8636,-58.3696
1,1,100.0,30.375,2.27373,8.14592,5.78739,-0.17171,0.025479,-0.009538,9.583,...,30.75,9.69762,-1.88438,-0.020804,0.020882,0.000945,0.006007,-60.8916,-36.3197,-58.3656
2,1,100.0,30.375,2.26966,7.66268,5.78846,-0.238241,0.011214,0.000831,9.05516,...,30.75,9.69633,-1.92203,-0.059173,-0.035392,-0.052422,-0.004882,-60.3407,-35.7842,-58.6119
3,1,100.0,30.375,2.22177,7.25535,5.88,-0.192912,0.019053,0.013374,9.92698,...,30.75,9.6637,-1.84714,0.094385,-0.032514,-0.018844,0.02695,-60.7646,-37.1028,-57.8799
4,1,100.0,30.375,2.2072,7.24042,5.95555,-0.069961,-0.018328,0.004582,9.15626,...,30.75,9.77578,-1.88582,0.095775,0.001351,-0.048878,-0.006328,-60.204,-37.1225,-57.8847


In [25]:
data.isnull().sum() #check for missing values left after interpolation

activity_id          0
heartrate            0
hand_temperature     0
hand_Acc16_1         0
hand_Acc16_2         0
hand_Acc16_3         0
hand_Gyro1           0
hand_Gyro2           0
hand_Gyro3           0
hand_Magne1          0
hand_Magne2          0
hand_Magne3          0
chest_temperature    0
chest_Acc16_1        0
chest_Acc16_2        0
chest_Acc16_3        0
chest_Gyro1          0
chest_Gyro2          0
chest_Gyro3          0
chest_Magne1         0
chest_Magne2         0
chest_Magne3         0
ankle_temperature    0
ankle_Acc16_1        0
ankle_Acc16_2        0
ankle_Acc16_3        0
ankle_Gyro1          0
ankle_Gyro2          0
ankle_Gyro3          0
ankle_Magne1         0
ankle_Magne2         0
ankle_Magne3         0
dtype: int64