## Cooking activity recognition using indoor air quality sensors

In [1]:
import numpy as np
import pandas as pd

1. import data

In [23]:
# Read the raw CSV export into a DataFrame; the path is relative to the
# notebook's working directory.
path = 'data/uHooD.csv'
df = pd.read_csv(filepath_or_buffer=path)
# Preview the first five rows.
df.head(n=5)


Unnamed: 0,Date and Time,Temperature,Relative Humidity,PM2.5,TVOC,CO2,CO,Air Pressure,Ozone,NO2,...,Torta,Minestra,Carne in padella,Trippa,Carne in umido,Frittura,Window,Fornello,Macchina espresso,Forno elettrico
0,2018-04-19 00:00,22.4,66.6,29.96,97.0,577.0,0.0,1023.62,5.19,0.74,...,0,0,0,0,0,0,Trascurabile,0,0,0
1,2018-04-19 00:01,22.4,66.5,32.73,98.0,574.0,0.0,1023.58,5.16,0.42,...,0,0,0,0,0,0,Trascurabile,0,0,0
2,2018-04-19 00:02,22.4,66.69,19.7,98.0,592.0,0.0,1023.6,5.15,0.73,...,0,0,0,0,0,0,Trascurabile,0,0,0
3,2018-04-19 00:03,22.4,66.63,18.13,98.0,609.0,0.0,1023.57,5.15,0.47,...,0,0,0,0,0,0,Trascurabile,0,0,0
4,2018-04-19 00:04,22.4,66.59,41.6,100.0,607.0,0.0,1023.57,5.14,0.49,...,0,0,0,0,0,0,Trascurabile,0,0,0


2. Remove the food-cooked class columns (keep the timestamp, sensor readings and the Activity label)

In [24]:
# Drop the trailing 23 columns (judging by the preview above, the per-dish and
# appliance indicator columns), keeping everything up to and including
# 'Activity'.
# .copy() makes the result an independent frame: later cells modify it
# (dropna, rename, column assignment), and doing that on a bare iloc slice of
# `df` raises SettingWithCopyWarning and leaves it ambiguous which object is
# actually being changed.
df_no_specific_cooking = df.iloc[:, :-23].copy()
df_no_specific_cooking.head(5)

Unnamed: 0,Date and Time,Temperature,Relative Humidity,PM2.5,TVOC,CO2,CO,Air Pressure,Ozone,NO2,Activity
0,2018-04-19 00:00,22.4,66.6,29.96,97.0,577.0,0.0,1023.62,5.19,0.74,
1,2018-04-19 00:01,22.4,66.5,32.73,98.0,574.0,0.0,1023.58,5.16,0.42,
2,2018-04-19 00:02,22.4,66.69,19.7,98.0,592.0,0.0,1023.6,5.15,0.73,
3,2018-04-19 00:03,22.4,66.63,18.13,98.0,609.0,0.0,1023.57,5.15,0.47,
4,2018-04-19 00:04,22.4,66.59,41.6,100.0,607.0,0.0,1023.57,5.14,0.49,


3. Drop rows that have any missing sensor value

In [25]:
# Sensor readings that must all be present for a row to be kept.
sensor_columns = [
    "Temperature", "Relative Humidity", "PM2.5", "TVOC", "CO2",
    "CO", "Air Pressure", "Ozone", "NO2",
]

print("before: " + str(len(df_no_specific_cooking)))
# Rebind the name instead of using inplace=True: the frame was produced by
# slicing `df`, and an in-place dropna on such a slice triggers
# SettingWithCopyWarning. Rebinding gives the same result without mutating an
# object that may still be tied to the raw frame.
df_no_specific_cooking = df_no_specific_cooking.dropna(subset=sensor_columns)
print("after: " + str(len(df_no_specific_cooking)))

before: 55829
after: 55829


4. rename class column: Activity -> class

In [26]:
# Relabel the target column 'Activity' as 'class' (modifies the frame in
# place), then show the first row to confirm the new header.
df_no_specific_cooking.rename({'Activity': 'class'}, axis='columns', inplace=True)
df_no_specific_cooking.head(n=1)

Unnamed: 0,Date and Time,Temperature,Relative Humidity,PM2.5,TVOC,CO2,CO,Air Pressure,Ozone,NO2,class
0,2018-04-19 00:00,22.4,66.6,29.96,97.0,577.0,0.0,1023.62,5.19,0.74,


5. Encode class values as binary (0 = no cooking, 1 = cooking)

In [27]:
# Collapse the activity labels into a binary target:
# 'None' -> 0, any of the three meal labels -> 1.
class_mapping = {
    'None': 0,
    'Colazione': 1,
    'Pranzo': 1,
    'Cena': 1
}

raw_labels = df_no_specific_cooking['class']
encoded_labels = raw_labels.map(class_mapping)

# Series.map turns every value that is not a key of the mapping into NaN
# without complaint. That also happens when this cell is re-run on the
# already-encoded 0/1 column. Fail loudly rather than silently corrupting
# the target column.
unmapped_labels = raw_labels[encoded_labels.isna()].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        "Unexpected class labels (not in class_mapping): "
        + repr(list(unmapped_labels))
    )

# assign() returns a new frame, so the column is replaced without writing
# into a frame that was derived by slicing `df` (avoids SettingWithCopyWarning).
# 'class' is a Python keyword, hence the ** form.
df_no_specific_cooking = df_no_specific_cooking.assign(**{'class': encoded_labels})
df_no_specific_cooking['class'].unique()

array([0, 1])

6. PCA

In [42]:
from sklearn.preprocessing import StandardScaler

# NOTE(review): from here on `df` refers to the cleaned frame, no longer the
# raw CSV contents; re-running earlier cells that read `df` after this point
# will see the cleaned frame instead.
df = df_no_specific_cooking

# Sensor columns fed to the scaler.
features = [
    "Temperature",
    "Relative Humidity",
    "PM2.5",
    "TVOC",
    "CO2",
    "CO",
    "Air Pressure",
    "Ozone",
    "NO2",
]

# Target kept as a single-column 2-D array (selected with a list of columns).
y = df[['class']].values
# Fit the scaler on the feature matrix and replace it with the scaled values.
x = StandardScaler().fit_transform(df[features].values)

In [43]:
from sklearn.decomposition import PCA

# Request as many components as there are input features.
pca = PCA(n_components=len(features))
principalComponents = pca.fit_transform(x)

# Name the columns p1..pN from the actual width of the component matrix.
# The previous hard-coded list of nine names would raise a shape mismatch as
# soon as `features` gained or lost an entry.
component_names = ['p' + str(i + 1) for i in range(principalComponents.shape[1])]
principalDf = pd.DataFrame(data=principalComponents, columns=component_names)

In [46]:
# pd.concat(axis=1) lines rows up by index label, not by position.
# principalDf was built from a bare array and therefore has a fresh 0..n-1
# index, while `df` keeps its original row labels after dropna. If any row had
# been dropped, the labels would no longer match and the result would contain
# misaligned rows padded with NaN. Resetting the index on the `df` pieces makes
# the join purely positional, matching the row order used to compute the
# components.
finalDf = pd.concat(
    [
        df[['Date and Time']].reset_index(drop=True),
        principalDf,
        df[['class']].reset_index(drop=True),
    ],
    axis=1,
)
finalDf.head(5)


Unnamed: 0,Date and Time,p1,p2,p3,p4,p5,p6,p7,p8,p9,class
0,2018-04-19 00:00,0.96317,0.85272,-0.553295,-0.082467,0.217235,2.234763,-0.593755,-0.363818,-0.097237,0
1,2018-04-19 00:01,0.942435,0.921742,-0.681599,-0.063085,0.024214,2.317459,-0.612724,-0.370516,-0.117996,0
2,2018-04-19 00:02,0.838728,0.528732,-0.339622,-0.148821,0.930609,1.883154,-0.546522,-0.421438,-0.079885,0
3,2018-04-19 00:03,0.781268,0.484412,-0.335957,-0.159006,1.038585,1.819713,-0.508757,-0.46965,-0.124928,0
4,2018-04-19 00:04,0.938856,1.246338,-0.821653,-0.007542,-0.605295,2.610545,-0.549717,-0.35414,-0.098492,0


7. remove outliers