# Synthetic Data
**The goal** of this project is to develop a system that, given temperature, humidity and gas sensor readings, is able to predict in which room of the
house is infrared sensor is detecting an activity.

In [1]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [22]:
df_train = pd.read_csv("/content/drive/MyDrive/data_synth_train.csv")
df_test = pd.read_csv("/content/drive/MyDrive/data_synth_test.csv")

In [3]:
df_test.head(5)

Unnamed: 0,timestamp,temperature,humidity,CO2CosIRValue,CO2MG811Value,MOX1,MOX2,MOX3,MOX4,COValue
0,2020-01-17 03:53:23,21.98,55.12,102,530,444,695,652,586,109
1,2020-01-17 03:53:43,22.08,55.07,102,529,444,696,650,585,107
2,2020-01-17 03:54:03,22.14,55.13,102,528,442,694,651,583,108
3,2020-01-17 03:54:23,22.07,55.21,102,529,442,694,651,585,108
4,2020-01-17 03:54:43,22.03,55.07,101,530,443,694,651,583,108


In [4]:
df_train.head(5)

Unnamed: 0,timestamp,temperature,humidity,CO2CosIRValue,CO2MG811Value,MOX1,MOX2,MOX3,MOX4,COValue,Living room,Bedroom,Bathroom,Kitchen,Hallway
0,2019-11-07 00:00:07,21.26,57.74,109,531,489,705,659,600,122,0,0,0,0,0
1,2019-11-07 00:00:27,21.19,58.03,107,533,494,707,658,603,122,0,0,0,0,0
2,2019-11-07 00:00:47,21.11,57.53,107,530,491,708,656,596,121,1,0,0,0,0
3,2019-11-07 00:01:07,21.12,57.94,106,532,489,710,656,597,126,1,0,0,0,0
4,2019-11-07 00:01:27,21.06,58.18,103,532,490,705,658,597,130,1,0,0,0,0


## Preprocess
Handle missing values and normalize the data

In [5]:
from sklearn.preprocessing import StandardScaler

def filter_vals(a):
  try:
    float(a)
    return True
  except ValueError:
    return False

filtered_temp = list(map(float, list(filter(filter_vals, df_test['temperature'].values))))
filtered_humidity = list(map(float,list(filter(filter_vals, df_test['humidity'].values))))
filter_CosIR = list(map(float, list(filter(filter_vals, df_test['CO2CosIRValue'].values))))
filter_MG811 = list(map(float, list(filter(filter_vals, df_test['CO2MG811Value'].values))))
filter_mox1 = list(map(float, list(filter(filter_vals, df_test['MOX1'].values))))
filter_mox2 = list(map(float, list(filter(filter_vals, df_test['MOX2'].values))))
filter_mox3 = list(map(float, list(filter(filter_vals, df_test['MOX3'].values))))
filter_mox4 = list(map(float, list(filter(filter_vals, df_test['MOX4'].values))))

avg = {}
avg['temperature'] = sum(filtered_temp)/ len(filtered_temp)
avg['humidity'] = sum(filtered_humidity)/ len(filtered_humidity)
avg['CO2CosIRValue'] = sum(filter_CosIR)/ len(filter_CosIR)
avg['CO2MG811Value'] = sum(filter_MG811)/ len(filter_MG811)
avg['MOX1'] = sum(filter_mox1)/ len(filter_mox1)
avg['MOX2'] = sum(filter_mox2)/ len(filter_mox2)
avg['MOX3'] = sum(filter_mox3)/len(filter_mox3)
avg['MOX4'] = sum(filter_mox4)/len(filter_mox4)

def handle_missing_vals(col):
  new = []
  for a in col.values:
    try:
      float(a)
      new.append(a)
    except ValueError:
      new.append(avg[col.name])
  return new


def preprocess(data_train, data_test):
    data_test['temperature'] = handle_missing_vals(data_test['temperature'])
    data_test['humidity'] = handle_missing_vals(data_test['humidity'])
    data_test['CO2CosIRValue'] = handle_missing_vals(data_test['CO2CosIRValue'])
    data_test['CO2MG811Value'] = handle_missing_vals(data_test['CO2MG811Value'])
    data_test['MOX1'] = handle_missing_vals(data_test['MOX1'])
    data_test['MOX2'] = handle_missing_vals(data_test['MOX2'])
    data_test['MOX3'] = handle_missing_vals(data_test['MOX3'])
    data_test['MOX4'] = handle_missing_vals(data_test['MOX4'])

    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(data_train)
    data_test = scaler.transform(data_test)

    return normalized_data, data_test


In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
df_train['timestamp'] = pd.to_datetime(df_train['timestamp'])
df_train['hour'] = df_train['timestamp'].dt.hour
df_train['day_of_week'] = df_train['timestamp'].dt.dayofweek



features_to_scale = ['temperature',	'humidity',	'CO2CosIRValue',	'CO2MG811Value',	'MOX1',	'MOX2',	'MOX3',	'MOX4']

df_train[features_to_scale], df_test[features_to_scale] = preprocess(df_train[features_to_scale], df_test[features_to_scale])


y_living_room = df_train[['Living room']].values
y_bedroom = df_train[['Bedroom']].values
y_bathroom = df_train[['Bathroom']].values
y_kitchen = df_train[['Kitchen']].values
y_hallway = df_train[['Hallway']].values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test['temperature'] = handle_missing_vals(data_test['temperature'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test['humidity'] = handle_missing_vals(data_test['humidity'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test['CO2CosIRValue'] = handle_missing_vals(data_test['CO2Co

In [7]:
X_train = df_train[features_to_scale].values
X_test = df_test[features_to_scale].values

In [8]:
sequence_length=10

sequences_train = []
y_living_room_train = []
y_bathroom_train = []
y_kitchen_train = []
y_bedroom_train = []
y_hallway_train = []

for i in range(len(X_train) - sequence_length +1):
  sequences_train.append(X_train[i:i+sequence_length])

sequence_test = []
for i in range(len(X_test)-sequence_length +1):
  sequence_test.append(X_test[i:i+sequence_length])


## Building temporal convolution

In [9]:
from tensorflow.keras import layers
from tensorflow import keras

model = keras.Sequential([
  keras.Input(shape=(10,8)),
  layers.Conv1D(64,kernel_size = 3,activation='relu'),
  layers.Conv1D(64,kernel_size = 3,activation='relu'),
  layers.Dropout((0.5)),
  layers.MaxPooling1D(pool_size=2),
  layers.Flatten(),
  layers.Dense(100, activation='relu'),
  layers.Dense(1, activation='sigmoid'),
])


In [10]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(tf.convert_to_tensor(sequences_train),tf.convert_to_tensor(y_living_room[:304559]), epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b64a6221ae0>

In [13]:
y_predict_living_room = model.predict(tf.convert_to_tensor(sequence_test))



In [15]:
model.fit(tf.convert_to_tensor(sequences_train), tf.convert_to_tensor(y_bedroom[:304559]), batch_size=32 ,epochs=15, verbose=1)
y_predict_bedroom = model.predict(tf.convert_to_tensor(sequence_test))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
model.fit(tf.convert_to_tensor(sequences_train), tf.convert_to_tensor(y_bathroom[:304559]), batch_size=32,epochs=10, verbose=1)
y_predict_bathroom = model.predict(tf.convert_to_tensor(sequence_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
model.fit(tf.convert_to_tensor(sequences_train), tf.convert_to_tensor(y_kitchen[:304559]), batch_size=32,epochs=10, verbose=1)
y_predict_kitchen = model.predict(tf.convert_to_tensor(sequence_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
model.fit(tf.convert_to_tensor(sequences_train), tf.convert_to_tensor(y_hallway[:304559]),batch_size=32, epochs=10, verbose=1)
y_predict_hallway = model.predict(tf.convert_to_tensor(sequence_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Result

In [27]:
df_result = df_test.iloc[:64072]

In [28]:
df_result["Living Room"] = y_predict_living_room.flatten()
df_result["Bedroom"] = y_predict_bedroom.flatten()
df_result["Bathroom"] = y_predict_bathroom.flatten()
df_result["Kitchen"] = y_predict_kitchen.flatten()
df_result["Hallway"] = y_predict_hallway.flatten()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result["Living Room"] = y_predict_living_room.flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result["Bedroom"] = y_predict_bedroom.flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result["Bathroom"] = y_predict_bathroom.flatten()
A value is trying to be set on a copy of a

In [30]:
df_result.head(10)

Unnamed: 0,timestamp,temperature,humidity,CO2CosIRValue,CO2MG811Value,MOX1,MOX2,MOX3,MOX4,COValue,Living Room,Bedroom,Bathroom,Kitchen,Hallway
0,2020-01-17 03:53:23,21.98,55.12,102,530,444,695,652,586,109,0.014883,0.026997,0.035073,0.067751,0.011635
1,2020-01-17 03:53:43,22.08,55.07,102,529,444,696,650,585,107,0.014795,0.026864,0.035671,0.068137,0.012115
2,2020-01-17 03:54:03,22.14,55.13,102,528,442,694,651,583,108,0.014725,0.026727,0.035494,0.069399,0.012294
3,2020-01-17 03:54:23,22.07,55.21,102,529,442,694,651,585,108,0.014676,0.026813,0.035223,0.071621,0.012313
4,2020-01-17 03:54:43,22.03,55.07,101,530,443,694,651,583,108,0.014607,0.026962,0.034305,0.071464,0.011942
5,2020-01-17 03:55:03,21.96,54.94,101,529,444,694,650,584,108,0.014724,0.02705,0.033557,0.073527,0.011712
6,2020-01-17 03:55:23,21.95,55.02,102,527,442,694,652,583,108,0.014758,0.026985,0.03284,0.072658,0.011617
7,2020-01-17 03:55:43,21.97,55.14,102,530,443,694,651,584,108,0.015041,0.026796,0.032862,0.071611,0.011709
8,2020-01-17 03:56:03,22.03,55.01,102,528,441,695,652,585,108,0.015157,0.02663,0.032947,0.071436,0.011792
9,2020-01-17 03:56:23,21.86,55.08,103,529,443,693,650,584,108,0.015127,0.026357,0.032797,0.068905,0.011734
