In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
import tensorflow as tf
import keras.backend as K

def f1_score_macro(y_true, y_pred):
    """Macro-averaged "soft" F1 score, usable as a Keras metric function.

    Per-class counts are accumulated over axis 0 directly from the raw values
    of ``y_pred``; no argmax or threshold is applied. When ``y_pred`` holds
    probabilities the result is therefore a smooth approximation of F1, not
    the exact score computed from hard predictions.

    Args:
        y_true: ground-truth indicator tensor, reduced over axis 0
            (assumed one-hot, shape (batch, classes) -- confirm against caller).
        y_pred: prediction tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor: the mean of the per-class F1 values.
    """
    # Cast first so integer/bool labels can be multiplied with float predictions.
    y_true = K.cast(y_true, 'float')
    y_pred = K.cast(y_pred, 'float')

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # epsilon keeps the denominators non-zero for classes absent from the batch
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    # Defensive: replace any NaN that still slipped through with 0.
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

In [None]:
# Load the raw sensor log and key each row by its "<Date> <Time>" string;
# the two source columns are dropped once they have been folded into the index.
df = pd.read_csv('Occupancy_Estimation_r.csv')
df = df.set_index(df['Date'] + ' ' + df['Time']).drop(columns=['Date', 'Time'])
df

Unnamed: 0,S1_Temp,S2_Temp,S3_Temp,S4_Temp,S1_Light,S2_Light,S3_Light,S4_Light,S1_Sound,S2_Sound,S3_Sound,S4_Sound,S5_CO2,S5_CO2_Slope,S6_PIR,S7_PIR,Room_Occupancy_Count
2017/12/22 10:49:41,24.94,24.75,24.56,25.38,121,34,53,40,0.08,0.19,0.06,0.06,390,0.769231,0,0,1
2017/12/22 10:50:12,24.94,24.75,24.56,25.44,121,33,53,40,0.93,0.05,0.06,0.06,390,0.646154,0,0,1
2017/12/22 10:50:42,25.00,24.75,24.50,25.44,121,34,53,40,0.43,0.11,0.08,0.06,390,0.519231,0,0,1
2017/12/22 10:51:13,25.00,24.75,24.56,25.44,121,34,53,40,0.41,0.10,0.10,0.09,390,0.388462,0,0,1
2017/12/22 10:51:44,25.00,24.75,24.56,25.44,121,34,54,40,0.18,0.06,0.06,0.06,390,0.253846,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018/01/11 08:58:07,25.06,25.13,24.69,25.31,6,7,33,22,0.09,0.04,0.06,0.08,345,0.000000,0,0,0
2018/01/11 08:58:37,25.06,25.06,24.69,25.25,6,7,34,22,0.07,0.05,0.05,0.08,345,0.000000,0,0,0
2018/01/11 08:59:08,25.13,25.06,24.69,25.25,6,7,34,22,0.11,0.05,0.06,0.08,345,0.000000,0,0,0
2018/01/11 08:59:39,25.13,25.06,24.69,25.25,6,7,34,22,0.08,0.08,0.10,0.08,345,0.000000,0,0,0


In [None]:
# Inspect column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10129 entries, 2017/12/22 10:49:41 to 2018/01/11 09:00:09
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   S1_Temp               10129 non-null  float64
 1   S2_Temp               10129 non-null  float64
 2   S3_Temp               10129 non-null  float64
 3   S4_Temp               10129 non-null  float64
 4   S1_Light              10129 non-null  int64  
 5   S2_Light              10129 non-null  int64  
 6   S3_Light              10129 non-null  int64  
 7   S4_Light              10129 non-null  int64  
 8   S1_Sound              10129 non-null  float64
 9   S2_Sound              10129 non-null  float64
 10  S3_Sound              10129 non-null  float64
 11  S4_Sound              10129 non-null  float64
 12  S5_CO2                10129 non-null  int64  
 13  S5_CO2_Slope          10129 non-null  float64
 14  S6_PIR                10129 non-null  int64

In [None]:
# Promote the integer-typed sensor readings (light, CO2, PIR) to float64,
# then re-inspect the dtypes to confirm the conversion.
df = df.astype(dict.fromkeys(
    ['S1_Light', 'S2_Light', 'S3_Light', 'S4_Light',
     'S5_CO2', 'S6_PIR', 'S7_PIR'],
    np.float64,
))
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10129 entries, 2017/12/22 10:49:41 to 2018/01/11 09:00:09
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   S1_Temp               10129 non-null  float64
 1   S2_Temp               10129 non-null  float64
 2   S3_Temp               10129 non-null  float64
 3   S4_Temp               10129 non-null  float64
 4   S1_Light              10129 non-null  float64
 5   S2_Light              10129 non-null  float64
 6   S3_Light              10129 non-null  float64
 7   S4_Light              10129 non-null  float64
 8   S1_Sound              10129 non-null  float64
 9   S2_Sound              10129 non-null  float64
 10  S3_Sound              10129 non-null  float64
 11  S4_Sound              10129 non-null  float64
 12  S5_CO2                10129 non-null  float64
 13  S5_CO2_Slope          10129 non-null  float64
 14  S6_PIR                10129 non-null  float

In [None]:
# Distribution of the target label (number of rows per occupancy count).
df['Room_Occupancy_Count'].value_counts()

0    8228
2     748
3     694
1     459
Name: Room_Occupancy_Count, dtype: int64

In [None]:
# (rows, columns) of the frame, target column included.
df.shape

(10129, 17)

In [None]:
from sklearn.model_selection import train_test_split
# Chronological hold-out: the first 75% of the rows train the models, the last
# 25% test them. The rows must NOT be shuffled: later cells cut consecutive
# rows into `timesteps`-long windows and prepend the tail of X_train to X_test
# as history. A shuffled split would turn every window into a set of unrelated
# timestamps and leak test-period rows into training.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('Room_Occupancy_Count', axis=1),
    df['Room_Occupancy_Count'],
    test_size=0.25,
    shuffle=False,  # no randomness involved, so no random_state is needed
)

In [None]:
# Sanity-check the sizes of the four split outputs.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((7596, 16), (2533, 16), (7596,), (2533,))

In [None]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from keras.regularizers import l2

In [None]:
# Window length (in rows) fed to the recurrent models.
timesteps = 60
# Each test label needs `timesteps` rows of input, so the test features are
# prefixed with the last (timesteps - 1) training rows as history.
prep = X_train.tail(timesteps - 1)
X_test = pd.concat([prep, X_test])
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((7596, 16), (2592, 16), (7596,), (2533,))

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the features. The scaler is fitted on the training rows ONLY and
# then applied unchanged to the test rows: re-fitting on the test set would
# scale the two splits with different means/variances and leak test statistics.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train.values),
                              index=X_train.index,
                              columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test.values),
                             index=X_test.index,
                             columns=X_test.columns)
X_train_scaled.shape, X_test_scaled.shape

((7596, 16), (2592, 16))

In [None]:
# Build sliding windows over the scaled training rows: window k covers rows
# k .. k+timesteps-1 and is labelled with the target of its last row.
n_train_windows = y_train.shape[0] - (timesteps - 1)
X_t = [X_train_scaled.iloc[start:start + timesteps].values
       for start in range(n_train_windows)]
y_t = [y_train.iloc[start + (timesteps - 1)]
       for start in range(n_train_windows)]
X_train = np.array(X_t)
y_train = np.array(y_t).reshape(-1, 1)
print(f'Train data dimensions: {X_train.shape}, {y_train.shape}')

Train data dimensions: (7537, 60, 16), (7537, 1)


In [None]:
# Build one window per test label. X_test_scaled carries (timesteps - 1) extra
# leading rows, so window k (rows k .. k+timesteps-1) ends on test sample k.
n_test_windows = y_test.shape[0]
X_tst = [X_test_scaled.iloc[start:start + timesteps].values
         for start in range(n_test_windows)]
y_tst = [y_test.iloc[start] for start in range(n_test_windows)]
X_test = np.array(X_tst)
y_test = np.array(y_tst).reshape(-1, 1)
print(f'Test data dimensions: {X_test.shape}, {y_test.shape}')

Test data dimensions: (2533, 60, 16), (2533, 1)


In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from keras.regularizers import l2

# RNN

In [None]:
# Shared hyper-parameters for all three recurrent models.
N = X_train.shape[2]      # number of features per timestep
LEARNING_RATE = 1e-4      # initial Adam learning rate
BATCH_SIZE = 60
PENALTY = 0.05            # strength passed to the l2(...) regularisers
# Width of the softmax layer and of the one-hot labels. It is derived from the
# largest label in EITHER split: counting the distinct labels of the test set
# alone would under-size it whenever a class is missing from the test split.
# Labels are assumed to be the integers 0..K-1.
CLASSES = int(max(np.max(y_train), np.max(y_test))) + 1

In [None]:
import tensorflow as tf
# One-hot encode the training labels into CLASSES columns.
# NOTE(review): y_train is overwritten, so re-running this cell would encode
# the already-encoded array again -- run it once per kernel session.
y_train=tf.keras.utils.to_categorical(y_train, num_classes=CLASSES)


In [None]:
# One-hot encode the test labels into CLASSES columns.
# NOTE(review): y_test is overwritten, so re-running this cell would encode
# the already-encoded array again -- run it once per kernel session.
y_test=tf.keras.utils.to_categorical(y_test, num_classes=CLASSES)

In [None]:
# Learning-rate schedule passed to the fit() calls below: multiply the rate by
# `factor` after `patience` epoch(s) in which the monitored training loss did
# not improve.
lr_decay = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=1,
    verbose=0,
)

In [None]:
# SimpleRNN classifier: one recurrent layer over (timesteps, N) windows,
# batch normalisation, then a softmax over the occupancy classes.
# NOTE(review): the following cell creates a fresh `rnn`, so this instance is
# replaced when the notebook is run top to bottom.
rnn = Sequential()
rnn.add(SimpleRNN(input_shape=(timesteps, N),units=64,return_sequences=False,
                  kernel_regularizer=l2(PENALTY), recurrent_regularizer=l2(PENALTY),
                  dropout=0.2, recurrent_dropout=0.0))
rnn.add(BatchNormalization())
rnn.add(Dense(CLASSES, activation="softmax"))
rnn.compile(loss='categorical_crossentropy',
            metrics=[f1_score_macro],
            optimizer=Adam(learning_rate=LEARNING_RATE))
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would append a stray "None" line).
rnn.summary()

In [None]:
# SimpleRNN classifier, built explicitly instead of via `input_shape=`:
# one recurrent layer, batch normalisation, softmax over the occupancy classes.
rnn = Sequential()
rnn.add(SimpleRNN(units=64,return_sequences=False,
                  kernel_regularizer=l2(PENALTY), recurrent_regularizer=l2(PENALTY),
                  dropout=0.2, recurrent_dropout=0.0))
rnn.add(BatchNormalization())
rnn.add(Dense(CLASSES, activation="softmax"))
rnn.compile(loss='categorical_crossentropy',
            metrics=[f1_score_macro],
            optimizer=Adam(learning_rate=LEARNING_RATE))
# Build from the shared constants with a free batch dimension (None): a fixed
# batch size would not match the final, smaller batch of an epoch, and the
# literal 60/16 would silently go stale if `timesteps` or the features change.
rnn.build(input_shape=(None, timesteps, N))
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line.
rnn.summary()

In [None]:
# First training pass: 10 epochs with no callbacks, validating on the test
# windows after every epoch.
history_rnn = rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=10,
    shuffle=True,
    validation_split=0.0,
    validation_data=(X_test, y_test),
)

In [None]:
# Second training pass on the same `rnn` object: 50 epochs with the
# learning-rate decay callback. `history_rnn` is reassigned here, so it holds
# the history returned by this call only.
history_rnn = rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=50,
    shuffle=True,
    callbacks=[lr_decay],
    validation_split=0.0,
    validation_data=(X_test, y_test),
)

In [None]:
# Final loss and soft macro-F1 of the SimpleRNN on the test windows.
test_loss_rnn, test_f1_rnn = rnn.evaluate(
    x=X_test,
    y=y_test,
    batch_size=BATCH_SIZE,
)



In [None]:
import matplotlib.pyplot as plt
def plot_graphs(history, string):
    """Plot a training curve and its validation counterpart against epochs.

    Args:
        history: object whose ``.history`` mapping contains the keys
            ``string`` and ``'val_' + string`` (as returned by ``fit``).
        string: name of the logged quantity to plot, e.g. ``'loss'``.
    """
    curve_names = [string, 'val_'+string]
    for curve in curve_names:
        plt.plot(history.history[curve])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend(curve_names)
    plt.show()

In [None]:
# SimpleRNN: training vs. validation macro-F1 per epoch.
plot_graphs(history_rnn, 'f1_score_macro')

In [None]:
# SimpleRNN: training vs. validation loss per epoch.
plot_graphs(history_rnn, 'loss')

# LSTM

In [None]:
# LSTM classifier: one 128-unit LSTM layer over (timesteps, N) windows,
# batch normalisation, then a softmax over the occupancy classes.
model_lstm = Sequential()
model_lstm.add(LSTM(input_shape=(timesteps, N), units=128,
                    activation='tanh', recurrent_activation='sigmoid',
                    kernel_regularizer=l2(PENALTY), recurrent_regularizer=l2(PENALTY),
                    dropout=0.0, recurrent_dropout=0.0,
                    return_sequences=False, return_state=False,
                    stateful=False, unroll=False,
                    use_bias=True))
model_lstm.add(BatchNormalization())
model_lstm.add(Dense(units=CLASSES, activation='softmax'))
model_lstm.compile(loss='categorical_crossentropy',
                   metrics=[f1_score_macro],
                   optimizer=Adam(learning_rate=LEARNING_RATE))
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line.
model_lstm.summary()

In [None]:
# Train the LSTM for 50 epochs with the learning-rate decay callback,
# validating on the test windows after every epoch.
history_lstm = model_lstm.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=50,
    shuffle=True,
    callbacks=[lr_decay],
    validation_split=0.0,
    validation_data=(X_test, y_test),
)

In [None]:
# Final loss and soft macro-F1 of the LSTM on the test windows.
test_loss_lstm, test_f1_lstm = model_lstm.evaluate(
    x=X_test,
    y=y_test,
    batch_size=BATCH_SIZE,
)

In [None]:
# LSTM: training vs. validation loss per epoch.
plot_graphs(history_lstm, 'loss')

In [None]:
# LSTM: training vs. validation macro-F1 per epoch.
plot_graphs(history_lstm, 'f1_score_macro')

# ESN

In [None]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.16.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 5.3 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.16.1


In [None]:
from tensorflow_addons.layers import ESN


In [None]:
# Echo-state-network classifier: a 512-unit ESN layer over (timesteps, N)
# windows, followed by a regularised 128-unit sigmoid layer and a softmax
# over the occupancy classes, with batch normalisation in between.
esn = Sequential()
esn.add(ESN(input_shape=(timesteps, N), units=512,
            return_sequences=False))
esn.add(BatchNormalization())
esn.add(Dense(128, activation="sigmoid", kernel_regularizer=l2(PENALTY)))
esn.add(BatchNormalization())
esn.add(Dense(CLASSES, activation="softmax"))
esn.compile(loss='categorical_crossentropy',
            metrics=[f1_score_macro],
            # keyword form, matching how the other models configure Adam
            optimizer=Adam(learning_rate=LEARNING_RATE))
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line.
esn.summary()

In [None]:
# Train the ESN model for 50 epochs with the learning-rate decay callback,
# validating on the test windows after every epoch.
history_esn = esn.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=50,
    shuffle=True,
    callbacks=[lr_decay],
    validation_split=0.0,
    validation_data=(X_test, y_test),
)

In [None]:
# Final loss and soft macro-F1 of the ESN model on the test windows.
test_loss_esn, test_f1_esn = esn.evaluate(
    x=X_test,
    y=y_test,
    batch_size=BATCH_SIZE,
)

In [None]:
# ESN: training vs. validation loss per epoch.
plot_graphs(history_esn, 'loss')

In [None]:
# ESN: training vs. validation macro-F1 per epoch.
plot_graphs(history_esn, 'f1_score_macro')