In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import KFold

  from ._conv import register_converters as _register_converters


In [2]:
# Load datasets
occupancy_df_training = pd.read_csv("occupancy_data/datatraining.txt", header=0)

print(occupancy_df_training.head())

occupancy_df_test1 = pd.read_csv("occupancy_data/datatest.txt", header=0)
occupancy_df_test2 = pd.read_csv("occupancy_data/datatest2.txt", header=0)

spambase_df = pd.read_csv("spambase/spambase.data", header=None)

print(spambase_df.head())

                  date  Temperature  Humidity  Light     CO2  HumidityRatio  \
1  2015-02-04 17:51:00        23.18   27.2720  426.0  721.25       0.004793   
2  2015-02-04 17:51:59        23.15   27.2675  429.5  714.00       0.004783   
3  2015-02-04 17:53:00        23.15   27.2450  426.0  713.50       0.004779   
4  2015-02-04 17:54:00        23.15   27.2000  426.0  708.25       0.004772   
5  2015-02-04 17:55:00        23.10   27.2000  426.0  704.50       0.004757   

   Occupancy  
1          1  
2          1  
3          1  
4          1  
5          1  
     0     1     2    3     4     5     6     7     8     9  ...    48     49  \
0  0.00  0.64  0.64  0.0  0.32  0.00  0.00  0.00  0.00  0.00 ...  0.00  0.000   
1  0.21  0.28  0.50  0.0  0.14  0.28  0.21  0.07  0.00  0.94 ...  0.00  0.132   
2  0.06  0.00  0.71  0.0  1.23  0.19  0.19  0.12  0.64  0.25 ...  0.01  0.143   
3  0.00  0.00  0.00  0.0  0.63  0.00  0.31  0.63  0.31  0.63 ...  0.00  0.137   
4  0.00  0.00  0.00  0.0  0.63

In [3]:
occupancy_y = occupancy_df_training.Occupancy

occupancy_df_training = occupancy_df_training.drop("Occupancy", axis=1)
occupancy_df_training = occupancy_df_training.drop("date", axis=1)

spambase_y = spambase_df[57]
spambase_df = spambase_df.drop(57, axis=1)
print(spambase_df.head())

     0     1     2    3     4     5     6     7     8     9   ...    47    48  \
0  0.00  0.64  0.64  0.0  0.32  0.00  0.00  0.00  0.00  0.00  ...   0.0  0.00   
1  0.21  0.28  0.50  0.0  0.14  0.28  0.21  0.07  0.00  0.94  ...   0.0  0.00   
2  0.06  0.00  0.71  0.0  1.23  0.19  0.19  0.12  0.64  0.25  ...   0.0  0.01   
3  0.00  0.00  0.00  0.0  0.63  0.00  0.31  0.63  0.31  0.63  ...   0.0  0.00   
4  0.00  0.00  0.00  0.0  0.63  0.00  0.31  0.63  0.31  0.63  ...   0.0  0.00   

      49   50     51     52     53     54   55    56  
0  0.000  0.0  0.778  0.000  0.000  3.756   61   278  
1  0.132  0.0  0.372  0.180  0.048  5.114  101  1028  
2  0.143  0.0  0.276  0.184  0.010  9.821  485  2259  
3  0.137  0.0  0.137  0.000  0.000  3.537   40   191  
4  0.135  0.0  0.135  0.000  0.000  3.537   40   191  

[5 rows x 57 columns]


In [4]:
# Split with KFolds
kf = KFold(10, True, 1)

In [5]:
# Create a model with 2 layers
model = keras.Sequential()
model.add(keras.layers.Dense(16, activation=tf.nn.sigmoid)) # 16 neurons and sigmoid activation function
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid)) # 1 neuron and sigmoid activation function

In [6]:
def recall_m(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

def precision_m(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [7]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', f1_m])

In [8]:
overall_accuracy = 0
overall_f1_score = 0
for train, test in kf.split(occupancy_df_training):
    
    X_train, X_test, y_train, y_test = occupancy_df_training.iloc[train], occupancy_df_training.iloc[test], occupancy_y.iloc[train], occupancy_y.iloc[test]
    
    history = model.fit(X_train.values,
                        y_train.values,
                        epochs=10)
    
    # Model Accuracy: how often is the classifier correct?
    loss, accuracy, f1 = model.evaluate(X_test, y_test)
    
    overall_accuracy += accuracy
    overall_f1_score += f1
    
print("\naccuracy:   %0.3f, f1_score:   %0.3f\n" % (overall_accuracy/10, overall_f1_score/10))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [9]:
# Create a model with 2 layers
model = keras.Sequential()
model.add(keras.layers.Dense(16, activation=tf.nn.sigmoid, input_shape=(57,))) # 16 neurons and sigmoid activation function
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid, input_shape=(57,))) # 1 neuron and sigmoid activation function

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', f1_m])

In [10]:
overall_accuracy = 0
overall_f1_score = 0
for train, test in kf.split(spambase_df):
    
    X_train, X_test, y_train, y_test = spambase_df.iloc[train], spambase_df.iloc[test], spambase_y.iloc[train], spambase_y.iloc[test]
    
    history = model.fit(X_train.values,
                        y_train.values,
                        epochs=5)
    
    # Model Accuracy: how often is the classifier correct?
    loss, accuracy, f1 = model.evaluate(X_test, y_test)
    
    overall_accuracy += accuracy
    overall_f1_score += f1
    
print("\naccuracy:   %0.3f, f1_score:   %0.3f\n" % (overall_accuracy/10, overall_f1_score/10))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

accuracy:   0.934, f1_score:   0.406



In [11]:
# Create a model with 1 layer
model = keras.Sequential()
model.add(keras.layers.Dense(16, activation=tf.nn.sigmoid)) # 16 neurons and sigmoid activation function

In [12]:
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy', f1_m])

In [13]:
overall_accuracy = 0
overall_f1_score = 0
for train, test in kf.split(occupancy_df_training):
    
    X_train, X_test, y_train, y_test = occupancy_df_training.iloc[train], occupancy_df_training.iloc[test], occupancy_y.iloc[train], occupancy_y.iloc[test]
    
    history = model.fit(X_train.values,
                        y_train.values,
                        epochs=10)
    
    # Model Accuracy: how often is the classifier correct?
    loss, accuracy, f1 = model.evaluate(X_test, y_test)
    
    overall_accuracy += accuracy
    overall_f1_score += f1
    
print("\naccuracy:   %0.3f, f1_score:   %0.3f\n" % (overall_accuracy/10, overall_f1_score/10))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [14]:
# Create a model with 1 layer
model = keras.Sequential()
model.add(keras.layers.Dense(16, activation=tf.nn.sigmoid)) # 16 neurons and sigmoid activation function

model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy', f1_m])

In [15]:
overall_accuracy = 0
overall_f1_score = 0
for train, test in kf.split(spambase_df):
    
    X_train, X_test, y_train, y_test = spambase_df.iloc[train], spambase_df.iloc[test], spambase_y.iloc[train], spambase_y.iloc[test]
    
    history = model.fit(X_train.values,
                        y_train.values,
                        epochs=10)
    
    # Model Accuracy: how often is the classifier correct?
    loss, accuracy, f1 = model.evaluate(X_test, y_test)
    
    overall_accuracy += accuracy
    overall_f1_score += f1
    
print("\naccuracy:   %0.3f, f1_score:   %0.3f\n" % (overall_accuracy/10, overall_f1_score/10))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E