In [1]:
# Slighlty updated for Keras 2.0 APIs
# - by Kyeong Soo (Joseph) Kim

import pandas as pd
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import scale
from keras.models import Sequential
from keras.layers import Dense, Dropout
from timeit import default_timer as timer

Using TensorFlow backend.


In [2]:
path_train = "./UJIndoorLoc/trainingData2.csv"
path_validation = "./UJIndoorLoc/validationData2.csv"

In [3]:
#Explicitly pass header=0 to be able to replace existing names 
train_df = pd.read_csv(path_train,header = 0)
train_df = train_df[:19930]
train_AP_strengths = train_df.iloc[:,:520] #select first 520 columns

#Scale transforms data to center to the mean and component wise scale to unit variance
train_AP_features = scale(np.asarray(train_AP_strengths).astype(float), axis=1) # convert integer to float and scale jointly (axis=1)

#The following two objects are actually pandas.core.series.Series objects
building_ids_str = train_df["BUILDINGID"].map(str) #convert all the building ids to strings
building_floors_str = train_df["FLOOR"].map(str) #convert all the building floors to strings

res = building_ids_str + building_floors_str #element wise concatenation of BUILDINGID+FLOOR
train_labels = np.asarray(building_ids_str + building_floors_str)

#convert labels to categorical variables, dummy_labels has type 'pandas.core.frame.DataFrame'
dummy_labels = pd.get_dummies(train_labels)


"""one hot encode the dummy_labels.
this is done because dummy_labels is a dataframe with the labels (BUILDINGID+FLOOR) 
as the column names
"""
train_labels = np.asarray(dummy_labels) #labels is an array of shape 19937 x 13. (there are 13 types of labels)

In [4]:
#generate len(train_AP_features) of floats in between 0 and 1
train_val_split = np.random.rand(len(train_AP_features))
#convert train_val_split to an array of booleans: if elem < 0.7 = true, else: false
train_val_split = train_val_split < 0.70 #should contain ~70% percent true

We aren't given a formal testing set, so we will treat the given validation set as the testing set

In [5]:
# We will then split our given training set into training + validation 
train_X = train_AP_features[train_val_split]
train_y = train_labels[train_val_split]
val_X = train_AP_features[~train_val_split]
val_y = train_labels[~train_val_split]

In [6]:
#Turn the given validation set into a testing set
test_df = pd.read_csv(path_validation,header = 0)
test_AP_features = scale(np.asarray(test_df.iloc[:,0:520]).astype(float))
test_labels = np.asarray(test_df["BUILDINGID"].map(str) + test_df["FLOOR"].map(str))
test_labels = np.asarray(pd.get_dummies(test_labels))

In [7]:
epochs = 20
batch_size = 10
input_size = 520
num_classes = 13
nums_hidden_nodes = [16, 32, 64, 128, 256, 512]
# nums_hidden_nodes = [16]
# dropout_rates = np.arange(11)*0.05 # 0.0, 0.05,...,0.5
dropout_rates = [0.0, 0.2, 0.5] # for test
# dropout_rates = [0.5] # for test
losses = np.zeros((len(nums_hidden_nodes), len(dropout_rates)))
accuracies = np.zeros_like(losses)

In [8]:
def classifier(num_hidden_nodes, dropout_rate):
    model = Sequential()
    model.add(Dense(num_hidden_nodes, input_dim=input_size, activation='tanh', use_bias=True))
#     model.add(Dropout(dropout_rate))
#     model.add(Dense(256, activation='tanh', use_bias=True))
#     model.add(Dropout(dropout_rate))
#     model.add(Dense(128, activation='tanh', use_bias=True))
#     model.add(Dropout(dropout_rate))
#     model.add(Dense(64, activation='tanh', use_bias=True))
#     model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax', use_bias=True))
    model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])
    return model

In [9]:
for nh in range(len(nums_hidden_nodes)):
    for dr in range(len(dropout_rates)):
        c = classifier(nums_hidden_nodes[nh], dropout_rates[dr])
        startTime = timer()
        c.fit(train_X, train_y, validation_data=(val_X, val_y), batch_size=batch_size, epochs=epochs)
        elapsedTime = timer() - startTime
        print("Model trained with nh=%d & dr=%.2f in %e s.\n" % (nums_hidden_nodes[nh], dropout_rates[dr], elapsedTime))
        loss, acc = c.evaluate(test_AP_features, test_labels)
        losses[nh, dr] = loss
        accuracies[nh, dr] = acc

Train on 13986 samples, validate on 5944 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=16 & dr=0.00 in 5.039435e+01 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=16 & dr=0.20 in 5.275321e+01 s.
  32/1111 [..............................] - ETA: 0sTrain on 13986 samples, validate on 5944 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=16 & dr=0.50 in 6.207820e+0

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=32 & dr=0.00 in 5.909321e+01 s.
Train on 13986 samples, validate on 5944 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=32 & dr=0.20 in 5.733121e+01 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=32 & dr=0.50 in 6.032151e+01 s.
  32/1111 [..............................] - ETA: 0sTrain on 13986 samples, validate on 5944 samples
Epoch 

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=64 & dr=0.00 in 6.534536e+01 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=64 & dr=0.20 in 6.260847e+01 s.
  32/1111 [..............................] - ETA: 0sTrain on 13986 samples, validate on 5944 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=64 & dr=0.50 in 7.364042e+01 s.
Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=128 & dr=0.00 in 8.420690e+01 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=128 & dr=0.20 in 8.441882e+01 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=128 & dr=0.50 in 8.922879e+01 s.
  32/1111 [..............................] - ETA: 0sTrain on 13986 samples, validate on 5944 samples
Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=256 & dr=0.00 in 1.145556e+02 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=256 & dr=0.20 in 1.189624e+02 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=256 & dr=0.50 in 1.050173e+02 s.
Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=512 & dr=0.00 in 2.104942e+02 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=512 & dr=0.20 in 2.221081e+02 s.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model trained with nh=512 & dr=0.50 in 2.234092e+02 s.

In [10]:
for nh in range(len(nums_hidden_nodes)):
    for dr in range(len(dropout_rates)):
        print("nh=%d & dr=%.2f: loss=%e, accuracy=%e" % (nums_hidden_nodes[nh], dropout_rates[dr], losses[nh, dr], accuracies[nh, dr]))

nh=16 & dr=0.00: loss=1.234082e+00, accuracy=7.533753e-01

nh=16 & dr=0.20: loss=1.492540e+00, accuracy=7.353735e-01

nh=16 & dr=0.50: loss=1.369105e+00, accuracy=7.488749e-01

nh=32 & dr=0.00: loss=1.026828e+00, accuracy=8.001800e-01

nh=32 & dr=0.20: loss=1.256892e+00, accuracy=7.560756e-01

nh=32 & dr=0.50: loss=1.296779e+00, accuracy=7.659766e-01

nh=64 & dr=0.00: loss=1.033471e+00, accuracy=7.947795e-01

nh=64 & dr=0.20: loss=9.504665e-01, accuracy=8.235824e-01

nh=64 & dr=0.50: loss=1.162159e+00, accuracy=7.830783e-01

nh=128 & dr=0.00: loss=1.055724e+00, accuracy=8.001800e-01

nh=128 & dr=0.20: loss=1.023210e+00, accuracy=8.343834e-01

nh=128 & dr=0.50: loss=1.475645e+00, accuracy=7.389739e-01

nh=256 & dr=0.00: loss=1.222873e+00, accuracy=7.947795e-01

nh=256 & dr=0.20: loss=9.960540e-01, accuracy=8.280828e-01

nh=256 & dr=0.50: loss=1.053368e+00, accuracy=8.127813e-01

nh=512 & dr=0.00: loss=1.344286e+00, accuracy=8.100810e-01

nh=512 & dr=0.20: loss=1.255742e+00, accuracy=7.9