In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Python >= 3.5 is required
import sys
assert sys.version_info >= (3,5)

# Scikit-Learn >= 0.20 is required
import sklearn 
assert sklearn.__version__ >= "0.20"

# common imports
import pandas as pd
import numpy as np 
import os 

# to plot pretty figures
%matplotlib inline 
import matplotlib as mpl 
import matplotlib.pyplot as plt 
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

PROJECT_ROOT_DIR = "."
DATASET_PATH = os.path.join(PROJECT_ROOT_DIR, "wifi_dataset")
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    # Arguments
        fig_id (str): File name without extension; also echoed in the log line.
        tight_layout (bool): Call ``plt.tight_layout()`` before saving.
        fig_extension (str): File extension, also passed as the output format.
        resolution (int): DPI passed to ``plt.savefig``.
    """
    file_name = fig_id + "." + fig_extension
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(os.path.join(IMAGES_PATH, file_name),
                format=fig_extension,
                dpi=resolution)

In [2]:
# Directories holding the csv files of the train / val / test splits
path_to_train_csvs, path_to_val_csvs, path_to_test_csvs = (
    os.path.join(DATASET_PATH, split_name)
    for split_name in ("train", "val", "test")
)

## Part 3. <font color=green>Predicting a user's coordinates using feed-forward neural networks</font>



### Task 1. Copy and paste your previous implementations of <font color=blue>build_feats</font>, <font color=blue>build_feats_targets</font>, and <font color=blue>euclidean_distance</font>



In [None]:
def _csv_sort_key(csv_path):
    """Sort key placing numerically named files (1.csv, 2.csv, 10.csv) in numeric order.

    Files whose stem is not a plain number are placed after the numeric ones,
    ordered by name.
    """
    stem = os.path.splitext(os.path.basename(csv_path))[0]
    if stem.isdecimal():
        return (0, int(stem), "")
    return (1, 0, stem)


def _load_csv_rows(path_to_csvs):
    """Read every ``*.csv`` file in a directory and stack the rows into one 2-D array.

    Files are read in a deterministic order (see ``_csv_sort_key``) because
    ``glob`` returns paths in arbitrary, platform-dependent order.
    NOTE(review): numeric file order is assumed to be "the order of csv files"
    the assignment refers to - confirm against the dataset description.

    # Raises
        FileNotFoundError: if the directory contains no csv file.
    """
    import glob  # local import: not provided by the notebook's import cell

    csv_paths = sorted(glob.glob(os.path.join(path_to_csvs, "*.csv")),
                       key=_csv_sort_key)
    if not csv_paths:
        raise FileNotFoundError("no csv files found in {!r}".format(path_to_csvs))

    # genfromtxt returns a 1-D array for a single-row file; promote it to one
    # row so it can be stacked with the multi-row files.
    chunks = [np.atleast_2d(np.genfromtxt(csv_path, delimiter=','))
              for csv_path in csv_paths]
    # Concatenate once instead of re-copying the growing array per file.
    return np.concatenate(chunks, axis=0)


def build_feats(path_to_csvs):
    """Build the feature matrix from all csv files in ``path_to_csvs``.

    # Arguments
        path_to_csvs (str): Directory containing the csv files.

    # Returns
        feats (ndarray): Rows of all files stacked along axis 0, every column kept.
    """
    return _load_csv_rows(path_to_csvs)


def build_feats_targets(path_to_csvs):
    """Build the feature and target matrices from all csv files in ``path_to_csvs``.

    # Arguments
        path_to_csvs (str): Directory containing the csv files.

    # Returns
        feats (ndarray): All columns except the last three.
        targets (ndarray): The last three columns.
    """
    data_set = _load_csv_rows(path_to_csvs)
    feats = data_set[:, :-3]
    targets = data_set[:, -3:]
    return feats, targets

def mean_error_dist(targets, preds):
    """Mean Euclidean distance between corresponding rows of targets and preds.

    # Arguments
        targets: Indexable collection of target vectors; its length sets the
            number of rows compared.
        preds: Indexable collection of predicted vectors, indexed in step with targets.

    # Returns
        Sum of the per-row L2 norms of (target - pred) divided by len(targets).
    """
    n_rows = len(targets)
    row_distances = (np.linalg.norm(targets[row] - preds[row])
                     for row in range(n_rows))
    return sum(row_distances) / n_rows


### <font color=red>NOTE !</font> 
Before we feed the data to a neural network, we first need to normalize it and subtract the mean for better convergence.
   

In [None]:
# Load the training split as (features, targets)
feats_train, targets_train = build_feats_targets(path_to_train_csvs)

# Rescale the raw readings with (x + 100) / 100, working in float32
feats_train = (feats_train.astype(np.float32) + 100.0) / 100.0

# Centre the features on a single scalar: the mean over every training entry
mean = feats_train.mean()
feats_train_norm = feats_train - mean

# verify dimensions of the returned feature matrix and a target matrix
assert feats_train.shape == (6049, 220)
assert targets_train.shape == (6049, 3)

In [None]:
# Load the validation split and rescale it with (x + 100) / 100 in float32
feats_val, targets_val = build_feats_targets(path_to_val_csvs)
feats_val = (feats_val.astype(np.float32) + 100.0) / 100.0

# Centre with the existing `mean`; it is not recomputed on this split
feats_val_norm = feats_val - mean

# verify dimensions of the returned feature matrix and a target matrix
assert feats_val.shape == (1976, 220)
assert targets_val.shape == (1976, 3)

In [None]:
# Load the test features (no targets) and rescale with (x + 100) / 100 in float32
feats_test = build_feats(path_to_test_csvs)
feats_test = (feats_test.astype(np.float32) + 100.0) / 100.0

# Centre with the existing `mean`; it is not recomputed on this split
feats_test_norm = feats_test - mean

# verify dimensions of the returned feature matrix
assert feats_test.shape == (2601, 220)

### Task 2. Using feed forward neural networks.
For this part, we provide you with a simple feed-forward neural network (a multi-layer perceptron). The code is given below. You can tune the parameters, extend the network, or even modify the model as you see fit in order to find the best model.

In [None]:
def lr_schedule(epoch):
    """Step learning-rate schedule.

        # Arguments
            epoch (int): Epoch number the learning rate is requested for

        # Returns
            lr (float): 1e-3 while epoch <= 50, 1e-4 afterwards

        The selected rate is printed on every call.
    """
    lr = 1e-4 if epoch > 50 else 1e-3
    print('Learning rate: ', lr)
    return lr

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Callback that takes its learning rate from lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Plateau-based reducer configured with a factor of sqrt(0.1), a patience of
# 5 epochs, no cooldown, and a lower bound of 0.5e-6 on the learning rate.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

# Both callbacks gathered in a single list.
callbacks = [lr_reducer, lr_scheduler]

In [None]:
# network parameters
input_size = feats_train.shape[1]   # one input per feature column
epochs = 100
batch_size = 8
hidden_units = 128
dropout = 0.1

# Three hidden blocks (Dense -> ReLU -> Dropout) followed by a 3-unit Dense
# output layer with no activation.
model = Sequential([
    Dense(hidden_units, input_dim=input_size),
    Activation('relu'),
    Dropout(dropout),
    Dense(hidden_units),
    Activation('relu'),
    Dropout(dropout),
    Dense(hidden_units),
    Activation('relu'),
    Dropout(dropout),
    Dense(3),
])
model.summary()

In [None]:
# Adam starts from the schedule's epoch-0 rate. `learning_rate` is the supported
# keyword; the old `lr` alias is deprecated and rejected by newer Keras releases.
optimizer = Adam(learning_rate=lr_schedule(0))
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae'])

# train the network
# The `callbacks` list (learning-rate scheduler + plateau reducer) has to be
# passed explicitly; otherwise neither callback takes part in training.
H = model.fit(
    feats_train_norm, targets_train,
    validation_data=(feats_val_norm, targets_val),
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    callbacks=callbacks)

Take advantage of the code below to visualize the progression of your training

In [None]:
# Training vs. validation MSE per epoch, saved as "mse_loss"
plt.style.use("ggplot")
fig, ax = plt.subplots()
epoch_axis = np.arange(epochs)
ax.plot(epoch_axis, H.history["loss"], label="train_loss")
ax.plot(epoch_axis, H.history["val_loss"], label="val_loss")
ax.set_title("MSE")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss")
ax.legend(loc="lower left")
save_fig("mse_loss")

In [None]:
# Training vs. validation MAE per epoch, saved as "mae_loss"
plt.style.use("ggplot")
fig, ax = plt.subplots()
epoch_axis = np.arange(epochs)
ax.plot(epoch_axis, H.history["mae"], label="train_mae")
ax.plot(epoch_axis, H.history["val_mae"], label="val_mae")
ax.set_title("MAE")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss")
ax.legend(loc="lower left")
save_fig("mae_loss")

Feel free to experiment with the network and let us know what result you got

In [None]:
# Predict on the normalized validation features, then display the mean
# Euclidean distance between the validation targets and those predictions.
preds = model.predict(feats_val_norm, batch_size=batch_size)
mean_error_dist(targets_val, preds)

### Feel free to experiment with the network or come up with a different DL approach. 
### The best mean error distance we got so far is 1.44.  Email us (<font color=blue>issai@nu.edu.kz</font>) the predictions of your best estimator on the test features AND your solutions to see how well you did !
<font color=red> Please don't forget that the target values of test set are stacked in the order of csv files. If your test features do not follow this order, your result will be ruined.</font>

In [None]:
# Predict on the normalized test features
preds = model.predict(feats_test_norm, batch_size=batch_size)

name = "John" # change to your first name
surname = "Snow" # change to your lastname

# email your csv file to issai@nu.edu.kz
submission_file = f"{name}_{surname}.csv"
pd.DataFrame(preds).to_csv(submission_file, header=None, index=None)