## Trilateration Notebook
### Experiments based on the computation of the intersection of three or more beacons to compute the user's position

### Imports
Imports of required libraries

In [None]:
#CSV Related
import pandas as pd
import csv

# Machine Learning
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import  accuracy_score
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.pipeline import make_pipeline
import sklearn
import scipy

#Graphical Display
%matplotlib inline
import math
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
sns.despine()
from pandas.plotting import scatter_matrix

plt.style.use('seaborn-whitegrid')

#Additional Helper Libraries
from operator import itemgetter
import re

### Load File
Function that loads a file from disk

In [None]:
#Function that reads a csv file and structures the data accordingly
def load_file(filename):
    """Read the CSV file at ``filename`` and return it as a pandas DataFrame."""
    return pd.read_csv(filename)

### Replace 0 RSSI Values with NaN

In [None]:
def replace_features_nan(dataset):
    """Turn zero readings into NaN in both RSSI feature columns.

    The ``rssi_Value`` and ``rolling_mean_rssi`` columns of ``dataset`` are
    overwritten in place; nothing is returned.
    """
    for column in ('rssi_Value', 'rolling_mean_rssi'):
        dataset[column] = dataset[column].replace(0, np.nan)

### Initialize Dataset
Initialization of the data from the file

In [None]:
# Load the table of candidate positions; the trilateration step later reads its
# 'X' and 'Y' columns plus one column per beacon.
dataset = load_file('trilateration_test.csv')
# Show the table and its (rows, columns) shape.
display(dataset)
display(dataset.shape)

### Initialize Training Dataset
Initialization of the data from the file

In [None]:
# Load the training observations from disk.
train_dataset = load_file('../../dataset_train_university.csv')
# The 'distance' column (used later as the regression target) is a copy of 'coordinate_Y'.
positions = train_dataset['coordinate_Y']
train_dataset['distance'] = positions
# Zero readings in the RSSI columns become NaN.
replace_features_nan(train_dataset)
display(train_dataset)
display(train_dataset.shape)
# 50-bin histograms of the training columns.
train_dataset.hist(bins=50,figsize=(12,6))
plt.show()

### Initialize Test Set

In [None]:
# Load the test observations from disk.
test_dataset = load_file('dataset_test_trilateration_university.csv')
# As for the training set, 'distance' is a copy of 'coordinate_Y'.
positions = test_dataset['coordinate_Y']
test_dataset['distance'] = positions
# Zero readings in the RSSI columns become NaN.
replace_features_nan(test_dataset)
display(test_dataset)
display(test_dataset.shape)
# 50-bin histograms of the test columns.
test_dataset.hist(bins=50,figsize=(12,6))
plt.show()

### File Overall Description
Usage of describe pandas function on the dataset

In [None]:
# Summary statistics of the candidate-position table, transposed so each column becomes a row.
display(dataset.describe().transpose())

### % of Missing Values on Test Dataset

In [None]:
def compute_missing_values(dataset):
    """Display, per column, the number and the percentage of missing values.

    Args:
        dataset: pandas DataFrame to inspect.

    The percentage is taken over all rows of a column (missing and
    non-missing), so it always lies between 0 and 100.
    """
    missing_mask = dataset.isnull()
    missing_values = missing_mask.sum()
    # mean() divides by the total number of rows. Dividing by count() (the
    # non-null count) would give a missing/non-missing ratio instead and turn
    # into inf for a column that is entirely empty.
    missing_percentage = missing_mask.mean() * 100
    missing_dataset = pd.DataFrame({
        'Missing Values': missing_values,
        '% of Missing Values': missing_percentage,
    })
    display(missing_dataset)

In [None]:
# Report missing values for the test set first, then for the training set.
compute_missing_values(test_dataset)
compute_missing_values(train_dataset)

### Overall Histogram of dataset attributes

In [None]:
# 50-bin histograms of the candidate-position table's columns.
dataset.hist(bins=50,figsize=(12,6))
plt.show()

### Beacon RSSI Display of Training and Test Datasets
Graphical display of the RSSI values recorded at each position

In [None]:
# Distribution of the raw RSSI readings in the training set.
plt.figure(figsize=(6, 3))
sns.distplot(train_dataset['rssi_Value'])
# Decorate the axes once the curve is drawn.
plt.title('RSSI Probability Distribution')
plt.ylabel('Probability')
plt.xlabel('RSSI (dB)')
plt.show()

In [None]:
# Distribution of the raw RSSI readings in the test set.
plt.figure(figsize=(6, 3))
sns.distplot(test_dataset['rssi_Value'])
# Decorate the axes once the curve is drawn.
plt.title('RSSI Probability Distribution')
plt.ylabel('Probability')
plt.xlabel('RSSI (dB)')
plt.show()

### Beacon Rolling Mean RSSI Display of Training and Test Datasets
Graphical display of the rolling mean value recorded at each position

In [None]:
# Distribution of the rolling-mean RSSI in the training set.
plt.figure(figsize=(6, 3))
sns.distplot(train_dataset['rolling_mean_rssi'])
# Decorate the axes once the curve is drawn.
plt.title('RSSI Rolling Mean Probability Distribution')
plt.ylabel('Probability')
plt.xlabel('RSSI (dB)')
plt.show()

In [None]:
# Distribution of the rolling-mean RSSI in the test set.
plt.figure(figsize=(6, 3))
sns.distplot(test_dataset['rolling_mean_rssi'])
# Decorate the axes once the curve is drawn.
plt.title('RSSI Rolling Mean Probability Distribution')
plt.ylabel('Probability')
plt.xlabel('RSSI (dB)')
plt.show()

### Graphical Display of RSSI/Rolling Mean Distribution of Test Dataset
Graphical display of the previous measures - rssi and rolling mean

In [None]:
# Overlay the density estimates of the raw and the rolling-mean RSSI of the training set.
plt.figure(figsize=(6,3))
plt.title('RSSI Probability Distribution')
# The KDE plot for the RSSI column
sns.kdeplot(train_dataset['rssi_Value'], label='RSSI')
# The KDE plot for the Rolling Mean column
sns.kdeplot(train_dataset['rolling_mean_rssi'], label='Rolling Mean RSSI')
plt.xlabel('RSSI (dB)')
plt.ylabel('Probability')
# Draw the legend explicitly: the labels above only name the curves, and
# recent seaborn releases do not add a legend for them on their own.
plt.legend()
plt.show()

In [None]:
# Overlay the density estimates of the raw and the rolling-mean RSSI of the test set.
plt.figure(figsize=(6,3))
plt.title('RSSI Probability Distribution')
# The KDE plot for the RSSI column
sns.kdeplot(test_dataset['rssi_Value'], label='RSSI')
# The KDE plot for the Rolling Mean column
sns.kdeplot(test_dataset['rolling_mean_rssi'], label='Rolling Mean RSSI')
plt.xlabel('RSSI (dB)')
plt.ylabel('Probability')
# Draw the legend explicitly: the labels above only name the curves, and
# recent seaborn releases do not add a legend for them on their own.
plt.legend()
plt.show()

### Correlations
Check how much each attribute correlates with the others. Values range from -1 to 1. Close to 1 means a strong positive correlation.

In [None]:
def compute_correlations(dataset,attribute,flag=0):
    """Display the correlation matrix of ``dataset`` and the ranking for one column.

    Args:
        dataset: pandas DataFrame to analyse.
        attribute: name of the (numeric) column whose correlations with every
            other column are displayed in descending order.
        flag: when 1, additionally draw a scatter matrix of the ``distance``,
            ``rssi_Value`` and ``rolling_mean_rssi`` columns.
    """
    # Correlate numeric (and boolean) columns only: with text columns present,
    # DataFrame.corr() raises on recent pandas instead of silently skipping them.
    numeric_dataset = dataset.select_dtypes(include=['number', 'bool'])
    corr_matrix = numeric_dataset.corr()
    display(corr_matrix)
    display(corr_matrix[attribute].sort_values(ascending=False))
    if flag == 1:
        attributes = ["distance","rssi_Value", "rolling_mean_rssi"]
        scatter_matrix(dataset[attributes], figsize=(12, 8))

In [None]:
# Correlations of the candidate-position table, ranked against its 'X' column.
compute_correlations(dataset,'X')
# Correlations of the training set ranked against 'distance'; flag 1 also draws the scatter matrix.
compute_correlations(train_dataset,'distance',1)

### Data Cleaning
Fill in missing values and fix/remove outliers

In [None]:
def compute_data_cleaning(dataset,feature):
    """Fill the missing entries of one column with a value derived from its minimum.

    The filler is the column minimum multiplied by 1.010. The column is
    replaced in place on ``dataset``; nothing is returned.
    """
    column = dataset[feature]
    dataset[feature] = column.fillna(column.min() * 1.010)

In [None]:
# Fill the NaN entries of both RSSI features, first in the training set, then in the test set.
compute_data_cleaning(train_dataset,'rssi_Value')
compute_data_cleaning(train_dataset,'rolling_mean_rssi')
compute_data_cleaning(test_dataset,'rssi_Value')
compute_data_cleaning(test_dataset,'rolling_mean_rssi')

### Dealing with Categorical Data
Using Hot-Encoder or Label Encoder to convert text/categorical data into numerical data. ML algorithms prefer it

In [None]:
# Encoder instance shared by every compute_encoder call.
label_encoder = LabelEncoder()
def compute_encoder(categorical_data,flag):
    """Encode ``categorical_data`` as integer labels with the shared encoder.

    Args:
        categorical_data: values to encode.
        flag: 0 fits the shared encoder on the data before encoding; any
            other value encodes with the mapping already fitted.

    Returns:
        The encoded labels.
    """
    if flag == 0:
        return label_encoder.fit_transform(categorical_data)
    return label_encoder.transform(categorical_data)

In [None]:
# Encode the textual 'zone' column of the training set as integer labels.
categorical_zone = train_dataset[['zone']]
print("Previous Categorical Data")
display(categorical_zone)
# The label encoder works on 1-D input; flattening the single-column frame
# avoids the column-vector conversion warning without changing the labels.
zone_changed = compute_encoder(categorical_zone.values.ravel(),0)
# The encoder in use is a LabelEncoder, not a one-hot encoder.
print("After Label Encoder")
train_dataset['labels'] = zone_changed
display(train_dataset.head(5))

In [None]:
# Encode the textual 'zone' column of the test set as integer labels.
categorical_zone = test_dataset[['zone']]
print("Previous Categorical Data")
display(categorical_zone)
# The label encoder works on 1-D input; flattening the single-column frame
# avoids the column-vector conversion warning without changing the labels.
# NOTE(review): flag 0 re-fits the shared encoder on the test zones, so the
# integers may not match the training encoding - confirm this is intended.
zone_changed = compute_encoder(categorical_zone.values.ravel(),0)
# The encoder in use is a LabelEncoder, not a one-hot encoder.
print("After Label Encoder")
test_dataset['labels'] = zone_changed
display(test_dataset.head(5))

### Plot Dataset
Graphical Display of the observations in the dataset

In [None]:
# Scatter of the candidate positions in the (X, Y) plane.
plt.figure(figsize=(14,5))
plt.scatter(dataset['X'],dataset['Y'],label='Reference Points',color='orange')
plt.title('Observations')
# The axes hold the 'X' and 'Y' columns, not zones or RSSI values.
plt.xlabel('X')
plt.ylabel('Y')
plt.legend(loc='upper right')
plt.show()

### Initialize Training Data
Training data initialization for prediction purposes

In [None]:
# Feature matrix used to fit the regressor: raw RSSI plus its rolling mean.
trainX_data = train_dataset[['rssi_Value','rolling_mean_rssi']]
display(trainX_data.shape)
# Encoded zone labels reshaped to a single-column array.
default_groups = train_dataset['labels'].values.reshape(-1,1)
# Empty placeholder; replaced later by the first trilateration result.
initial_prediction = tuple()

In [None]:
# Regression target: the 'distance' column reshaped to a single-column array.
train_Y = pd.DataFrame(train_dataset['distance']).values.reshape(-1,1)
display(train_Y.shape)

In [None]:
# Mean RSSI and mean rolling-mean RSSI for each distinct training distance.
data_plot_means = train_dataset.groupby(['distance'])[['rssi_Value','rolling_mean_rssi']].mean()
display(data_plot_means)

### Plot Training Data
Graphical Display of the observations in the dataset

In [None]:
# Training RSSI and rolling-mean RSSI plotted against the 'distance' column.
plt.figure(figsize=(14,5))
plt.scatter(train_dataset['distance'],train_dataset['rssi_Value'],label='RSSI Value',color='blue')
plt.scatter(train_dataset['distance'],train_dataset['rolling_mean_rssi'],label='Rolling Mean RSSI Value',color='orange')
plt.title('Observations')
# The x axis carries the 'distance' column, not the zone.
plt.xlabel('Distance')
plt.ylabel('RSSI(dB)')
plt.legend(loc='upper right')
plt.show()

### Initialize Test Data

In [None]:
# Raw RSSI of the test set as a single-column array.
test_X_rssi = pd.DataFrame(test_dataset['rssi_Value']).values.reshape(-1,1)
display(test_X_rssi.shape)
# Rolling-mean RSSI of the test set as a single-column array.
test_X_rolling_mean = pd.DataFrame(test_dataset['rolling_mean_rssi']).values.reshape(-1,1)
display(test_X_rolling_mean.shape)
# Both features together, in the same column order as the training features.
test_combination_features_X = test_dataset[['rssi_Value','rolling_mean_rssi']]
display(test_combination_features_X.shape)

In [None]:
# The test set's 'distance' column reshaped to a single-column array.
test_Y = pd.DataFrame(test_dataset['distance']).values.reshape(-1,1)
display(test_Y.shape)

In [None]:
# Mean RSSI and mean rolling-mean RSSI for each distinct test distance
# (this rebinds data_plot_means, previously holding the training means).
data_plot_means = test_dataset.groupby(['distance'])[['rssi_Value','rolling_mean_rssi']].mean()
display(data_plot_means)

### Plot Test Data

In [None]:
# Test RSSI and rolling-mean RSSI plotted against the 'distance' column.
plt.figure(figsize=(14,5))
plt.scatter(test_dataset['distance'],test_dataset['rssi_Value'],label='Test RSSI Value',color='blue')
plt.scatter(test_dataset['distance'],test_dataset['rolling_mean_rssi'],label='Test Rolling Mean RSSI Value',color='orange')
plt.title('Observations')
# The x axis carries the 'distance' column, not the zone.
plt.xlabel('Distance')
plt.ylabel('RSSI(dB)')
plt.legend(loc='upper right')
plt.show()

### Statistics Output Format
Structure of the statistical output of each call to the machine learning algorithm

In [None]:
def structure_data(data):
    """Summarise the values of ``data['error']`` as a list of statistics.

    Args:
        data: DataFrame with an ``'error'`` column.

    Returns:
        list: ``[mae, mse, rmse, std, q25, q50, q75, q95, min, max]`` computed
        over the ``'error'`` column.
    """
    errors = data['error']
    mae = errors.abs().mean()
    mse = np.power(errors, 2).mean()
    rmse = np.sqrt(mse)
    # The third quantile is 0.75 (the previous code asked for 0.7 by mistake).
    quantiles = [errors.quantile(q=level) for level in (0.25, 0.5, 0.75, 0.95)]
    return [mae, mse, rmse, errors.std(), *quantiles, errors.min(), errors.max()]

### Proximity ML Algorithm to compute Distances

In [None]:
def compute_KNN_with_Regression(testX_data=None, scaler=None, n_neighbors=5,
                                weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                                metric_params=None, n_jobs=1):
    """Fit a KNN regressor on the global training data and predict ``testX_data``.

    The estimator is fitted on the module-level ``trainX_data`` / ``train_Y``
    on every call.

    Args:
        testX_data: feature rows to predict; must be provided.
        scaler: optional transformer placed in front of the regressor in a
            pipeline; ``None`` uses the bare regressor.
        n_neighbors, weights, algorithm, leaf_size, p, metric, metric_params,
        n_jobs: forwarded unchanged to ``KNeighborsRegressor``.

    Returns:
        The predictions for ``testX_data``.
    """
    # Pass every hyper-parameter by keyword: current scikit-learn accepts only
    # n_neighbors positionally and rejects the rest with a TypeError.
    knn_regression_estimator = KNeighborsRegressor(
        n_neighbors=n_neighbors,
        weights=weights,
        algorithm=algorithm,
        leaf_size=leaf_size,
        p=p,
        metric=metric,
        metric_params=metric_params,
        n_jobs=n_jobs,
    )
    if scaler is not None:
        # Make pipeline using scaler transformation
        main_estimator = make_pipeline(scaler, knn_regression_estimator)
    else:
        main_estimator = knn_regression_estimator
    # Fit the training data
    main_estimator.fit(trainX_data, train_Y)
    # Predict the results of the testing data features
    return main_estimator.predict(testX_data)

In [None]:
# Per-beacon distance estimates and the beacon identifiers they belong to;
# both are refreshed by compute_distance and read by compute_trilateration.
distances = {}
beacons = []
def compute_distance(flag=0):
    """Estimate one distance per beacon from the global ``test_dataset``.

    The test rows are grouped by their ``'BLE Beacon'`` value; for each group
    the KNN regressor predicts a distance per row and the mean of those
    predictions is stored in the global ``distances`` under the beacon key.
    The beacon keys are collected in the global ``beacons`` list.

    Args:
        flag: when 0, the rows, the prediction shape and the estimates are
            printed/displayed; any other value runs silently.
    """
    # Start from a clean slate so that repeated calls neither pile duplicate
    # entries onto ``beacons`` nor keep estimates of beacons no longer present.
    distances.clear()
    beacons.clear()
    # Group by the bare column name: grouping by a one-element list makes
    # recent pandas yield 1-tuples as keys, which would no longer match the
    # per-beacon column names looked up during trilateration.
    for beacon, readings in test_dataset.groupby('BLE Beacon'):
        if flag == 0:
            print("BEACON " + str(beacon))
            display(readings)
        beacon_features = readings[['rssi_Value','rolling_mean_rssi']]
        prediction_list = compute_KNN_with_Regression(testX_data=beacon_features,scaler=StandardScaler(),n_neighbors=30,
                                                      metric='manhattan')
        if flag == 0:
            display(prediction_list.shape)
        distances[beacon] = np.mean(prediction_list)
        beacons.append(beacon)
        if flag == 0:
            print("PREDICTION: " + str(distances[beacon]))
            print("\n")
    if flag == 0:
        print("DISTANCES ESTIMATIONS")
        print(distances)

### Compute Trilateration Method

In [None]:
def compute_squared_errors(d1,d2):
    """Return the squared difference between ``d1`` and ``d2`` as a float."""
    difference = d1 - d2
    return math.pow(difference, 2.0)

In [None]:
def compute_display(results_mse):
    """Plot the values of ``results_mse`` against their stringified keys as a line chart."""
    labels = [str(key) for key in results_mse]
    values = list(results_mse.values())
    plt.figure(figsize=(23,8))
    plt.plot(labels, values)
    plt.show()

In [None]:
def compute_trilateration(flag = 0):
    """Return the candidate point whose known beacon distances best match the estimates.

    For every row of the global ``dataset`` the squared differences between
    the row's per-beacon value and the matching entry of the global
    ``distances`` are averaged over the global ``beacons`` list. The
    ``(X, Y)`` pair with the smallest average is printed and returned.

    Args:
        flag: when 0, each point's score is printed and all scores are
            plotted with ``compute_display``.
    """
    scores = {}
    for _, candidate in dataset.iterrows():
        point = (candidate['X'], candidate['Y'])
        total = 0.0
        for name in beacons:
            total += compute_squared_errors(candidate[name], distances[name])
        score = total / len(beacons)
        if flag == 0:
            print('MSE FOR POINT: x: ' + str(point[0]) + ", y: " + str(point[1]) + " is " + str(score))
        scores[point] = score
    best_point = min(scores, key=scores.get)
    print("PREDICTION is " + str(best_point))
    if flag == 0:
        compute_display(scores)
    return best_point

### Experiment - Compute Distances
Make initial prediction on distances 

In [None]:
# Estimate one distance per beacon from the test set, with verbose output (flag defaults to 0).
compute_distance()

### Experiment - Compute Trilateration
Make initial prediction on trilateration

In [None]:
# Keep the first trilateration result; the repeated-runs experiment compares against it.
initial_prediction = compute_trilateration()

### Experiment - Make several runs on Trilateration
This experiment checks the consistency of the predictions across repeated runs

In [None]:
# Column names for the statistics table, in the order structure_data returns them.
statistical_cols = ['mae','mse','rmse','std','.25','0.5','0.75','0.95','min','max']
# Thirty runs, numbered 1..30.
runs = list(range(1, 31))
predictions = list()
actual = list()
results_stats = list()
for run in runs:
    # Re-estimate the beacon distances and the position silently (flag=1).
    compute_distance(flag=1)
    prediction = compute_trilateration(flag=1)
    predictions.append(prediction)
    # The reference value of every run is the initial prediction, not a measured position.
    actual.append(initial_prediction)
results = list(zip(actual,predictions))
error_dataframe = pd.DataFrame(results,columns=['Actual','Prediction'])
observed_errors = list()
for index, row in error_dataframe.iterrows():
        x1 = row['Actual']
        x2 = row['Prediction']
        # Euclidean distance between the two (x, y) points.
        error = math.hypot(x2[0] - x1[0], x2[1] - x1[1])
        observed_errors.append(error)
# 'error' is rebound here from the last scalar error to the one-column error frame.
error = pd.DataFrame(observed_errors, columns=["error"])
result = error_dataframe.join(error)
display(result)
# Summarise the error column and show it as a one-row statistics table.
stats = structure_data(result)
results_stats.append(stats)
statistics = pd.DataFrame(columns=statistical_cols,data=results_stats)
display(statistics)

### Experiment - Having data labeled to a certain position, can the algorithm correctly figure the position