In [149]:
import sklearn
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from PyRadioLoc.Utils.GeoUtils import GeoUtils

In [150]:
def get_distance_array(y_pred, y_test):
    """Compute the distance between every predicted point and its reference point.

    Args:
        y_pred: Sequence of predicted points. Elements ``[0]`` and ``[1]`` of
            each point are forwarded to ``GeoUtils.distanceInKm`` as the
            second coordinate pair.
        y_test: Sequence of reference points, indexed the same way and
            forwarded as the first coordinate pair.

    Returns:
        list: One ``GeoUtils.distanceInKm`` result per (reference, prediction)
        pair, in input order.

    Raises:
        ValueError: If the two inputs do not contain the same number of points.
    """
    # zip() would silently drop the unmatched tail, hiding a mismatched
    # prediction/reference pairing, so fail loudly instead.
    if len(y_pred) != len(y_test):
        raise ValueError(
            "y_pred and y_test must have the same length (got %d and %d)"
            % (len(y_pred), len(y_test))
        )
    # Distinct loop names: the original loop variable rebound the `y_pred`
    # parameter while iterating over it.
    return [
        GeoUtils.distanceInKm(actual[0], actual[1], predicted[0], predicted[1])
        for actual, predicted in zip(y_test, y_pred)
    ]

def get_quality_metrics(y_pred, y_test):
    """Print summary statistics of the localization error, labelled in metres.

    The per-point distances come from ``get_distance_array``; each statistic is
    multiplied by 1000 before printing with an ``m`` suffix.

    Args:
        y_pred: Sequence of predicted points (see ``get_distance_array``).
        y_test: Sequence of reference points (see ``get_distance_array``).

    Returns:
        None. The minimum, maximum, mean and sample standard deviation are
        written to stdout.

    Raises:
        ValueError: If there are no points to evaluate.
    """
    dist = get_distance_array(y_pred, y_test)
    if len(dist) == 0:
        raise ValueError("cannot compute quality metrics for an empty set of points")

    km_to_m = 1000
    localization_error_min = min(dist) * km_to_m
    localization_error_max = max(dist) * km_to_m
    # numpy is used because bare `mean` / `stdev` are not brought into scope by
    # the notebook's import cell and would raise NameError on a fresh kernel.
    localization_error_mean = np.mean(dist) * km_to_m
    # ddof=1 gives the sample standard deviation; it is undefined for a single
    # point, so report NaN there instead of triggering a numpy warning.
    if len(dist) > 1:
        localization_error_standard_deviation = np.std(dist, ddof=1) * km_to_m
    else:
        localization_error_standard_deviation = float("nan")

    print("min:            %0.2f m" % localization_error_min)
    print("max:            %0.2f m" % localization_error_max)
    print("mean:           %0.2f m" % localization_error_mean)
    print("std deviation:  %0.2f m" % localization_error_standard_deviation)
    
def write_to_csv(y_pred, test_id, file_path):
    """Write predicted coordinates next to their point identifiers as CSV.

    Args:
        y_pred: Two-column array-like of predictions; the columns are written
            as ``lat`` and ``lon``.
        test_id: pandas Series (or DataFrame) of identifiers, one row per
            prediction, in the same order as ``y_pred``.
        file_path: Destination passed to ``DataFrame.to_csv``.

    Raises:
        ValueError: If ``y_pred`` and ``test_id`` have different row counts.
    """
    predictions = pd.DataFrame(y_pred, columns=['lat', 'lon']).reset_index(drop=True)
    # concat(axis=1) aligns on index labels, so an identifier Series with a
    # non-default index would be paired with the wrong rows (and padded with
    # NaN). Dropping both indexes makes the pairing strictly positional.
    identifiers = test_id.reset_index(drop=True)
    if len(predictions) != len(identifiers):
        raise ValueError(
            "y_pred and test_id must have the same number of rows (got %d and %d)"
            % (len(predictions), len(identifiers))
        )
    output = pd.concat([predictions, identifiers], axis=1)
    output.to_csv(path_or_buf=file_path, index=False)

In [86]:
# Read the training and test tables from the local CSV files, then preview
# the first rows of the training table.
db_test = pd.read_csv(filepath_or_buffer="database/LocTeste.csv")
db_train = pd.read_csv(filepath_or_buffer="database/LocTreino.csv")
db_train.head(5)

Unnamed: 0,lat,lon,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-8.05438,-34.95408,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-8.05337,-34.94889,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-8.0539,-34.95955,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1,7636
3,-8.05561,-34.95431,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
4,-8.05562,-34.95405,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026


In [91]:
# Feature matrices: every column except the two targets (lat, lon) and the
# point identifier (pontoId).
X_train = db_train.drop(columns=["lat", "lon", "pontoId"])
X_test = db_test.drop(columns=["lat", "lon", "pontoId"])
X_train.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3
0,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2
1,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4
2,-85.255714,-99.635714,-86.551429,-94.52,-100.225,-88.232857,-78.955,-92.08,-66.41,6,7,1
3,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2
4,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2


In [92]:
# Latitude column of each table.
lat_train, lat_test = db_train["lat"], db_test["lat"]
lat_train.head()

0   -8.05438
1   -8.05337
2   -8.05390
3   -8.05561
4   -8.05562
Name: lat, dtype: float64

In [93]:
# Longitude column of each table.
lon_train, lon_test = db_train["lon"], db_test["lon"]
lon_train.head()

0   -34.95408
1   -34.94889
2   -34.95955
3   -34.95431
4   -34.95405
Name: lon, dtype: float64

In [122]:
# Two-column (lat, lon) targets for each split.
y_train = pd.concat((lat_train, lon_train), axis="columns")
y_test = pd.concat((lat_test, lon_test), axis="columns")

# Test targets together with their point identifiers, for inspection.
test_id = db_test["pontoId"]
y_ref = pd.concat((y_test, test_id), axis="columns")
y_ref.head()

Unnamed: 0,lat,lon,pontoId
0,-8.05847,-34.94977,6697.0
1,-8.05088,-34.95318,8593.0
2,-8.05446,-34.95612,7492.0
3,-8.05514,-34.94997,7360.0
4,-8.05143,-34.95325,8432.0


In [131]:
from sklearn.ensemble import RandomForestRegressor

# Seeded 120-tree random forest trained on the raw features; fit() returns
# the fitted estimator, so construction and training are chained.
reg_cso = RandomForestRegressor(n_estimators=120, random_state=42).fit(X_train, y_train)
y_pred_cso = reg_cso.predict(X_test)

In [153]:
# Random-forest predictions get their own file. The original wrote them to
# "mlp_pred.csv", the same path as the MLP export, so one export silently
# overwrote the other.
write_to_csv(y_pred_cso, test_id, "rf_pred.csv")

In [133]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Regression scores on the (lat, lon) targets, expressed in the targets' own
# units: [MAE, MSE, RMSE, R^2]. The MSE is computed once and reused for RMSE.
mae_cso = mean_absolute_error(y_test, y_pred_cso)
mse_cso = mean_squared_error(y_test, y_pred_cso)
results = [mae_cso, mse_cso, np.sqrt(mse_cso), r2_score(y_test, y_pred_cso)]
results

[0.00018912108893137734,
 7.533678097848492e-08,
 0.00027447546516671563,
 0.9941826451878646]

In [152]:
# get_distance_array indexes rows positionally, so the targets are converted
# to a plain array first.
y_test_np = y_test.to_numpy()
# Arguments follow the declared order (y_pred, y_test); the original call had
# them swapped. NOTE(review): the printed numbers should not change provided
# GeoUtils.distanceInKm is symmetric in its two points - confirm.
get_quality_metrics(y_pred_cso, y_test_np)

min:            0.89 m
max:            146.11 m
mean:           33.49 m
std deviation:  26.82 m


In [99]:
from sklearn import preprocessing

# Fit the robust scaler on the training features only, then apply the same
# fitted transform to the test features.
scaler = preprocessing.RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same seeded 120-tree forest as before, trained on the scaled features.
reg_cso_scaled = RandomForestRegressor(n_estimators=120, random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_cso_scaled = reg_cso_scaled.predict(X_test_scaled)

In [148]:
# Arguments follow the declared order (y_pred, y_test); the original call had
# them swapped.
get_quality_metrics(y_pred_cso_scaled, y_test_np)

min:            0.89 m
max:            146.11 m
mean:           33.42 m
std deviation:  26.87 m


In [13]:
from sklearn.model_selection import GridSearchCV

# Search over the number of trees and the split criterion.
# NOTE(review): "mse"/"mae" are the criterion names used by older scikit-learn
# releases; newer releases name them "squared_error"/"absolute_error" -
# confirm against the installed version before re-running.
parameters = {'n_estimators': list(range(120, 125)), 'criterion': ["mse", "mae"]}

# Seeded like the other forests in this notebook; an unseeded forest makes
# best_params_ vary from run to run.
random_forest = RandomForestRegressor(random_state=42)

grid = GridSearchCV(
    random_forest,
    parameters,
    cv = 5,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_

{'criterion': 'mae', 'n_estimators': 120}

In [74]:
from sklearn.neural_network import MLPRegressor

# Hyper-parameter grid for the MLP.
# NOTE(review): scikit-learn documents `learning_rate` as only used when
# solver='sgd'; with solver fixed to "lbfgs" the three schedules should score
# identically once the estimator is seeded, so this axis only triples the
# number of fits - consider dropping it.
activation = ["identity", "logistic", "tanh", "relu"]
solver = ["lbfgs"]
learning_rate = ["constant", "invscaling", "adaptive"]

parameters = {'activation' : activation, "solver" : solver, "learning_rate" : learning_rate}

# Seeded so that differences between grid points come from the parameters
# rather than from random weight initialisation.
mlp_regressor = MLPRegressor(random_state=42)
grid = GridSearchCV(
    mlp_regressor,
    parameters,
    cv = 10,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_

{'activation': 'tanh', 'learning_rate': 'constant', 'solver': 'lbfgs'}

In [140]:
# MLP with the grid-search settings. random_state is fixed (same seed as the
# forests in this notebook) so the reported errors are reproducible across
# kernel restarts.
mlp = MLPRegressor(
    activation='tanh',
    learning_rate='constant',
    solver='lbfgs',
    random_state=42,
)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

In [144]:
# Localization error of the MLP predictions against the test targets.
get_quality_metrics(y_pred=y_pred_mlp, y_test=y_test_np)

min:            104.81 m
max:            1060.43 m
mean:           517.08 m
std deviation:  225.35 m


In [147]:
# Export the MLP predictions together with their point identifiers.
write_to_csv(y_pred=y_pred_mlp, test_id=test_id, file_path="mlp_pred.csv")