In [39]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from matplotlib import pyplot as plt

from PyRadioLoc.Utils.GeoUtils import GeoUtils

In [40]:
def get_distance_array(y_pred, y_test):
    """Return the per-sample distance between predicted and reference points.

    Both arguments are iterated in parallel; element ``[0]`` and ``[1]`` of
    each item are handed to ``GeoUtils.distanceInKm`` (reference point first,
    predicted point second). The result is a plain list with one entry per
    pair; iteration stops at the shorter input.
    """
    # Distinct loop names keep the `y_pred` parameter from being shadowed.
    return [
        GeoUtils.distanceInKm(ref_point[0], ref_point[1], pred_point[0], pred_point[1])
        for ref_point, pred_point in zip(y_test, y_pred)
    ]

def get_quality_metrics(y_pred, y_test):
    """Print min, max, mean and standard deviation of the localization error.

    Distances come from ``get_distance_array``; each statistic is multiplied
    by 1000 before being printed with an ``m`` suffix. Nothing is returned.
    """
    distances = get_distance_array(y_pred, y_test)
    # (output template, statistic) pairs, listed in the order they are printed.
    report_rows = (
        ("min:            %0.2f m", min(distances)),
        ("max:            %0.2f m", max(distances)),
        ("mean:           %0.2f m", np.mean(distances)),
        ("std deviation:  %0.2f m", np.std(distances)),
    )
    for template, statistic in report_rows:
        print(template % (statistic * 1000))
    
def write_to_csv(y_pred, test_id, file_path):
    """Write predicted coordinates next to their point ids as a CSV file.

    Parameters
    ----------
    y_pred : array-like with two columns
        Predicted values; stored under the column names ``lat`` and ``lon``.
    test_id : pandas Series or DataFrame
        Identifier(s) for each predicted row, in the same row order as
        ``y_pred``.
    file_path : str or path-like
        Destination passed to ``DataFrame.to_csv``.

    The file is written without the index column.
    """
    # pd.concat(axis=1) aligns rows on index labels, not on position. Both
    # sides are re-indexed 0..n-1 so ids taken from a filtered or shuffled
    # frame still pair with the matching prediction instead of producing
    # NaN-padded extra rows.
    predictions = pd.DataFrame(y_pred, columns=['lat', 'lon']).reset_index(drop=True)
    ids = test_id.reset_index(drop=True)
    y_pred_dataframe = pd.concat([predictions, ids], axis=1)
    y_pred_dataframe.to_csv(path_or_buf=file_path, index = False)

In [42]:
# Load the training table and the test table from the local `database` folder.
db_train = pd.read_csv(filepath_or_buffer="database/LocTreino.csv")
db_test = pd.read_csv(filepath_or_buffer="database/test.csv")
# Preview the first training rows.
db_train.head()

Unnamed: 0,lat,lon,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3,pontoId
0,-8.05438,-34.95408,-89.044286,-96.667143,-91.32,-96.106667,-94.48,-90.187143,-94.125,-85.182,-96.42,5,4,2,7531
1,-8.05337,-34.94889,-91.97,-89.548571,-88.555714,-82.52,-80.445,-89.91,-88.916667,-91.602,-97.255,5,2,4,7748
2,-8.05561,-34.95431,-90.025714,-96.2,-91.204286,-98.16,-91.935,-92.671429,-78.636667,-68.34,-92.78,6,5,2,7035
3,-8.05562,-34.95405,-94.525714,-96.2,-93.83,-98.216667,-94.67,-94.844286,-79.065,-68.25,-96.2125,6,5,2,7026
4,-8.05007,-34.95474,-82.999429,-98.255357,-87.818571,-101.535,-97.59125,-90.656905,-80.617222,-99.934667,-96.982778,3,5,3,8797


In [43]:
# Model inputs: every training column except the two targets and the point id.
X_train = db_train.drop(columns=["lat", "lon", "pontoId"])
# Only the id column is removed from the test table here.
X_test = db_test.drop(columns=["pontoId"])
X_test.head()

Unnamed: 0,rssi_1_1,rssi_1_2,rssi_1_3,rssi_2_1,rssi_2_2,rssi_2_3,rssi_3_1,rssi_3_2,rssi_3_3,delay_1,delay_2,delay_3
0,-103.705,-110.425,-107.381667,-110.33,-93.8,-99.03,-98.33,-99.005,-115.0,6,3,3
1,-102.61,-115.0,-101.38,-115.0,-104.295,-97.36,-96.98,-99.5725,-115.0,6,4,3
2,-105.225,-115.0,-105.21,-115.0,-105.835,-115.0,-86.0,-86.784286,-108.12,6,4,3
3,-103.436667,-115.0,-99.75,-115.0,-102.805,-99.5,-100.31,-99.32,-115.0,6,4,3
4,-101.196667,-115.0,-96.04,-115.0,-96.79,-96.1,-94.42,-91.3275,-115.0,6,4,3


In [45]:
# Latitude column of the training table, kept as its own Series.
lat_train = db_train.loc[:, "lat"]
lat_train.head()

0   -8.05438
1   -8.05337
2   -8.05561
3   -8.05562
4   -8.05007
Name: lat, dtype: float64

In [46]:
# Longitude column of the training table, kept as its own Series.
lon_train = db_train.loc[:, "lon"]
lon_train.head()

0   -34.95408
1   -34.94889
2   -34.95431
3   -34.95405
4   -34.95474
Name: lon, dtype: float64

In [47]:
# Two-column (lat, lon) regression target assembled from the two Series.
y_train = pd.concat((lat_train, lon_train), axis="columns")
# Point ids of the test rows; they are exported alongside the predictions.
test_id = db_test["pontoId"]

In [48]:
from sklearn.ensemble import RandomForestRegressor

# 120-tree random forest with a fixed seed, fitted on the training features
# against the two-column (lat, lon) target; `fit` returns the estimator itself.
reg_cso = RandomForestRegressor(n_estimators=120, random_state=42).fit(X_train, y_train)
# Predicted (lat, lon) pairs for the test rows.
y_pred_cso = reg_cso.predict(X_test)

In [49]:
# Export the random-forest predictions. DataFrame.to_csv does not create
# missing directories, so make sure the output folder exists first.
result_file = "./finalResult/Resultados_Equipe4_Metodo_4.csv"
Path(result_file).parent.mkdir(parents=True, exist_ok=True)
write_to_csv(y_pred_cso, test_id, result_file)

In [36]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# No visible cell defines `y_test` (its definition is commented out), so on a
# fresh kernel this cell used to fail with NameError. The metrics are only
# computed when ground-truth test coordinates are actually available.
if "y_test" in globals():
    mse = mean_squared_error(y_test, y_pred_cso)
    # Order: MAE, MSE, RMSE, R^2.
    results = [mean_absolute_error(y_test, y_pred_cso), mse, np.sqrt(mse), r2_score(y_test, y_pred_cso)]
else:
    results = None
    print("y_test is not defined (unlabelled test set); skipping regression metrics.")
results

[0.00020511156260101987,
 1.1177266859978454e-07,
 0.00033432419685057874,
 0.9890148572631754]

In [37]:
# No visible cell defines `y_test`, so the distance report is only produced
# when ground-truth test coordinates are available instead of raising NameError.
if "y_test" in globals():
    y_test_np = y_test.to_numpy()
    # Arguments follow the (y_pred, y_test) signature of get_quality_metrics.
    # NOTE(review): the result only matches the previous swapped call if
    # GeoUtils.distanceInKm is symmetric — confirm.
    get_quality_metrics(y_pred_cso, y_test_np)
else:
    print("y_test is not defined (unlabelled test set); skipping localization error report.")

min:            1.16 m
max:            393.16 m
mean:           36.09 m
std deviation:  37.78 m


In [99]:
from sklearn import preprocessing

# Fit the robust scaler on the training features only, then apply the same
# fitted transform to the test features.
scaler = preprocessing.RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same forest configuration as the unscaled model, trained on scaled inputs.
reg_cso_scaled = RandomForestRegressor(n_estimators=120, random_state=42).fit(X_train_scaled, y_train)
y_pred_cso_scaled = reg_cso_scaled.predict(X_test_scaled)

In [148]:
# Localization error of the scaled-input forest. Arguments follow the
# (y_pred, y_test) signature of get_quality_metrics.
# NOTE(review): the result only matches the previous swapped call if
# GeoUtils.distanceInKm is symmetric — confirm.
get_quality_metrics(y_pred_cso_scaled, y_test_np)

min:            0.89 m
max:            146.11 m
mean:           33.42 m
std deviation:  26.87 m


In [13]:
from sklearn.model_selection import GridSearchCV

# Search space: 120..124 trees crossed with two split criteria.
# NOTE(review): "mse"/"mae" are the older scikit-learn spellings; newer
# releases name them "squared_error"/"absolute_error" — confirm against the
# installed version before re-running.
parameters = {'n_estimators': list(range(120, 125)), 'criterion': ["mse", "mae"]}
# Fixed seed so that score differences between candidates come from the
# hyper-parameters rather than from forest randomness, and re-runs agree.
random_forest = RandomForestRegressor(random_state = 42)

# 5-fold cross-validated search scored by negative MSE; the best candidate is
# refitted on the whole training set.
grid = GridSearchCV(
    random_forest,
    parameters,
    cv = 5,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_

{'criterion': 'mae', 'n_estimators': 120}

In [74]:
from sklearn.neural_network import MLPRegressor

activation = ["identity", "logistic", "tanh", "relu"]
solver = ["lbfgs"]
# The learning-rate schedule is only used by solver="sgd". With "lbfgs" as the
# only solver, searching over schedules just repeated identical models, so a
# single value is kept. The key stays in the grid so `best_params_` keeps the
# same shape.
learning_rate = ["constant"]

parameters = {'activation' : activation, "solver" : solver, "learning_rate" : learning_rate}

# Fixed seed: weight initialisation is random, and without it candidates are
# ranked partly by chance and re-runs can pick a different winner.
mlp_regressor = MLPRegressor(random_state = 42)
# 10-fold cross-validated search scored by negative MSE; the best candidate is
# refitted on the whole training set.
grid = GridSearchCV(
    mlp_regressor,
    parameters,
    cv = 10,
    scoring = 'neg_mean_squared_error',
    return_train_score = True,
    refit = True
)
grid.fit(X_train, y_train)
grid.best_params_

{'activation': 'tanh', 'learning_rate': 'constant', 'solver': 'lbfgs'}

In [140]:
# MLP configured with the values reported by the grid search. The seed fixes
# the random weight initialisation so the exported predictions are reproducible.
mlp = MLPRegressor(activation ='tanh', learning_rate = 'constant', solver = 'lbfgs', random_state = 42)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

In [144]:
# Localization error report for the MLP predictions.
get_quality_metrics(y_pred=y_pred_mlp, y_test=y_test_np)

min:            104.81 m
max:            1060.43 m
mean:           517.08 m
std deviation:  225.35 m


In [147]:
# Export the MLP predictions together with the test point ids.
write_to_csv(y_pred=y_pred_mlp, test_id=test_id, file_path="mlp_pred.csv")