In [3]:
import sys
sys.path.append("./lib/")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from lib.Methods import GeneralMethods
from lib.edasSearch import EdasHyperparameterSearch
from lib.Hiperparametros import HyperparameterSwitcher
from lib.ImportacionModelos import getClassifierNames
from lib.ImportacionModelos import getClassifierModels
from lib.ImportacionModelos import getRegressorNames
from lib.ImportacionModelos import getRegressorModels
from lib.graphicGenerator import GraphicBuilder
from sklearn.model_selection import train_test_split

__Distribución de los datos: 80% para entrenamiento y 20% para validación__

In [17]:
# Seed shared by the train/test split and the models defined further down.
seed = 9
# Number of leading columns used as features; every column after them is a target.
xSize = 1041

df = pd.read_csv("data/filtred.csv")
X = df.loc[:, df.columns[:xSize]]
Y = df.loc[:, df.columns[xSize:]]

# Hold out 20% of the rows for validation.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=seed, test_size=0.2
)

# One GraphicBuilder per split, each fed features and targets side by side.
gbTrain = GraphicBuilder(pd.concat([X_train, y_train], axis=1))
gbTest = GraphicBuilder(pd.concat([X_test, y_test], axis=1))

## Algorithm Modeling

__RandomizedSearch: Probando con un modelo de clasificación; los demás se probarán en el servidor__

In [None]:
import os

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold

# Cross-validation splitter shared by the hyperparameter-search cells.
kf = KFold(n_splits=10)

estimadorDictionary = getClassifierModels(includeEnsambled=True)
hypSwitcher = HyperparameterSwitcher()
process = 'randomized'
n_iteraciones = 2
idModeloPrueba = 0

result = {}
modelName = getClassifierNames(includeEnsambled=True)[idModeloPrueba]

estimador = estimadorDictionary[modelName]
# getHyperparameters returns a callable; calling it yields the search space.
parametros = hypSwitcher.getHyperparameters(modelName)()
random_search = RandomizedSearchCV(estimador, param_distributions=parametros,
                                   n_iter=n_iteraciones, cv=kf, scoring="accuracy",
                                   return_train_score=False, n_jobs=-1,
                                   # Seeded so the sampled candidates are reproducible.
                                   random_state=seed)
random_search.fit(X_train, y_train.FLOOR)
result[modelName] = random_search.cv_results_

# Summary columns, one row per sampled candidate.
cvResults = result[modelName]
df1 = pd.DataFrame({
    'Accuracy': cvResults['mean_test_score'],
    'stdAccuracy': cvResults['std_test_score'],
    'FitTime': cvResults['mean_fit_time'],
    'stdFitTime': cvResults['std_fit_time'],
    'ScoreTime': cvResults['mean_score_time'],
    'stdScoreTime': cvResults['std_score_time'],
})
# Was `d_resl[...]`, a name that is never defined (NameError); the results live in `result`.
df2 = pd.DataFrame(result[modelName]['params'])
# Best accuracy first; ties broken by the shortest fit time.
dff = pd.concat([df1, df2], axis=1).sort_values(['Accuracy', 'FitTime'], ascending=[False, True])

# Create the output folder on first run so to_csv does not fail on a fresh checkout.
outputDir = "result/" + process
os.makedirs(outputDir, exist_ok=True)
dff.to_csv(outputDir + "/" + modelName + ".csv", index=False)

__ExhaustiveSearch: Probando con un modelo de Clasificación, los demás se probarán en Servidor__

In [None]:
import os

process = 'exhaustive'
n_iteraciones = 2

result = {}
modelName = getClassifierNames(includeEnsambled=True)[idModeloPrueba]

estimador = estimadorDictionary[modelName]
# getHyperparameters returns a callable; calling it yields the search space.
parametros = hypSwitcher.getHyperparameters(modelName)()
# NOTE(review): this cell is labelled 'exhaustive' but still runs RandomizedSearchCV with
# n_iter=2. Switching to GridSearchCV requires `parametros` to be a grid of explicit
# value lists -- confirm what HyperparameterSwitcher returns before changing it.
random_search = RandomizedSearchCV(estimador, param_distributions=parametros,
                                   n_iter=n_iteraciones, cv=kf, scoring="accuracy",
                                   return_train_score=False, n_jobs=-1,
                                   # Seeded so the sampled candidates are reproducible.
                                   random_state=seed)
random_search.fit(X_train, y_train.FLOOR)
result[modelName] = random_search.cv_results_

# Summary columns, one row per evaluated candidate.
cvResults = result[modelName]
df1 = pd.DataFrame({
    'Accuracy': cvResults['mean_test_score'],
    'stdAccuracy': cvResults['std_test_score'],
    'FitTime': cvResults['mean_fit_time'],
    'stdFitTime': cvResults['std_fit_time'],
    'ScoreTime': cvResults['mean_score_time'],
    'stdScoreTime': cvResults['std_score_time'],
})
# Was `d_resl[...]`, a name that is never defined (NameError); the results live in `result`.
df2 = pd.DataFrame(result[modelName]['params'])
# Best accuracy first; ties broken by the shortest fit time.
dff = pd.concat([df1, df2], axis=1).sort_values(['Accuracy', 'FitTime'], ascending=[False, True])

# Create the output folder on first run so to_csv does not fail on a fresh checkout.
outputDir = "result/" + process
os.makedirs(outputDir, exist_ok=True)
dff.to_csv(outputDir + "/" + modelName + ".csv", index=False)

In [None]:
# 3D map of the validation split: latitude and longitude against floor.
gbTest.graphicMap3D(
    columns=["LATITUDE", "LONGITUDE", "FLOOR"],
    filename="buildingsMap3dTest",
)

## Model Training

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
from sklearn import metrics as scoreMetrics
import geopy.distance
from functools import reduce
#from sklearn.metrics import roc_auc_score # binary
#from sklearn.metrics import auc # binary


# 10 consecutive folds (KFold does not shuffle unless asked to).
kf = KFold(n_splits=10)

# Baseline forests: both use every core and share the notebook seed.
modelClassifier = RandomForestClassifier(
    random_state=seed,
    n_jobs=-1,
)
modelRegressor = RandomForestRegressor(
    random_state=seed,
    n_jobs=-1,
)

def accert(y_true, y_pred):
    """Mean of the per-class hit ratio taken from the confusion matrix.

    For each class the ratio is ``correct predictions / times the class was
    predicted`` (diagonal divided by the column sums of the confusion matrix),
    and the ratios are averaged with equal weight per class.

    A class that appears in ``y_true`` but is never predicted has a zero
    column sum. The original code divided 0 by 0 there, so the whole score
    became NaN; such classes now contribute a ratio of 0 instead.

    NOTE(review): dividing by the column sums gives per-class precision. If the
    intent behind the 'nbaccuracy' scorer was per-class recall (balanced
    accuracy), the divisor should be ``cm.sum(1)`` -- confirm.

    Parameters
    ----------
    y_true : array-like of true class labels.
    y_pred : array-like of predicted class labels.

    Returns
    -------
    float
        Unweighted mean of the per-class ratios.
    """
    cm = confusion_matrix(y_true, y_pred)
    hits = cm.diagonal()
    predictedPerClass = cm.sum(0)
    # Divide only where the class was predicted at least once; elsewhere keep 0.
    ratios = np.divide(
        hits,
        predictedPerClass,
        out=np.zeros(len(hits), dtype=float),
        where=predictedPerClass != 0,
    )
    return ratios.mean()

_meanLat = 39.9926853
_meanLon = -0.0673033
_minLongitude = -7705
_maxLongitude = -7290
_minLatitude = 4864735
_maxLatitude = 4865023
_maxLatitudeGPS = 39.993720
_maxLongitudeGPS = -0.069254
_minLatitudeGPS = 39.991626
_minLongitudeGPS = -0.065425

def longitudeToGPS(x):
    return (_maxLongitudeGPS - _minLongitudeGPS) * (x - _minLongitude) / (_maxLongitude - _minLongitude) + _minLongitudeGPS

def latitudeToGPS(x):
    return (_maxLatitudeGPS - _minLatitudeGPS) * (x - _minLatitude) / (_maxLatitude - _minLatitude) + _minLatitudeGPS

def latitudeListDistance(y_true, y_pred):
    """Per-sample distance in metres between true and predicted latitudes.

    Both inputs are local latitudes; they are converted with ``latitudeToGPS``
    and compared at the fixed reference longitude ``_meanLon``.

    Fixes relative to the previous version:
    * geopy points are ``(latitude, longitude)``; the coordinates were passed
      as ``(longitude, latitude)``.
    * ``geopy.distance.vincenty`` was removed in geopy 2.0, so ``geodesic`` is
      used when available, with ``vincenty`` as a fallback for old installs.

    Parameters
    ----------
    y_true, y_pred : array-like of local latitude values, same length.

    Returns
    -------
    list of float
        One distance in metres per sample.
    """
    # `or` short-circuits, so `vincenty` is only looked up when geodesic is missing.
    measure = getattr(geopy.distance, "geodesic", None) or geopy.distance.vincenty
    return [
        measure((latTrue, _meanLon), (latPred, _meanLon)).m
        for latTrue, latPred in zip(latitudeToGPS(y_true), latitudeToGPS(y_pred))
    ]

def longitudeListDistance(y_true, y_pred):
    """Per-sample distance in metres between true and predicted longitudes.

    Both inputs are local longitudes; they are converted with
    ``longitudeToGPS`` and compared at the fixed reference latitude ``_meanLat``.

    Fixes relative to the previous version:
    * geopy points are ``(latitude, longitude)``; the coordinates were passed
      as ``(longitude, latitude)``, which measured the offset as a latitude
      change near the equator and overstated the distance.
    * ``geopy.distance.vincenty`` was removed in geopy 2.0, so ``geodesic`` is
      used when available, with ``vincenty`` as a fallback for old installs.

    Parameters
    ----------
    y_true, y_pred : array-like of local longitude values, same length.

    Returns
    -------
    list of float
        One distance in metres per sample.
    """
    # `or` short-circuits, so `vincenty` is only looked up when geodesic is missing.
    measure = getattr(geopy.distance, "geodesic", None) or geopy.distance.vincenty
    return [
        measure((_meanLat, lonTrue), (_meanLat, lonPred)).m
        for lonTrue, lonPred in zip(longitudeToGPS(y_true), longitudeToGPS(y_pred))
    ]
    
def distance2d(y_true, y_pred):
    """Mean distance in metres between true and predicted coordinates on one axis.

    Despite the name, a single coordinate axis is evaluated per call. The axis
    is inferred from the sign of the targets: if any value in ``y_true`` is
    positive the inputs are treated as latitudes, otherwise as longitudes
    (the local bounds defined in this notebook are positive for latitude and
    negative for longitude).

    Parameters
    ----------
    y_true, y_pred : numpy array or pandas Series of local coordinates, same length.

    Returns
    -------
    float
        Arithmetic mean of the per-sample distances.

    Raises
    ------
    ValueError
        If there are no samples to average.
    """
    if (y_true > 0).any():
        ldis = latitudeListDistance(y_true, y_pred)
    else:
        ldis = longitudeListDistance(y_true, y_pred)
    if not ldis:
        # Previously surfaced as an opaque "reduce() of empty sequence" TypeError.
        raise ValueError("distance2d requires at least one sample")
    return sum(ldis) / len(ldis)

def mse(y_true, y_pred):
    """Mean squared error of the predictions; a plain-function wrapper for ``make_scorer``."""
    squaredError = scoreMetrics.mean_squared_error(y_true, y_pred)
    return squaredError
    
def mae(y_true, y_pred):
    """Mean absolute error of the predictions; a plain-function wrapper for ``make_scorer``."""
    absoluteError = scoreMetrics.mean_absolute_error(y_true, y_pred)
    return absoluteError
    
# Classification scorers, name -> scorer.
# Disabled alternatives kept for reference: 'average_precision' ('average_precision_weighted'),
# 'precision', 'recall', 'balanced_accuracy', 'roc_auc'.
scoring_acc = dict(
    nbaccuracy=make_scorer(accert),
    accuracy='accuracy',
)

# Regression scorers, name -> scorer.
# make_scorer defaults to greater_is_better=True, so the three error metrics are
# reported with their natural (positive) sign.
# NOTE(review): if this dict is ever used to pick a best model, these error scorers
# need greater_is_better=False -- confirm how it is consumed.
scoring_reg = dict(
    mae=make_scorer(mae),            # built-in equivalent: 'mean_absolute_error'
    mse=make_scorer(mse),            # built-in equivalent: 'mean_squared_error'
    distance=make_scorer(distance2d),
    r2='r2',
)

## TODO: Add and modify some metrics
## http://scikit-learn.org/stable/modules/model_evaluation.html
## https://www.icmla-conference.org/icmla10/CFP_Tutorial_files/jose.pdf