## Modelagem - Apartamentos 

Carga dos dados e pré-processamento das variáveis para a modelagem de preços de apartamentos.

In [None]:
import pandas as pd

# Read the apartment listings CSV and keep only the columns used downstream:
# municipality, area, latitude/longitude and price.
data_apartamentos = pd.read_csv("data/html_apartamentos/data_apartamentos.csv").loc[
    :, ["municipio", "area", "lat", "lng", "valor"]
]

In [None]:
# Clean the apartment listings: drop duplicate rows, normalise the text
# columns, discard rows whose area/price text is not a single plain value,
# and cast area and price to float.
# .copy() gives an independent frame so the column assignments below never
# write into a slice (avoids SettingWithCopyWarning).
data_apartamentos = data_apartamentos.drop_duplicates().copy()
data_apartamentos["municipio"] = data_apartamentos["municipio"].str.lower()

# Strip embedded newlines. regex=False states the literal match explicitly
# instead of relying on the default, which differs between pandas versions.
for coluna in ("area", "valor"):
    data_apartamentos[coluna] = data_apartamentos[coluna].str.replace("\n", "", regex=False)

# Rows are dropped when the area text contains "-" or the price text contains
# any of the markers below (presumably area ranges, "starting from" prices,
# rentals and "price on request" listings -- confirm against the raw data).
marcadores_excluidos = ("A partir de ", "Mês", "Sob Consulta", "Dia", "Semana")
manter = ~data_apartamentos["area"].str.contains("-", regex=False)
for marcador in marcadores_excluidos:
    manter &= ~data_apartamentos["valor"].str.contains(marcador, regex=False)
data_apartamentos = data_apartamentos[manter].copy()

# Remove "R", "$", "." and "," in a single pass. Inside a character class
# "." and "$" are literals, and the raw string avoids the invalid "\$"
# escape sequence of the previous pattern.
data_apartamentos["valor"] = (
    data_apartamentos["valor"].str.replace(r"[R$.,]", "", regex=True).astype(float)
)
data_apartamentos["area"] = data_apartamentos["area"].astype(float)

Os modelos utilizarão a localização e o tamanho dos imóveis para estimar os preços.

In [None]:
# Preview the first five cleaned rows.
data_apartamentos.head(n=5)

Unnamed: 0,municipio,area,lat,lng,valor
0,riodejaneiro,94.0,-22.91383,-43.252564,390000.0
2,riodejaneiro,83.0,-22.9689,-43.391418,559000.0
3,riodejaneiro,82.0,-22.971108,-43.190286,900000.0
4,riodejaneiro,200.0,-22.99682,-43.259414,1700000.0
5,riodejaneiro,46.0,-22.974553,-43.188991,529000.0


Para criar a rotina de otimização dos hiperparâmetros, optou-se pela biblioteca scikit-optimize, que utiliza otimização bayesiana para encontrar as melhores combinações de parâmetros do modelo.

In [None]:
# Empty performance table with one column for the municipality ("mun") and
# one for its MSLE ("msle").
colunas_desempenho = ["mun", "msle"]
desempenho_mdl = pd.DataFrame(columns=colunas_desempenho)

In [None]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.8.1-py2.py3-none-any.whl (101 kB)
[?25l[K     |███▎                            | 10 kB 22.3 MB/s eta 0:00:01[K     |██████▌                         | 20 kB 28.9 MB/s eta 0:00:01[K     |█████████▊                      | 30 kB 13.4 MB/s eta 0:00:01[K     |█████████████                   | 40 kB 10.2 MB/s eta 0:00:01[K     |████████████████▏               | 51 kB 3.9 MB/s eta 0:00:01[K     |███████████████████▍            | 61 kB 4.6 MB/s eta 0:00:01[K     |██████████████████████▊         | 71 kB 4.4 MB/s eta 0:00:01[K     |██████████████████████████      | 81 kB 5.0 MB/s eta 0:00:01[K     |█████████████████████████████▏  | 92 kB 5.0 MB/s eta 0:00:01[K     |████████████████████████████████| 101 kB 3.7 MB/s 
Collecting pyaml>=16.9
  Downloading pyaml-21.8.3-py2.py3-none-any.whl (17 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.8.3 scikit-optimize-0.8.1


In [None]:
from skopt import gp_minimize
from lightgbm import LGBMRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_log_error
import pickle

# Hyperparameter search space handed to gp_minimize. It does not depend on the
# municipality, so it is built once instead of on every loop iteration.
# The order must match `nomes_hiperparametros` below.
# NOTE(review): per the LightGBM parameter docs, `subsample` is an alias of
# `bagging_fraction` and `colsample_bytree` of `feature_fraction`, so each
# pair tunes a single underlying setting. Kept as-is to preserve the search
# space -- consider dropping one entry of each pair.
space = [(1e-3, 1e-1, 'log-uniform'), # learning_rate
         (2, 128), # num_leaves
         (1, 100), # min_child_samples
         (0.05, 1.0), # subsample
         (0.1, 1.0), # colsample_bytree
         (0.1, 0.9), # feature_fraction
         (0.8, 1), # bagging_fraction
         (17, 25), # max_depth
         (0.001, 0.1), # min_split_gain
         (10, 25) # min_child_weight
         ]
nomes_hiperparametros = ["learning_rate", "num_leaves", "min_child_samples", "subsample",
                         "colsample_bytree", "feature_fraction", "bagging_fraction",
                         "max_depth", "min_split_gain", "min_child_weight"]
# Settings shared by every candidate model and by the final saved model.
parametros_fixos = {"random_state": 0, "subsample_freq": 1, "n_estimators": 100}
n_folds = 10

# One record per trained municipality. The results frame is rebuilt from this
# list after the loop, so re-running the cell never duplicates rows.
registros_desempenho = []

for mun in data_apartamentos["municipio"].unique():

  print(f"Treinando modelo para {mun}")

  # .copy() so the log transform below writes to an independent frame rather
  # than a slice of data_apartamentos (avoids SettingWithCopyWarning).
  train = data_apartamentos[data_apartamentos["municipio"] == mun].copy()

  # cross_val_predict needs at least one row per fold; skip tiny groups
  # instead of aborting the whole run.
  if len(train) < n_folds:
    print(f"Ignorando {mun}: apenas {len(train)} registros (< {n_folds} folds)")
    continue

  # The model is trained on log-prices; np.log replaces math.log, which was
  # used here without `math` being imported in this cell.
  train["valor"] = np.log(train["valor"])

  X = train.drop(columns=["valor", "municipio"])
  y = train["valor"]

  def treinar_modelo(params):
    """Objective for gp_minimize: cross-validated MSLE of one candidate.

    params: list of hyperparameter values ordered as `nomes_hiperparametros`.
    Returns the mean squared log error between the actual prices and the
    out-of-fold predictions, both mapped back from log scale with np.exp.
    """
    print(params, '\n')

    mdl = LGBMRegressor(**dict(zip(nomes_hiperparametros, params)), **parametros_fixos)

    yhat = cross_val_predict(mdl, X, y, cv=n_folds)
    return mean_squared_log_error(np.exp(y), np.exp(yhat))

  # n_initial_points replaces the deprecated n_random_starts (same meaning).
  resultados_gp = gp_minimize(treinar_modelo, space, random_state=1, verbose=1,
                              n_calls=200, n_initial_points=10)

  registros_desempenho.append({"mun": mun, "msle": resultados_gp.fun})

  ### saved model
  # Refit on all rows of the municipality with the best hyperparameters so the
  # pickled estimator can predict directly; previously it was saved unfitted.
  # Predictions are log-prices -- apply np.exp to get prices back.
  mdl = LGBMRegressor(**dict(zip(nomes_hiperparametros, resultados_gp.x)), **parametros_fixos)
  mdl.fit(X, y)

  pkl_filename = f"models/apartamentos/mdl_apart_{mun}.pkl"
  with open(pkl_filename, 'wb') as file:
    pickle.dump(mdl, file)


desempenho_mdl = pd.DataFrame(registros_desempenho, columns=["mun", "msle"])
desempenho_mdl.to_csv("models/apartamentos/desempenho_mdl.csv", index=False)

Treinando modelo para riodejaneiro
Iteration No: 1 started. Evaluating function at random point.
[0.09871192514273254, 120, 14, 0.9990884895579377, 0.3124800792567785, 0.417264581836821, 0.8775821482324015, 22, 0.09361836800979718, 23] 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8189
Function value obtained: 0.1535
Current minimum: 0.1535
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.1573
Function value obtained: 0.4959
Current minimum: 0.1535
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.5777
Function value obtained: 0.1324
Current minimum: 0.1324
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

KeyboardInterrupt: ignored

In [None]:
# Display the per-municipality MSLE table.
desempenho_mdl

Unnamed: 0,mun,msle
0,riodejaneiro,0.120337
1,riodejaneiro,0.120337
2,brasília,0.109812
3,salvador,0.11761
4,fortaleza,0.119315
5,belohorizonte,0.114709
6,manaus,0.148878
7,curitiba,0.121017
8,goiânia,0.095487
9,belém,0.100101


## Modelagem - Residências

In [None]:
import pandas as pd

# Read the house listings CSV and keep only the columns used downstream:
# municipality, area, latitude/longitude and price.
data_residen = pd.read_csv("data/html_residen/data_residen.csv").loc[
    :, ["municipio", "area", "lat", "lng", "valor"]
]

In [None]:
# Clean the house listings: drop duplicate rows, normalise the text columns,
# discard rows whose area/price text is not a single plain value, and cast
# area and price to float.
# .copy() gives an independent frame so the column assignments below never
# write into a slice (avoids SettingWithCopyWarning).
data_residen = data_residen.drop_duplicates().copy()
data_residen["municipio"] = data_residen["municipio"].str.lower()

# Strip embedded newlines. regex=False states the literal match explicitly
# instead of relying on the default, which differs between pandas versions.
for coluna in ("area", "valor"):
    data_residen[coluna] = data_residen[coluna].str.replace("\n", "", regex=False)

# Rows are dropped when the area text contains "-" or the price text contains
# any of the markers below (presumably area ranges, "starting from" prices,
# rentals and "price on request" listings -- confirm against the raw data).
marcadores_excluidos = ("A partir de ", "Mês", "Sob Consulta", "Dia", "Semana")
manter = ~data_residen["area"].str.contains("-", regex=False)
for marcador in marcadores_excluidos:
    manter &= ~data_residen["valor"].str.contains(marcador, regex=False)
data_residen = data_residen[manter].copy()

# Remove "R", "$", "." and "," in a single pass. Inside a character class
# "." and "$" are literals, and the raw string avoids the invalid "\$"
# escape sequence of the previous pattern.
data_residen["valor"] = (
    data_residen["valor"].str.replace(r"[R$.,]", "", regex=True).astype(float)
)
data_residen["area"] = data_residen["area"].astype(float)

In [None]:
# Empty performance table with one column for the municipality ("mun") and
# one for its MSLE ("msle").
colunas_desempenho = ["mun", "msle"]
desempenho_mdl = pd.DataFrame(columns=colunas_desempenho)

In [None]:
from skopt import gp_minimize
from lightgbm import LGBMRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_log_error
import pickle
import math

# Hyperparameter search space handed to gp_minimize. It does not depend on the
# municipality, so it is built once instead of on every loop iteration.
# The order must match `nomes_hiperparametros` below.
# NOTE(review): per the LightGBM parameter docs, `subsample` is an alias of
# `bagging_fraction` and `colsample_bytree` of `feature_fraction`, so each
# pair tunes a single underlying setting. Kept as-is to preserve the search
# space -- consider dropping one entry of each pair.
space = [(1e-3, 1e-1, 'log-uniform'), # learning_rate
         (2, 128), # num_leaves
         (1, 100), # min_child_samples
         (0.05, 1.0), # subsample
         (0.1, 1.0), # colsample_bytree
         (0.1, 0.9), # feature_fraction
         (0.8, 1), # bagging_fraction
         (17, 25), # max_depth
         (0.001, 0.1), # min_split_gain
         (10, 25) # min_child_weight
         ]
nomes_hiperparametros = ["learning_rate", "num_leaves", "min_child_samples", "subsample",
                         "colsample_bytree", "feature_fraction", "bagging_fraction",
                         "max_depth", "min_split_gain", "min_child_weight"]
# Settings shared by every candidate model and by the final saved model.
parametros_fixos = {"random_state": 0, "subsample_freq": 1, "n_estimators": 100}
n_folds = 10

# One record per trained municipality. The results frame is rebuilt from this
# list after the loop, so re-running the cell never duplicates rows.
registros_desempenho = []

for mun in data_residen["municipio"].unique():

  print(f"Treinando modelo para {mun}")

  # .copy() so the log transform below writes to an independent frame rather
  # than a slice of data_residen (avoids SettingWithCopyWarning).
  train = data_residen[data_residen["municipio"] == mun].copy()

  # cross_val_predict needs at least one row per fold; skip tiny groups
  # instead of aborting the whole run.
  if len(train) < n_folds:
    print(f"Ignorando {mun}: apenas {len(train)} registros (< {n_folds} folds)")
    continue

  # The model is trained on log-prices (vectorised equivalent of mapping
  # math.log over the column).
  train["valor"] = np.log(train["valor"])

  X = train.drop(columns=["valor", "municipio"])
  y = train["valor"]

  def treinar_modelo(params):
    """Objective for gp_minimize: cross-validated MSLE of one candidate.

    params: list of hyperparameter values ordered as `nomes_hiperparametros`.
    Returns the mean squared log error between the actual prices and the
    out-of-fold predictions, both mapped back from log scale with np.exp.
    """
    print(params, '\n')

    mdl = LGBMRegressor(**dict(zip(nomes_hiperparametros, params)), **parametros_fixos)

    yhat = cross_val_predict(mdl, X, y, cv=n_folds)
    return mean_squared_log_error(np.exp(y), np.exp(yhat))

  # n_initial_points replaces the deprecated n_random_starts (same meaning).
  resultados_gp = gp_minimize(treinar_modelo, space, random_state=1, verbose=1,
                              n_calls=200, n_initial_points=10)

  registros_desempenho.append({"mun": mun, "msle": resultados_gp.fun})

  ### saved model
  # Refit on all rows of the municipality with the best hyperparameters so the
  # pickled estimator can predict directly; previously it was saved unfitted.
  # Predictions are log-prices -- apply np.exp to get prices back.
  mdl = LGBMRegressor(**dict(zip(nomes_hiperparametros, resultados_gp.x)), **parametros_fixos)
  mdl.fit(X, y)

  pkl_filename = f"models/residencias/mdl_residen_{mun}.pkl"
  with open(pkl_filename, 'wb') as file:
    pickle.dump(mdl, file)


desempenho_mdl = pd.DataFrame(registros_desempenho, columns=["mun", "msle"])
desempenho_mdl.to_csv("models/residencias/desempenho_mdl.csv", index=False)

Treinando modelo para sãopaulo
Iteration No: 1 started. Evaluating function at random point.
[0.09871192514273254, 120, 14, 0.9990884895579377, 0.3124800792567785, 0.417264581836821, 0.8775821482324015, 22, 0.09361836800979718, 23] 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.9540
Function value obtained: 0.2003
Current minimum: 0.2003
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.1203
Function value obtained: 0.6067
Current minimum: 0.2003
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.3672
Function value obtained: 0.1532
Current minimum: 0.1532
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.0579
Function value obtained: 0.2637
Current minimum: 0.2637
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.1538
Function value obtained: 0.7637
Current minimum: 0.2637
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.3897
Function value obtained: 0.2172
Current minimum: 0.2172
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.5607
Function value obtained: 0.2536
Current minimum: 0.2536
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.4858
Function value obtained: 0.6012
Current minimum: 0.2536
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.9842
Function value obtained: 0.2169
Current minimum: 0.2169
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8539
Function value obtained: 0.3813
Current minimum: 0.3813
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.7596
Function value obtained: 0.9099
Current minimum: 0.3813
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.6175
Function value obtained: 0.3336
Current minimum: 0.3336
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7025
Function value obtained: 0.1464
Current minimum: 0.1464
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.8990
Function value obtained: 0.2971
Current minimum: 0.1464
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.9873
Function value obtained: 0.1326
Current minimum: 0.1326
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8138
Function value obtained: 0.1601
Current minimum: 0.1601
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.1014
Function value obtained: 0.3684
Current minimum: 0.1601
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.2590
Function value obtained: 0.1257
Current minimum: 0.1257
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3243
Function value obtained: 0.2589
Current minimum: 0.2589
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2619
Function value obtained: 0.4372
Current minimum: 0.2589
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.4607
Function value obtained: 0.2352
Current minimum: 0.2352
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7817
Function value obtained: 0.1496
Current minimum: 0.1496
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.9275
Function value obtained: 0.4306
Current minimum: 0.1496
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.0889
Function value obtained: 0.1290
Current minimum: 0.1290
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8353
Function value obtained: 0.1981
Current minimum: 0.1981
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.9090
Function value obtained: 0.4374
Current minimum: 0.1981
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.8653
Function value obtained: 0.1691
Current minimum: 0.1691
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3047
Function value obtained: 0.3534
Current minimum: 0.3534
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2327
Function value obtained: 0.5858
Current minimum: 0.3534
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.3456
Function value obtained: 0.3383
Current minimum: 0.3383
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.6888
Function value obtained: 0.1689
Current minimum: 0.1689
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.8108
Function value obtained: 0.3525
Current minimum: 0.1689
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.7630
Function value obtained: 0.1477
Current minimum: 0.1477
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3258
Function value obtained: 0.2360
Current minimum: 0.2360
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2605
Function value obtained: 0.4159
Current minimum: 0.2360
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.4240
Function value obtained: 0.1989
Current minimum: 0.1989
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2464
Function value obtained: 0.1674
Current minimum: 0.1674
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2143
Function value obtained: 0.4014
Current minimum: 0.1674
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.3061
Function value obtained: 0.1808
Current minimum: 0.1674
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806



Iteration No: 148 ended. Search finished for the next optimal point.
Time taken: 4.1382
Function value obtained: 0.1612
Current minimum: 0.1609
Iteration No: 149 started. Searching for the next optimal point.
[0.1, 91, 19, 1.0, 0.1, 0.9, 0.8, 17, 0.001, 25] 

Iteration No: 149 ended. Search finished for the next optimal point.
Time taken: 4.1646
Function value obtained: 0.1612
Current minimum: 0.1609
Iteration No: 150 started. Searching for the next optimal point.
[0.1, 128, 28, 1.0, 1.0, 0.9, 0.887311707308095, 25, 0.001, 25] 

Iteration No: 150 ended. Search finished for the next optimal point.
Time taken: 4.3070
Function value obtained: 0.1640
Current minimum: 0.1609
Iteration No: 151 started. Searching for the next optimal point.
[0.04075530988304775, 2, 29, 0.7570588139673861, 0.8832436998828246, 0.4348096617543088, 0.8012791493816366, 21, 0.004929707296495226, 10] 

Iteration No: 151 ended. Search finished for the next optimal point.
Time taken: 4.4743
Function value obtained: 0.



Iteration No: 196 ended. Search finished for the next optimal point.
Time taken: 7.5483
Function value obtained: 0.1612
Current minimum: 0.1609
Iteration No: 197 started. Searching for the next optimal point.
[0.1, 103, 1, 1.0, 1.0, 0.9, 0.8, 17, 0.001, 25] 

Iteration No: 197 ended. Search finished for the next optimal point.
Time taken: 7.3758
Function value obtained: 0.1612
Current minimum: 0.1609
Iteration No: 198 started. Searching for the next optimal point.
[0.08556158891768433, 128, 1, 0.05, 0.1, 0.9, 0.8, 25, 0.001, 25] 

Iteration No: 198 ended. Search finished for the next optimal point.
Time taken: 7.6624
Function value obtained: 0.1632
Current minimum: 0.1609
Iteration No: 199 started. Searching for the next optimal point.
[0.1, 42, 23, 0.6042570111555987, 0.1, 0.9, 0.8, 25, 0.001, 22] 

Iteration No: 199 ended. Search finished for the next optimal point.
Time taken: 7.8009
Function value obtained: 0.1636
Current minimum: 0.1609
Iteration No: 200 started. Searching for the

## Modelagem - Terrenos

In [None]:
import pandas as pd

# Read the land-plot listings CSV and keep only the columns used downstream:
# municipality, area, latitude/longitude and price.
data_terrenos = pd.read_csv("data/html_terrenos/data_terrenos.csv").loc[
    :, ["municipio", "area", "lat", "lng", "valor"]
]

In [None]:
# Clean the land-plot listings: drop duplicate rows, normalise the text
# columns, discard rows whose area/price text is not a single plain value,
# and cast area and price to float.
# .copy() gives an independent frame so the column assignments below never
# write into a slice (avoids SettingWithCopyWarning).
data_terrenos = data_terrenos.drop_duplicates().copy()
data_terrenos["municipio"] = data_terrenos["municipio"].str.lower()

# Strip embedded newlines. regex=False states the literal match explicitly
# instead of relying on the default, which differs between pandas versions.
for coluna in ("area", "valor"):
    data_terrenos[coluna] = data_terrenos[coluna].str.replace("\n", "", regex=False)

# Rows are dropped when the area text contains "-" or the price text contains
# any of the markers below (presumably area ranges, "starting from" prices,
# rentals and "price on request" listings -- confirm against the raw data).
marcadores_excluidos = ("A partir de ", "Mês", "Sob Consulta", "Dia", "Semana")
manter = ~data_terrenos["area"].str.contains("-", regex=False)
for marcador in marcadores_excluidos:
    manter &= ~data_terrenos["valor"].str.contains(marcador, regex=False)
data_terrenos = data_terrenos[manter].copy()

# Remove "R", "$", "." and "," in a single pass. Inside a character class
# "." and "$" are literals, and the raw string avoids the invalid "\$"
# escape sequence of the previous pattern.
data_terrenos["valor"] = (
    data_terrenos["valor"].str.replace(r"[R$.,]", "", regex=True).astype(float)
)
data_terrenos["area"] = data_terrenos["area"].astype(float)

In [None]:
# Empty performance table with one column for the municipality ("mun") and
# one for its MSLE ("msle").
colunas_desempenho = ["mun", "msle"]
desempenho_mdl = pd.DataFrame(columns=colunas_desempenho)

In [None]:
from skopt import gp_minimize
from lightgbm import LGBMRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_log_error
import pickle

# Hyper-parameter search space, in the positional order consumed by
# `_criar_modelo`. It does not depend on the municipality, so it is built once
# instead of on every loop iteration.
# NOTE(review): the LightGBM docs list `feature_fraction` and `bagging_fraction`
# as aliases of `colsample_bytree` and `subsample`; searching both of each pair
# is probably redundant -- confirm which value LightGBM keeps before pruning.
space = [(1e-3, 1e-1, 'log-uniform'),  # learning_rate
         (2, 128),  # num_leaves
         (1, 100),  # min_child_samples
         (0.05, 1.0),  # subsample
         (0.1, 1.0),  # colsample_bytree
         (0.1, 0.9),  # feature_fraction
         (0.8, 1),  # bagging_fraction
         (17, 25),  # max_depth
         (0.001, 0.1),  # min_split_gain
         (10, 25)  # min_child_weight
         ]


def _criar_modelo(params):
    """Build an unfitted LGBMRegressor from a positional hyper-parameter list.

    Parameters
    ----------
    params : sequence of length 10
        Values in the same order as the dimensions of ``space``.

    Returns
    -------
    LGBMRegressor
        Estimator configured with ``params`` plus the fixed settings
        ``random_state=0``, ``subsample_freq=1`` and ``n_estimators=100``.
    """
    (learning_rate, num_leaves, min_child_samples, subsample, colsample_bytree,
     feature_fraction, bagging_fraction, max_depth, min_split_gain,
     min_child_weight) = params

    return LGBMRegressor(learning_rate=learning_rate, num_leaves=num_leaves,
                         min_child_samples=min_child_samples, subsample=subsample,
                         colsample_bytree=colsample_bytree, feature_fraction=feature_fraction,
                         bagging_fraction=bagging_fraction, max_depth=max_depth,
                         min_split_gain=min_split_gain, min_child_weight=min_child_weight,
                         random_state=0, subsample_freq=1, n_estimators=100)


# Start from the rows already present in `desempenho_mdl` and rebuild the frame
# once after the loop; `DataFrame.append` no longer exists in pandas >= 2.0.
registros_desempenho = desempenho_mdl.to_dict("records")

for mun in data_terrenos["municipio"].unique():

    print(f"Treinando modelo para {mun}")

    # `.copy()` avoids the SettingWithCopyWarning raised when the log transform
    # is written into a filtered slice of `data_terrenos`.
    train = data_terrenos[data_terrenos["municipio"] == mun].copy()
    # The model is trained on log(valor); np.log is vectorised and does not
    # depend on the `math` module, which this cell never imports.
    train["valor"] = np.log(train["valor"])

    X = train.drop(columns=["valor", "municipio"])
    y = train["valor"]

    def treinar_modelo(params):
        """Objective minimised by ``gp_minimize`` for the current municipality.

        Runs 10-fold ``cross_val_predict`` on the log-price target, maps both
        predictions and targets back with ``np.exp`` and returns their mean
        squared log error.
        """
        print(params, '\n')

        mdl = _criar_modelo(params)
        yhat = cross_val_predict(mdl, X, y, cv=10)

        return mean_squared_log_error(np.exp(y), np.exp(yhat))

    resultados_gp = gp_minimize(treinar_modelo, space, random_state=1, verbose=1, n_calls=200, n_random_starts=10)

    registros_desempenho.append({"mun": mun, "msle": resultados_gp.fun})

    # Persist the model built from the best parameters found. It is fitted on
    # all rows of the municipality first: previously the estimator was pickled
    # without ever being trained, so the saved file could not predict.
    # Predictions from the saved model are in log scale (apply np.exp).
    mdl = _criar_modelo(resultados_gp.x)
    mdl.fit(X, y)

    pkl_filename = f"models/terrenos/mdl_terrenos_{mun}.pkl"
    with open(pkl_filename, 'wb') as file:
        pickle.dump(mdl, file)


desempenho_mdl = pd.DataFrame(registros_desempenho, columns=["mun", "msle"])
desempenho_mdl.to_csv("models/terrenos/desempenho_mdl.csv", index=False)

Treinando modelo para sãopaulo
Iteration No: 1 started. Evaluating function at random point.
[0.09871192514273254, 120, 14, 0.9990884895579377, 0.3124800792567785, 0.417264581836821, 0.8775821482324015, 22, 0.09361836800979718, 23] 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8622
Function value obtained: 0.1997
Current minimum: 0.1997
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.0528
Function value obtained: 0.6038
Current minimum: 0.1997
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.2851
Function value obtained: 0.1544
Current minimum: 0.1544
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1.0028
Function value obtained: 0.2618
Current minimum: 0.2618
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.1351
Function value obtained: 0.7616
Current minimum: 0.2618
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.4120
Function value obtained: 0.2169
Current minimum: 0.2169
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.5566
Function value obtained: 0.2527
Current minimum: 0.2527
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.4939
Function value obtained: 0.5969
Current minimum: 0.2527
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.0565
Function value obtained: 0.2138
Current minimum: 0.2138
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8560
Function value obtained: 0.3629
Current minimum: 0.3629
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.7759
Function value obtained: 0.8901
Current minimum: 0.3629
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.6643
Function value obtained: 0.3229
Current minimum: 0.3229
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7218
Function value obtained: 0.1453
Current minimum: 0.1453
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.9423
Function value obtained: 0.2968
Current minimum: 0.1453
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.9547
Function value obtained: 0.1324
Current minimum: 0.1324
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3042
Function value obtained: 0.3522
Current minimum: 0.3522
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2358
Function value obtained: 0.5816
Current minimum: 0.3522
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.3800
Function value obtained: 0.3314
Current minimum: 0.3314
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.8635
Function value obtained: 0.1580
Current minimum: 0.1580
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1.0399
Function value obtained: 0.3677
Current minimum: 0.1580
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.2079
Function value obtained: 0.1247
Current minimum: 0.1247
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7232
Function value obtained: 0.1670
Current minimum: 0.1670
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.8293
Function value obtained: 0.3523
Current minimum: 0.1670
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.7288
Function value obtained: 0.1459
Current minimum: 0.1459
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7986
Function value obtained: 0.2015
Current minimum: 0.2015
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.8869
Function value obtained: 0.4374
Current minimum: 0.2015
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.8821
Function value obtained: 0.1718
Current minimum: 0.1718
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.7667
Function value obtained: 0.1499
Current minimum: 0.1499
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.9096
Function value obtained: 0.4296
Current minimum: 0.1499
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 2.0729
Function value obtained: 0.1286
Current minimum: 0.1286
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3549
Function value obtained: 0.2518
Current minimum: 0.2518
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2722
Function value obtained: 0.4337
Current minimum: 0.2518
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.4605
Function value obtained: 0.2287
Current minimum: 0.2287
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.3354
Function value obtained: 0.2307
Current minimum: 0.2307
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2897
Function value obtained: 0.4148
Current minimum: 0.2307
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.4182
Function value obtained: 0.1977
Current minimum: 0.1977
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.2838
Function value obtained: 0.1674
Current minimum: 0.1674
Iteration No: 2 started. Evaluating function at random point.
[0.004232013397179603, 68, 45, 0.2680983530433343, 0.5809725180523154, 0.8311696196633865, 0.8914409615973977, 20, 0.09397365115293897, 22] 

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2432
Function value obtained: 0.4021
Current minimum: 0.1674
Iteration No: 3 started. Evaluating function at random point.
[0.027035912483147396, 103, 10, 0.5422449214947946, 0.8785182267810853, 0.7633175258937135, 0.9659206718788368, 19, 0.006865076928854784, 20] 

Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.3169
Function value obtained: 0.1817
Current minimum: 0.1674
Iteration No: 4 started. Evaluating function at random point.
[0.01535080081765723, 87, 42, 0.23767335308554222, 0.3606666764912312, 0.21369610837996558, 0.9566628945389772, 20, 0.00438295980739806



Iteration No: 168 ended. Search finished for the next optimal point.
Time taken: 5.6981
Function value obtained: 0.1652
Current minimum: 0.1642
Iteration No: 169 started. Searching for the next optimal point.
[0.1, 47, 25, 1.0, 0.8365796179212991, 0.1, 1.0, 17, 0.013554685706173141, 25] 

Iteration No: 169 ended. Search finished for the next optimal point.
Time taken: 5.9210
Function value obtained: 0.1653
Current minimum: 0.1642
Iteration No: 170 started. Searching for the next optimal point.
[0.1, 84, 1, 0.05, 0.1, 0.1, 1.0, 21, 0.001, 25] 

Iteration No: 170 ended. Search finished for the next optimal point.
Time taken: 6.0320
Function value obtained: 0.1652
Current minimum: 0.1642
Iteration No: 171 started. Searching for the next optimal point.
[0.1, 68, 1, 0.05, 1.0, 0.1, 0.8, 25, 0.001, 25] 

Iteration No: 171 ended. Search finished for the next optimal point.
Time taken: 5.8779
Function value obtained: 0.1664
Current minimum: 0.1642
Iteration No: 172 started. Searching for the n