In [1]:
# Data manipulation
import numpy as np
import pandas as pd
from math import *
import seaborn as sns
import networkx as nx
import os
import scipy.stats as stats

# Visualization.
import matplotlib.pyplot as plt

# Saving models
from datetime import datetime
import joblib

# Display all columns
pd.set_option('display.max_columns', 150,
             'display.max_rows', 150)

# ML
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, RepeatedKFold, train_test_split, cross_validate, cross_val_score 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, median_absolute_error#, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn import ensemble
from sklearn.ensemble import RandomForestRegressor


import requests, json
from sqlalchemy import create_engine

#### Load helper functions

In [2]:
%run ./functions.ipynb

In [3]:
path0 = os.getcwd()

In [4]:
full = pd.read_csv(os.path.join(path0, "Full.csv"))

In [5]:
full.head(5)

Unnamed: 0.1,Unnamed: 0,order_id,lat_destination,lng_destination,promised_time,on_demand,shopper_id,store_branch_id,total_minutes,quantity_UN,quantity_KG,is_more_UN,UN_plus_KG,UN_mult_KG,seniority,found_rate,picking_speed,accepted_rate,rating,store_id,lat_origin,lng_origin,Hour,Month,Day_of_Week,Year,Date,period,distance_havesine,distance_car,weight_car,duration,city_origin,state_origin,county_origin,neighbourhood_origin,city_destiny,state_destiny,county_destiny,neighbourhood_destiny,same_city,same_state,same_county,same_neighbourhood,path_city,path_state,path_county,shoppers_number,store_branch_number
0,0,e750294655c2c7c34d83cc3181c09de4,-33.501675,-70.579369,2019-10-18 20:48:00+00:00,True,e63bc83a1a952fa2b3cc9d558fb943cf,65ded5353c5ee48d0b7d48c591b8f430,67.684264,16.0,2.756,1.0,18.756,44.096,6c90661e6d2c7579f5ce337c3391dbb9,0.9024,1.3,0.92,4.76,c4ca4238a0b923820dcc509a6f75849b,-33.48528,-70.57925,20,10,4,2019,18,night,1.823597,3367.1,380.8,380.8,Macul,Región Metropolitana de Santiago,Provincia de Santiago,Villa Universidad Católica,Peñalolén,Región Metropolitana de Santiago,Provincia de Santiago,Conjunto San Luis,0.0,1.0,1.0,0.0,Macul_X_Peñalolén,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
1,1,6581174846221cb6c467348e87f57641,-33.440584,-70.556283,2019-10-19 01:00:00+00:00,False,195f9e9d84a4ba9033c4b6a756334d8b,45fbc6d3e05ebd93369ce542e8f2322d,57.060632,11.0,0.0,1.0,11.0,0.0,41dc7c9e385c4d2b6c1f7836973951bf,0.761,2.54,0.92,4.96,c4ca4238a0b923820dcc509a6f75849b,-33.441246,-70.53545,1,10,5,2019,19,dawn,1.935026,2373.9,229.1,229.1,undefined,Región Metropolitana de Santiago,Provincia de Santiago,La Reina,undefined,Región Metropolitana de Santiago,Provincia de Santiago,La Reina,0.0,1.0,1.0,1.0,undefined_X_undefined,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
2,2,3a226ea48debc0a7ae9950d5540f2f34,-32.987022,-71.544842,2019-10-19 14:54:00+00:00,True,a5b9ddc0d82e61582fca19ad43dbaacb,07563a3fe3bbe7e3ba84431ad9d055af,,18.0,0.0,1.0,18.0,0.0,50e13ee63f086c2fe84229348bc91b5b,0.8313,2.57,0.76,4.92,c4ca4238a0b923820dcc509a6f75849b,-33.008213,-71.545615,14,10,5,2019,19,afternoon,2.358128,2930.5,298.2,298.2,Viña del Mar,Región de Valparaíso,Provincia de Valparaíso,Población Británica,Viña del Mar,Región de Valparaíso,Provincia de Valparaíso,Población Naval Las Salinas,1.0,1.0,1.0,0.0,Viña del Mar_X_Viña del Mar,Región de Valparaíso_X_Región de Valparaíso,Provincia de Valparaíso_X_Provincia de Valparaíso,7698,7698
3,3,7d2ed03fe4966083e74b12694b1669d8,-33.328075,-70.512659,2019-10-18 21:47:00+00:00,True,d0b3f6bf7e249e5ebb8d3129341773a2,f1748d6b0fd9d439f71450117eba2725,52.067742,1.0,0.0,1.0,1.0,0.0,41dc7c9e385c4d2b6c1f7836973951bf,0.8776,2.8,0.96,4.76,f718499c1c8cef6730f9fd03c8125cab,-33.355258,-70.537787,21,10,4,2019,18,night,3.820244,5632.1,3826.0,549.3,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,Las Pataguas,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,Alpes Suizos,1.0,1.0,1.0,0.0,Lo Barnechea_X_Lo Barnechea,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,51,51
4,4,b4b2682d77118155fe4716300ccf7f39,-33.403239,-70.56402,2019-10-19 20:00:00+00:00,False,5c5199ce02f7b77caa9c2590a39ad27d,1f0e3dad99908345f7439f8ffabdffc4,140.724822,91.0,6.721,1.0,97.721,611.611,50e13ee63f086c2fe84229348bc91b5b,0.7838,2.4,0.96,4.96,c4ca4238a0b923820dcc509a6f75849b,-33.386547,-70.568075,20,10,5,2019,19,night,1.894474,2939.7,223.8,223.8,Vitacura,Región Metropolitana de Santiago,Provincia de Santiago,,Las Condes,Región Metropolitana de Santiago,Provincia de Santiago,,0.0,1.0,1.0,0.0,Vitacura_X_Las Condes,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698


In [6]:
temp = pd.read_csv(os.path.join(path0, "chile_temp.csv"),  index_col=0)
pre = pd.read_csv(os.path.join(path0, "chile_pre.csv"), index_col=0)

In [7]:
full = pd.merge(full, temp, how='left', on=['county_origin','Date'])
full = pd.merge(full, pre, how='left', on=['county_origin','Date'])

In [8]:
full = zscore(full, cols = ['quantity_KG','quantity_UN', 'distance_car', 'distance_havesine'])

In [9]:
full.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9438 entries, 0 to 9977
Data columns (total 55 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             9438 non-null   int64  
 1   order_id               9438 non-null   object 
 2   lat_destination        9438 non-null   float64
 3   lng_destination        9438 non-null   float64
 4   promised_time          9438 non-null   object 
 5   on_demand              9438 non-null   bool   
 6   shopper_id             9438 non-null   object 
 7   store_branch_id        9438 non-null   object 
 8   total_minutes          7558 non-null   float64
 9   quantity_UN            9438 non-null   float64
 10  quantity_KG            9438 non-null   float64
 11  is_more_UN             9438 non-null   float64
 12  UN_plus_KG             9438 non-null   float64
 13  UN_mult_KG             9438 non-null   float64
 14  seniority              9438 non-null   object 
 15  foun

In [10]:
full.isnull().sum()

Unnamed: 0                  0
order_id                    0
lat_destination             0
lng_destination             0
promised_time               0
on_demand                   0
shopper_id                  0
store_branch_id             0
total_minutes            1880
quantity_UN                 0
quantity_KG                 0
is_more_UN                  0
UN_plus_KG                  0
UN_mult_KG                  0
seniority                   0
found_rate                  0
picking_speed               0
accepted_rate               0
rating                      0
store_id                    0
lat_origin                  0
lng_origin                  0
Hour                        0
Month                       0
Day_of_Week                 0
Year                        0
Date                        0
period                      0
distance_havesine           0
distance_car                0
weight_car                  0
duration                    0
city_origin                 0
state_orig

In [11]:
var_cat = ['on_demand', 'seniority', 'Hour', 'Day_of_Week', 'period',
           'county_origin', 'county_destiny', 'state_origin', 'state_destiny',
           'city_origin', 'city_destiny',
           'path_city',
           'path_state', 
           'path_county',
           'same_city', 'same_state', 'same_county',
           'same_neighbourhood'
          ]

var_num = ['quantity_UN', 'quantity_KG', 'UN_plus_KG', 'UN_mult_KG',
           'distance_havesine',
           'found_rate', 'picking_speed','accepted_rate', 'rating',
           'distance_car', 
           'duration',
           'shoppers_number',
           'store_branch_number', 'temperature', 'precipitation'
          ]

In [12]:
dft = pd.get_dummies(full[var_cat], columns= var_cat, drop_first=True)
cat_var = dft.columns

In [13]:
full = pd.get_dummies(full, columns = var_cat)

In [14]:
corr_var = corrX_orig(full[list(cat_var)+list(var_num)], cut = 0.8)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  up = corr_mtx.where(np.triu(np.ones(corr_mtx.shape), k=1).astype(np.bool))


In [15]:
target = ['total_minutes', 'order_id']
full = full[list(set(full[list(cat_var) + list(var_num)].columns) - set(corr_var)) + target]

In [16]:
var_num = list(set(var_num) - set(corr_var))

### Out of Sample

In [17]:
ofs = full[full.total_minutes.isnull()]

In [18]:
ofs.head(3)

Unnamed: 0,path_city_Las Condes_X_Santiago,Hour_11,path_city_undefined_X_Vitacura,city_destiny_Renca,path_city_undefined_X_Las Condes,path_county_Provincia de Maipo_X_Provincia de Maipo,Hour_13,path_city_Pudahuel_X_undefined,path_city_Las Condes_X_undefined,temperature,path_city_undefined_X_Renca,path_city_Viña del Mar_X_undefined,city_destiny_San Ramón,path_city_Vitacura_X_Santiago,path_city_Las Condes_X_Huechuraba,city_destiny_Talcahuano,path_city_Viña del Mar_X_Viña del Mar,path_city_undefined_X_El Bosque,path_city_San Joaquín_X_Providencia,path_city_Ñuñoa_X_Las Condes,path_city_Providencia_X_Vitacura,path_city_Conchalí_X_Huechuraba,path_city_undefined_X_La Florida,same_city_1.0,path_city_Santiago_X_Ñuñoa,city_origin_Santiago,city_origin_Vitacura,Hour_21,Hour_1,path_city_Valparaíso_X_Valparaíso,city_origin_Ñuñoa,path_city_San Joaquín_X_Ñuñoa,city_destiny_La Granja,path_city_Ñuñoa_X_Santiago,path_city_Las Condes_X_Providencia,same_neighbourhood_1.0,Hour_14,path_city_Estación Central_X_Independencia,city_destiny_El Bosque,path_city_San Joaquín_X_Santiago,path_city_Estación Central_X_Recoleta,path_city_Macul_X_San Miguel,path_city_Colina_X_Lo Barnechea,distance_havesine,city_destiny_La Serena,path_city_Vitacura_X_Ñuñoa,period_night,path_city_undefined_X_Estación Central,path_city_Providencia_X_undefined,Hour_16,path_city_La Serena_X_Coquimbo,path_city_Peñalolén_X_undefined,path_county_Provincia de Cordillera_X_Provincia de Cordillera,path_city_Las Condes_X_Vitacura,path_city_Santiago_X_San Miguel,path_city_undefined_X_undefined,path_city_Estación Central_X_Santiago,path_city_Viña del Mar_X_Valparaíso,path_city_Valparaíso_X_Viña del Mar,city_origin_Recoleta,path_city_Macul_X_Providencia,city_destiny_Lo Prado,path_city_Colina_X_Colina,city_destiny_Santiago,path_city_Ñuñoa_X_Ñuñoa,path_city_Las Condes_X_Lo Barnechea,path_city_Providencia_X_Recoleta,path_city_undefined_X_Colina,path_city_Macul_X_Peñalolén,path_city_San Miguel_X_El Bosque,path_city_Ñuñoa_X_Peñalolén,on_demand_True,path_city_Estación Central_X_Renca,path_city_undefined_X_Viña del Mar,city_destiny_La Cisterna,...,path_city_Vitacura_X_Vitacura,path_city_San Joaquín_X_San Joaquín,path_city_Las Condes_X_Ñuñoa,path_city_Vitacura_X_Las Condes,city_origin_Estación Central,path_city_undefined_X_Huechuraba,path_city_La Florida_X_undefined,path_city_undefined_X_Providencia,shoppers_number,path_city_San Miguel_X_San Miguel,path_county_Provincia de Cordillera_X_Provincia de Santiago,accepted_rate,Hour_18,path_city_Cerrillos_X_El Bosque,path_county_Provincia de Maipo_X_Provincia de Santiago,Day_of_Week_6,path_city_San Miguel_X_San Joaquín,city_destiny_Lo Espejo,path_city_Santiago_X_Santiago,path_city_Peñalolén_X_Ñuñoa,path_city_Las Condes_X_Las Condes,path_city_undefined_X_Peñalolén,path_city_Providencia_X_Santiago,city_origin_Talcahuano,path_city_Conchalí_X_Renca,path_city_Concepción_X_undefined,path_city_San Joaquín_X_San Miguel,city_destiny_Ñuñoa,path_city_Providencia_X_Ñuñoa,found_rate,Day_of_Week_5,period_dawn,path_city_Ñuñoa_X_undefined,path_city_Santiago_X_Providencia,path_city_La Florida_X_La Florida,path_city_Vitacura_X_Recoleta,city_origin_undefined,city_origin_Providencia,county_origin_Provincia de Santiago,path_city_Lo Barnechea_X_Vitacura,seniority_50e13ee63f086c2fe84229348bc91b5b,path_city_Estación Central_X_Conchalí,path_city_Cerrillos_X_undefined,Hour_17,Hour_23,city_origin_San Pedro de la Paz,path_city_undefined_X_Pudahuel,seniority_6c90661e6d2c7579f5ce337c3391dbb9,city_origin_Concepción,path_city_undefined_X_Concepción,city_origin_La Serena,city_destiny_Cerro Navia,path_city_Concepción_X_San Pedro de la Paz,path_city_San Miguel_X_Santiago,path_city_Las Condes_X_Peñalolén,path_city_Lo Barnechea_X_Lo Barnechea,path_city_La Florida_X_San Miguel,path_city_Conchalí_X_Conchalí,city_destiny_Huechuraba,path_city_undefined_X_San Pedro de la Paz,path_city_Providencia_X_Providencia,same_county_1.0,quantity_UN,path_city_Macul_X_Ñuñoa,city_origin_Huechuraba,path_city_Macul_X_Macul,path_city_La Florida_X_Peñalolén,path_city_Vitacura_X_Providencia,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,path_city_San Joaquín_X_Macul,city_origin_Valparaíso,path_city_Vitacura_X_Huechuraba,path_city_Conchalí_X_Independencia,total_minutes,order_id
2,0,0,0,0,0,0,0,0,0,285.531514,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2.358128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,7698,0,0,0.76,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8313,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,18.0,0,0,0,0,0,0,0,0,0,0,,3a226ea48debc0a7ae9950d5540f2f34
5,0,0,0,0,0,0,0,0,0,283.223732,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.190793,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,699,0,0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8946,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,15.0,0,0,0,0,0,0,0,0,0,0,,9bf29b56619fcaf60b52690a848e10bb
9,0,0,0,0,0,0,0,0,0,283.223732,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.512359,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,7698,0,0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.9363,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,94.0,0,0,0,0,0,0,0,0,0,0,,299d948a5fd2cf2a921894b9bd24b94e


In [19]:
ofs.isnull().sum()

path_city_Las Condes_X_Santiago          0
Hour_11                                  0
path_city_undefined_X_Vitacura           0
city_destiny_Renca                       0
path_city_undefined_X_Las Condes         0
                                      ... 
city_origin_Valparaíso                   0
path_city_Vitacura_X_Huechuraba          0
path_city_Conchalí_X_Independencia       0
total_minutes                         1880
order_id                                 0
Length: 191, dtype: int64

### Modeling data

In [20]:
modeling = full[~full.total_minutes.isnull()]

In [21]:
modeling.head(3)

Unnamed: 0,path_city_Las Condes_X_Santiago,Hour_11,path_city_undefined_X_Vitacura,city_destiny_Renca,path_city_undefined_X_Las Condes,path_county_Provincia de Maipo_X_Provincia de Maipo,Hour_13,path_city_Pudahuel_X_undefined,path_city_Las Condes_X_undefined,temperature,path_city_undefined_X_Renca,path_city_Viña del Mar_X_undefined,city_destiny_San Ramón,path_city_Vitacura_X_Santiago,path_city_Las Condes_X_Huechuraba,city_destiny_Talcahuano,path_city_Viña del Mar_X_Viña del Mar,path_city_undefined_X_El Bosque,path_city_San Joaquín_X_Providencia,path_city_Ñuñoa_X_Las Condes,path_city_Providencia_X_Vitacura,path_city_Conchalí_X_Huechuraba,path_city_undefined_X_La Florida,same_city_1.0,path_city_Santiago_X_Ñuñoa,city_origin_Santiago,city_origin_Vitacura,Hour_21,Hour_1,path_city_Valparaíso_X_Valparaíso,city_origin_Ñuñoa,path_city_San Joaquín_X_Ñuñoa,city_destiny_La Granja,path_city_Ñuñoa_X_Santiago,path_city_Las Condes_X_Providencia,same_neighbourhood_1.0,Hour_14,path_city_Estación Central_X_Independencia,city_destiny_El Bosque,path_city_San Joaquín_X_Santiago,path_city_Estación Central_X_Recoleta,path_city_Macul_X_San Miguel,path_city_Colina_X_Lo Barnechea,distance_havesine,city_destiny_La Serena,path_city_Vitacura_X_Ñuñoa,period_night,path_city_undefined_X_Estación Central,path_city_Providencia_X_undefined,Hour_16,path_city_La Serena_X_Coquimbo,path_city_Peñalolén_X_undefined,path_county_Provincia de Cordillera_X_Provincia de Cordillera,path_city_Las Condes_X_Vitacura,path_city_Santiago_X_San Miguel,path_city_undefined_X_undefined,path_city_Estación Central_X_Santiago,path_city_Viña del Mar_X_Valparaíso,path_city_Valparaíso_X_Viña del Mar,city_origin_Recoleta,path_city_Macul_X_Providencia,city_destiny_Lo Prado,path_city_Colina_X_Colina,city_destiny_Santiago,path_city_Ñuñoa_X_Ñuñoa,path_city_Las Condes_X_Lo Barnechea,path_city_Providencia_X_Recoleta,path_city_undefined_X_Colina,path_city_Macul_X_Peñalolén,path_city_San Miguel_X_El Bosque,path_city_Ñuñoa_X_Peñalolén,on_demand_True,path_city_Estación Central_X_Renca,path_city_undefined_X_Viña del Mar,city_destiny_La Cisterna,...,path_city_Vitacura_X_Vitacura,path_city_San Joaquín_X_San Joaquín,path_city_Las Condes_X_Ñuñoa,path_city_Vitacura_X_Las Condes,city_origin_Estación Central,path_city_undefined_X_Huechuraba,path_city_La Florida_X_undefined,path_city_undefined_X_Providencia,shoppers_number,path_city_San Miguel_X_San Miguel,path_county_Provincia de Cordillera_X_Provincia de Santiago,accepted_rate,Hour_18,path_city_Cerrillos_X_El Bosque,path_county_Provincia de Maipo_X_Provincia de Santiago,Day_of_Week_6,path_city_San Miguel_X_San Joaquín,city_destiny_Lo Espejo,path_city_Santiago_X_Santiago,path_city_Peñalolén_X_Ñuñoa,path_city_Las Condes_X_Las Condes,path_city_undefined_X_Peñalolén,path_city_Providencia_X_Santiago,city_origin_Talcahuano,path_city_Conchalí_X_Renca,path_city_Concepción_X_undefined,path_city_San Joaquín_X_San Miguel,city_destiny_Ñuñoa,path_city_Providencia_X_Ñuñoa,found_rate,Day_of_Week_5,period_dawn,path_city_Ñuñoa_X_undefined,path_city_Santiago_X_Providencia,path_city_La Florida_X_La Florida,path_city_Vitacura_X_Recoleta,city_origin_undefined,city_origin_Providencia,county_origin_Provincia de Santiago,path_city_Lo Barnechea_X_Vitacura,seniority_50e13ee63f086c2fe84229348bc91b5b,path_city_Estación Central_X_Conchalí,path_city_Cerrillos_X_undefined,Hour_17,Hour_23,city_origin_San Pedro de la Paz,path_city_undefined_X_Pudahuel,seniority_6c90661e6d2c7579f5ce337c3391dbb9,city_origin_Concepción,path_city_undefined_X_Concepción,city_origin_La Serena,city_destiny_Cerro Navia,path_city_Concepción_X_San Pedro de la Paz,path_city_San Miguel_X_Santiago,path_city_Las Condes_X_Peñalolén,path_city_Lo Barnechea_X_Lo Barnechea,path_city_La Florida_X_San Miguel,path_city_Conchalí_X_Conchalí,city_destiny_Huechuraba,path_city_undefined_X_San Pedro de la Paz,path_city_Providencia_X_Providencia,same_county_1.0,quantity_UN,path_city_Macul_X_Ñuñoa,city_origin_Huechuraba,path_city_Macul_X_Macul,path_city_La Florida_X_Peñalolén,path_city_Vitacura_X_Providencia,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,path_city_San Joaquín_X_Macul,city_origin_Valparaíso,path_city_Vitacura_X_Huechuraba,path_city_Conchalí_X_Independencia,total_minutes,order_id
0,0,0,0,0,0,0,0,0,0,283.223732,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.823597,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,7698,0,0,0.92,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.9024,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,16.0,0,0,0,0,0,0,0,0,0,0,67.684264,e750294655c2c7c34d83cc3181c09de4
1,0,0,0,0,0,0,0,0,0,282.664238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1.935026,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,7698,0,0,0.92,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.761,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,11.0,0,0,0,0,0,0,0,0,0,0,57.060632,6581174846221cb6c467348e87f57641
3,0,0,0,0,0,0,0,0,0,283.223732,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.820244,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,51,0,0,0.96,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8776,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1.0,0,0,0,0,0,0,0,0,0,0,52.067742,7d2ed03fe4966083e74b12694b1669d8


In [22]:
modeling.isnull().sum()

path_city_Las Condes_X_Santiago       0
Hour_11                               0
path_city_undefined_X_Vitacura        0
city_destiny_Renca                    0
path_city_undefined_X_Las Condes      0
                                     ..
city_origin_Valparaíso                0
path_city_Vitacura_X_Huechuraba       0
path_city_Conchalí_X_Independencia    0
total_minutes                         0
order_id                              0
Length: 191, dtype: int64

In [23]:
# Getting the data:
X = modeling.drop(['total_minutes', 'order_id'], axis = 1)

y = modeling['total_minutes']

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 451, test_size= .25)

In [25]:
StdSca = StandardScaler()  
X_train[var_num] = pd.DataFrame(StdSca.fit_transform(X_train[var_num]), columns = var_num, index = X_train.index)
X_test[var_num] = pd.DataFrame(StdSca.transform(X_test[var_num]), columns = var_num, index = X_test.index)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [26]:
ofs[var_num] = StdSca.transform(ofs[var_num])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


## Model

In [27]:
CV = cross_validate(estimator = ElasticNet(alpha=0.025, l1_ratio=0.5), 
                    cv = 10, 
                    scoring = ["r2", 'neg_median_absolute_error', 'max_error', 'neg_mean_squared_error'], 
                    return_estimator = True,
                    return_train_score= False,
                    X = X_train,
                    y = y_train)
cv_df = pd.DataFrame(CV)
cv_df

Unnamed: 0,fit_time,score_time,estimator,test_r2,test_neg_median_absolute_error,test_max_error,test_neg_mean_squared_error
0,0.056878,0.0,ElasticNet(alpha=0.025),0.457284,-14.436093,-144.024785,-621.981544
1,0.03649,0.0,ElasticNet(alpha=0.025),0.453773,-13.941866,-182.317981,-651.545323
2,0.048555,0.000502,ElasticNet(alpha=0.025),0.490274,-14.812147,-113.934925,-534.197085
3,0.031257,0.016319,ElasticNet(alpha=0.025),0.458502,-15.104361,-135.668499,-617.202956
4,0.032715,0.0,ElasticNet(alpha=0.025),0.453384,-14.325807,-137.855627,-618.4119
5,0.047121,0.016251,ElasticNet(alpha=0.025),0.47104,-13.983288,-92.451956,-562.448373
6,0.031998,0.0,ElasticNet(alpha=0.025),0.507336,-14.002721,-186.463204,-603.334795
7,0.04762,0.0,ElasticNet(alpha=0.025),0.493237,-15.425112,-190.152178,-616.609847
8,0.046875,0.0,ElasticNet(alpha=0.025),0.486275,-14.035366,-141.360101,-585.801749
9,0.04797,0.0,ElasticNet(alpha=0.025),0.477429,-15.045486,-116.70152,-516.312694


In [28]:
CV = CV["estimator"]

In [29]:
index = cv_df['fit_time'][cv_df.test_r2 == max(cv_df.test_r2)].index.values[0]
index

6

In [30]:
X_ofs = ofs.drop(['total_minutes', 'order_id'], axis = 1)
ofs['prediction'] = CV[index].predict(X_ofs)
ofs = ofs[['order_id','prediction']]
ofs.to_csv(os.path.join(path0, "submitted.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ofs['prediction'] = CV[index].predict(X_ofs)


In [31]:
#Fim