In [1]:
# Data manipulation
import numpy as np
import pandas as pd
from math import *
import seaborn as sns
import networkx as nx
import os
import scipy.stats as stats

# Visualization.
import matplotlib.pyplot as plt

# Saving models
from datetime import datetime
import joblib

# Display all columns
pd.set_option('display.max_columns', 150,
             'display.max_rows', 150)

# ML
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, RepeatedKFold, train_test_split, cross_validate, cross_val_score 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, median_absolute_error#, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn import ensemble
from sklearn.ensemble import RandomForestRegressor


import requests, json
from sqlalchemy import create_engine

#### Load helper functions

In [2]:
%run ./functions.ipynb

In [3]:
path0 = os.getcwd()

In [4]:
full = pd.read_csv(os.path.join(path0, "Full.csv"))

In [5]:
full.head(50)

Unnamed: 0.1,Unnamed: 0,order_id,lat_destination,lng_destination,promised_time,on_demand,shopper_id,store_branch_id,total_minutes,quantity_UN,quantity_KG,is_more_UN,UN_plus_KG,UN_mult_KG,seniority,found_rate,picking_speed,accepted_rate,rating,store_id,lat_origin,lng_origin,Hour,Month,Day_of_Week,Year,Date,period,distance_havesine,distance_car,weight_car,duration,city_origin,state_origin,county_origin,neighbourhood_origin,city_destiny,state_destiny,county_destiny,neighbourhood_destiny,same_city,same_state,same_county,same_neighbourhood,path_city,path_state,path_county,shoppers_number,store_branch_number
0,0,e750294655c2c7c34d83cc3181c09de4,-33.501675,-70.579369,2019-10-18 20:48:00+00:00,True,e63bc83a1a952fa2b3cc9d558fb943cf,65ded5353c5ee48d0b7d48c591b8f430,67.684264,16.0,2.756,1.0,18.756,44.096,6c90661e6d2c7579f5ce337c3391dbb9,0.9024,1.3,0.92,4.76,c4ca4238a0b923820dcc509a6f75849b,-33.48528,-70.57925,20,10,4,2019,18,night,1.823597,3367.1,380.8,380.8,Macul,Región Metropolitana de Santiago,Provincia de Santiago,Villa Universidad Católica,Peñalolén,Región Metropolitana de Santiago,Provincia de Santiago,Conjunto San Luis,0.0,1.0,1.0,0.0,Macul_X_Peñalolén,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
1,1,6581174846221cb6c467348e87f57641,-33.440584,-70.556283,2019-10-19 01:00:00+00:00,False,195f9e9d84a4ba9033c4b6a756334d8b,45fbc6d3e05ebd93369ce542e8f2322d,57.060632,11.0,0.0,1.0,11.0,0.0,41dc7c9e385c4d2b6c1f7836973951bf,0.761,2.54,0.92,4.96,c4ca4238a0b923820dcc509a6f75849b,-33.441246,-70.53545,1,10,5,2019,19,dawn,1.935026,2373.9,229.1,229.1,undefined,Región Metropolitana de Santiago,Provincia de Santiago,La Reina,undefined,Región Metropolitana de Santiago,Provincia de Santiago,La Reina,0.0,1.0,1.0,1.0,undefined_X_undefined,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
2,2,3a226ea48debc0a7ae9950d5540f2f34,-32.987022,-71.544842,2019-10-19 14:54:00+00:00,True,a5b9ddc0d82e61582fca19ad43dbaacb,07563a3fe3bbe7e3ba84431ad9d055af,,18.0,0.0,1.0,18.0,0.0,50e13ee63f086c2fe84229348bc91b5b,0.8313,2.57,0.76,4.92,c4ca4238a0b923820dcc509a6f75849b,-33.008213,-71.545615,14,10,5,2019,19,afternoon,2.358128,2930.5,298.2,298.2,Viña del Mar,Región de Valparaíso,Provincia de Valparaíso,Población Británica,Viña del Mar,Región de Valparaíso,Provincia de Valparaíso,Población Naval Las Salinas,1.0,1.0,1.0,0.0,Viña del Mar_X_Viña del Mar,Región de Valparaíso_X_Región de Valparaíso,Provincia de Valparaíso_X_Provincia de Valparaíso,7698,7698
3,3,7d2ed03fe4966083e74b12694b1669d8,-33.328075,-70.512659,2019-10-18 21:47:00+00:00,True,d0b3f6bf7e249e5ebb8d3129341773a2,f1748d6b0fd9d439f71450117eba2725,52.067742,1.0,0.0,1.0,1.0,0.0,41dc7c9e385c4d2b6c1f7836973951bf,0.8776,2.8,0.96,4.76,f718499c1c8cef6730f9fd03c8125cab,-33.355258,-70.537787,21,10,4,2019,18,night,3.820244,5632.1,3826.0,549.3,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,Las Pataguas,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,Alpes Suizos,1.0,1.0,1.0,0.0,Lo Barnechea_X_Lo Barnechea,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,51,51
4,4,b4b2682d77118155fe4716300ccf7f39,-33.403239,-70.56402,2019-10-19 20:00:00+00:00,False,5c5199ce02f7b77caa9c2590a39ad27d,1f0e3dad99908345f7439f8ffabdffc4,140.724822,91.0,6.721,1.0,97.721,611.611,50e13ee63f086c2fe84229348bc91b5b,0.7838,2.4,0.96,4.96,c4ca4238a0b923820dcc509a6f75849b,-33.386547,-70.568075,20,10,5,2019,19,night,1.894474,2939.7,223.8,223.8,Vitacura,Región Metropolitana de Santiago,Provincia de Santiago,,Las Condes,Región Metropolitana de Santiago,Provincia de Santiago,,0.0,1.0,1.0,0.0,Vitacura_X_Las Condes,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
5,5,9bf29b56619fcaf60b52690a848e10bb,-33.330724,-70.547074,2019-10-18 23:47:00+00:00,True,61e4ad15c3ff928840ebd34407055b33,33e75ff09dd601bbe69f351039152189,,15.0,0.0,1.0,15.0,0.0,6c90661e6d2c7579f5ce337c3391dbb9,0.8946,1.82,1.0,4.84,a87ff679a2f3e71d9181a67b7542122c,-33.357558,-70.515415,23,10,4,2019,18,night,4.190793,5786.8,3855.6,576.4,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,,1.0,1.0,1.0,0.0,Lo Barnechea_X_Lo Barnechea,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,699,699
6,6,2c036d7b5db4b3cb8abf44d9fa46b138,-33.44773,-70.573825,2019-10-19 14:50:00+00:00,True,5c15f25cf0be9893ad4780fa050d8fa3,5751ec3e9a4feab575962e78e006250d,45.535737,7.0,2.0,1.0,9.0,14.0,6c90661e6d2c7579f5ce337c3391dbb9,0.8713,1.31,0.88,4.96,c4ca4238a0b923820dcc509a6f75849b,-33.463001,-70.575301,14,10,5,2019,19,afternoon,1.70406,4255.7,368.9,368.9,Ñuñoa,Región Metropolitana de Santiago,Provincia de Santiago,,Ñuñoa,Región Metropolitana de Santiago,Provincia de Santiago,,1.0,1.0,1.0,0.0,Ñuñoa_X_Ñuñoa,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
7,7,d35c25397869d7effb3f3c140d248925,-33.450098,-70.603949,2019-10-19 02:00:00+00:00,False,3c4885d7dd4b68021456be3c24a034dc,e4da3b7fbbce2345d7772b0674a318d5,121.394073,62.0,1.118,1.0,63.118,69.316,6c90661e6d2c7579f5ce337c3391dbb9,0.819,1.27,1.0,4.72,c4ca4238a0b923820dcc509a6f75849b,-33.474,-70.599,2,10,5,2019,19,dawn,2.697946,3545.2,319.9,319.9,Macul,Región Metropolitana de Santiago,Provincia de Santiago,,Ñuñoa,Región Metropolitana de Santiago,Provincia de Santiago,Población Empleados Públicos y Periodistas Chi...,0.0,1.0,1.0,0.0,Macul_X_Ñuñoa,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698
8,8,93d769afdc93ad42811c3a3099b4d11e,-33.363772,-70.548334,2019-10-18 21:18:00+00:00,True,278e1e3dbd7de0fc653bf4500700c6ef,33e75ff09dd601bbe69f351039152189,72.446429,15.0,0.25,1.0,15.25,3.75,6c90661e6d2c7579f5ce337c3391dbb9,0.8755,1.0,1.0,4.92,a87ff679a2f3e71d9181a67b7542122c,-33.357558,-70.515415,21,10,4,2019,18,night,3.135393,4635.9,493.1,484.7,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,,Vitacura,Región Metropolitana de Santiago,Provincia de Santiago,,0.0,1.0,1.0,0.0,Lo Barnechea_X_Vitacura,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,699,699
9,9,299d948a5fd2cf2a921894b9bd24b94e,-33.349922,-70.522841,2019-10-18 21:54:00+00:00,True,1456fc09701783b29f69e8f68c029879,1679091c5a880faf6fb5e6087eb1b2dc,,94.0,2.852,1.0,96.852,268.088,6c90661e6d2c7579f5ce337c3391dbb9,0.9363,1.67,1.0,4.84,c4ca4238a0b923820dcc509a6f75849b,-33.370765,-70.51242,21,10,4,2019,18,night,2.512359,3141.8,249.5,249.5,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,,Lo Barnechea,Región Metropolitana de Santiago,Provincia de Santiago,,1.0,1.0,1.0,0.0,Lo Barnechea_X_Lo Barnechea,Región Metropolitana de Santiago_X_Región Metr...,Provincia de Santiago_X_Provincia de Santiago,7698,7698


In [6]:
full.columns

Index(['Unnamed: 0', 'order_id', 'lat_destination', 'lng_destination',
       'promised_time', 'on_demand', 'shopper_id', 'store_branch_id',
       'total_minutes', 'quantity_UN', 'quantity_KG', 'is_more_UN',
       'UN_plus_KG', 'UN_mult_KG', 'seniority', 'found_rate', 'picking_speed',
       'accepted_rate', 'rating', 'store_id', 'lat_origin', 'lng_origin',
       'Hour', 'Month', 'Day_of_Week', 'Year', 'Date', 'period',
       'distance_havesine', 'distance_car', 'weight_car', 'duration',
       'city_origin', 'state_origin', 'county_origin', 'neighbourhood_origin',
       'city_destiny', 'state_destiny', 'county_destiny',
       'neighbourhood_destiny', 'same_city', 'same_state', 'same_county',
       'same_neighbourhood', 'path_city', 'path_state', 'path_county',
       'shoppers_number', 'store_branch_number'],
      dtype='object')

In [7]:
#full['duration'] = (full['duration']*10)/6

In [8]:
temp = pd.read_csv(os.path.join(path0, "chile_temp.csv"),  index_col=0)
pre = pd.read_csv(os.path.join(path0, "chile_pre.csv"), index_col=0)

In [9]:
full = pd.merge(full, temp, how='left', on=['county_origin','Date'])
full = pd.merge(full, pre, how='left', on=['county_origin','Date'])

In [10]:
full = zscore(full, cols = ['quantity_KG','quantity_UN', 'distance_car', 'distance_havesine'])

In [11]:
full.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9438 entries, 0 to 9977
Data columns (total 55 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             9438 non-null   int64  
 1   order_id               9438 non-null   object 
 2   lat_destination        9438 non-null   float64
 3   lng_destination        9438 non-null   float64
 4   promised_time          9438 non-null   object 
 5   on_demand              9438 non-null   bool   
 6   shopper_id             9438 non-null   object 
 7   store_branch_id        9438 non-null   object 
 8   total_minutes          7558 non-null   float64
 9   quantity_UN            9438 non-null   float64
 10  quantity_KG            9438 non-null   float64
 11  is_more_UN             9438 non-null   float64
 12  UN_plus_KG             9438 non-null   float64
 13  UN_mult_KG             9438 non-null   float64
 14  seniority              9438 non-null   object 
 15  foun

In [12]:
full.isnull().sum()

Unnamed: 0                  0
order_id                    0
lat_destination             0
lng_destination             0
promised_time               0
on_demand                   0
shopper_id                  0
store_branch_id             0
total_minutes            1880
quantity_UN                 0
quantity_KG                 0
is_more_UN                  0
UN_plus_KG                  0
UN_mult_KG                  0
seniority                   0
found_rate                  0
picking_speed               0
accepted_rate               0
rating                      0
store_id                    0
lat_origin                  0
lng_origin                  0
Hour                        0
Month                       0
Day_of_Week                 0
Year                        0
Date                        0
period                      0
distance_havesine           0
distance_car                0
weight_car                  0
duration                    0
city_origin                 0
state_orig

In [13]:
var_cat = ['on_demand', 'seniority', 'Hour', 'Day_of_Week', 'period',
           'county_origin', 'county_destiny', 'state_origin', 'state_destiny',
           'city_origin', 'city_destiny',
           #'path_city',
           #'path_state', 
           #'path_county',
           'same_city', 'same_state', 'same_county',
           'same_neighbourhood'
          ]

var_num = ['quantity_UN', 'quantity_KG', 'UN_plus_KG', 'UN_mult_KG',
           'distance_havesine',
           'found_rate', 'picking_speed','accepted_rate', 'rating',
           'distance_car', 
           'duration',
           'shoppers_number',
           'store_branch_number', 'temperature', 'precipitation'
          ]

In [14]:
dft = pd.get_dummies(full[var_cat], columns= var_cat, drop_first=True)
cat_var = dft.columns

In [15]:
full = pd.get_dummies(full, columns = var_cat)

In [16]:
corr_var = corrX_orig(full[list(cat_var)+list(var_num)], cut = 0.8)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  up = corr_mtx.where(np.triu(np.ones(corr_mtx.shape), k=1).astype(np.bool))


In [17]:
target = ['total_minutes', 'order_id']
full = full[list(set(full[list(cat_var) + list(var_num)].columns) - set(corr_var)) + target]

In [18]:
var_num = list(set(var_num) - set(corr_var))

### Out of Sample

In [19]:
ofs = full[full.total_minutes.isnull()]

In [20]:
ofs.head(3)

Unnamed: 0,city_destiny_Estación Central,Hour_11,city_destiny_La Florida,same_city_1.0,city_destiny_Conchalí,Hour_18,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,city_origin_Ñuñoa,Hour_13,Day_of_Week_6,quantity_KG,temperature,city_origin_undefined,city_destiny_Talcahuano,county_destiny_Provincia de Cordillera,city_destiny_Peñalolén,shoppers_number,city_destiny_Lo Prado,city_origin_La Serena,city_destiny_Renca,found_rate,Hour_2,city_destiny_San Joaquín,city_destiny_Huechuraba,rating,Hour_22,city_origin_Recoleta,city_origin_San Joaquín,city_origin_Colina,seniority_6c90661e6d2c7579f5ce337c3391dbb9,city_destiny_Concepción,city_destiny_Vitacura,city_origin_Valparaíso,picking_speed,city_origin_Providencia,seniority_50e13ee63f086c2fe84229348bc91b5b,on_demand_True,city_destiny_La Cisterna,city_destiny_Ñuñoa,city_destiny_Pudahuel,city_destiny_Recoleta,Day_of_Week_5,city_origin_Santiago,city_destiny_Quinta Normal,city_destiny_San Pedro de la Paz,city_destiny_Macul,city_destiny_Providencia,city_origin_Talcahuano,county_origin_Provincia de Cordillera,city_origin_Las Condes,Hour_19,city_destiny_Lo Espejo,city_destiny_El Bosque,city_destiny_undefined,city_destiny_Valparaíso,Hour_15,city_destiny_Lo Barnechea,city_destiny_San Miguel,city_destiny_Coquimbo,city_origin_San Pedro de la Paz,city_destiny_Independencia,distance_havesine,city_origin_Huechuraba,city_origin_Conchalí,city_origin_Macul,city_origin_Estación Central,same_neighbourhood_1.0,period_night,city_destiny_San Ramón,period_dawn,Hour_12,county_origin_Provincia de Santiago,Hour_23,city_origin_Viña del Mar,Hour_3,city_destiny_Cerro Navia,city_origin_Peñalolén,city_destiny_Santiago,Hour_16,city_destiny_La Granja,city_origin_Pudahuel,same_county_1.0,Hour_14,city_destiny_Las Condes,city_destiny_La Pintana,Hour_1,city_origin_Coquimbo,city_origin_San Miguel,Hour_21,accepted_rate,city_destiny_La Serena,Hour_17,county_destiny_Provincia de Maipo,Hour_20,city_origin_Concepción,quantity_UN,city_origin_Vitacura,total_minutes,order_id
2,0,0,0,1,0,0,0,0,0,0,0.0,285.531514,0,0,0,0,7698,0,0,0,0.8313,0,0,0,4.92,0,0,0,0,0,0,0,0,2.57,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.358128,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0.76,0,0,0,0,0,18.0,0,,3a226ea48debc0a7ae9950d5540f2f34
5,0,0,0,1,0,0,0,0,0,0,0.0,283.223732,0,0,0,0,699,0,0,0,0.8946,0,0,0,4.84,0,0,0,0,1,0,0,0,1.82,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,4.190793,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1.0,0,0,0,0,0,15.0,0,,9bf29b56619fcaf60b52690a848e10bb
9,0,0,0,1,0,0,0,0,0,0,2.852,283.223732,0,0,0,0,7698,0,0,0,0.9363,0,0,0,4.84,0,0,0,0,1,0,0,0,1.67,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2.512359,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.0,0,0,0,0,0,94.0,0,,299d948a5fd2cf2a921894b9bd24b94e


In [21]:
ofs.isnull().sum()

city_destiny_Estación Central                    0
Hour_11                                          0
city_destiny_La Florida                          0
same_city_1.0                                    0
city_destiny_Conchalí                            0
Hour_18                                          0
seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f       0
city_origin_Ñuñoa                                0
Hour_13                                          0
Day_of_Week_6                                    0
quantity_KG                                      0
temperature                                      0
city_origin_undefined                            0
city_destiny_Talcahuano                          0
county_destiny_Provincia de Cordillera           0
city_destiny_Peñalolén                           0
shoppers_number                                  0
city_destiny_Lo Prado                            0
city_origin_La Serena                            0
city_destiny_Renca             

### Modeling data

In [22]:
modeling = full[~full.total_minutes.isnull()]

In [23]:
modeling.head(3)

Unnamed: 0,city_destiny_Estación Central,Hour_11,city_destiny_La Florida,same_city_1.0,city_destiny_Conchalí,Hour_18,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,city_origin_Ñuñoa,Hour_13,Day_of_Week_6,quantity_KG,temperature,city_origin_undefined,city_destiny_Talcahuano,county_destiny_Provincia de Cordillera,city_destiny_Peñalolén,shoppers_number,city_destiny_Lo Prado,city_origin_La Serena,city_destiny_Renca,found_rate,Hour_2,city_destiny_San Joaquín,city_destiny_Huechuraba,rating,Hour_22,city_origin_Recoleta,city_origin_San Joaquín,city_origin_Colina,seniority_6c90661e6d2c7579f5ce337c3391dbb9,city_destiny_Concepción,city_destiny_Vitacura,city_origin_Valparaíso,picking_speed,city_origin_Providencia,seniority_50e13ee63f086c2fe84229348bc91b5b,on_demand_True,city_destiny_La Cisterna,city_destiny_Ñuñoa,city_destiny_Pudahuel,city_destiny_Recoleta,Day_of_Week_5,city_origin_Santiago,city_destiny_Quinta Normal,city_destiny_San Pedro de la Paz,city_destiny_Macul,city_destiny_Providencia,city_origin_Talcahuano,county_origin_Provincia de Cordillera,city_origin_Las Condes,Hour_19,city_destiny_Lo Espejo,city_destiny_El Bosque,city_destiny_undefined,city_destiny_Valparaíso,Hour_15,city_destiny_Lo Barnechea,city_destiny_San Miguel,city_destiny_Coquimbo,city_origin_San Pedro de la Paz,city_destiny_Independencia,distance_havesine,city_origin_Huechuraba,city_origin_Conchalí,city_origin_Macul,city_origin_Estación Central,same_neighbourhood_1.0,period_night,city_destiny_San Ramón,period_dawn,Hour_12,county_origin_Provincia de Santiago,Hour_23,city_origin_Viña del Mar,Hour_3,city_destiny_Cerro Navia,city_origin_Peñalolén,city_destiny_Santiago,Hour_16,city_destiny_La Granja,city_origin_Pudahuel,same_county_1.0,Hour_14,city_destiny_Las Condes,city_destiny_La Pintana,Hour_1,city_origin_Coquimbo,city_origin_San Miguel,Hour_21,accepted_rate,city_destiny_La Serena,Hour_17,county_destiny_Provincia de Maipo,Hour_20,city_origin_Concepción,quantity_UN,city_origin_Vitacura,total_minutes,order_id
0,0,0,0,0,0,0,0,0,0,0,2.756,283.223732,0,0,0,1,7698,0,0,0,0.9024,0,0,0,4.76,0,0,0,0,1,0,0,0,1.3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.823597,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.92,0,0,0,1,0,16.0,0,67.684264,e750294655c2c7c34d83cc3181c09de4
1,0,0,0,0,0,0,0,0,0,0,0.0,282.664238,1,0,0,0,7698,0,0,0,0.761,0,0,0,4.96,0,0,0,0,0,0,0,0,2.54,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1.935026,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0.92,0,0,0,0,0,11.0,0,57.060632,6581174846221cb6c467348e87f57641
3,0,0,0,1,0,0,0,0,0,0,0.0,283.223732,0,0,0,0,51,0,0,0,0.8776,0,0,0,4.76,0,0,0,0,0,0,0,0,2.8,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3.820244,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.96,0,0,0,0,0,1.0,0,52.067742,7d2ed03fe4966083e74b12694b1669d8


In [24]:
modeling.isnull().sum()

city_destiny_Estación Central                 0
Hour_11                                       0
city_destiny_La Florida                       0
same_city_1.0                                 0
city_destiny_Conchalí                         0
Hour_18                                       0
seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f    0
city_origin_Ñuñoa                             0
Hour_13                                       0
Day_of_Week_6                                 0
quantity_KG                                   0
temperature                                   0
city_origin_undefined                         0
city_destiny_Talcahuano                       0
county_destiny_Provincia de Cordillera        0
city_destiny_Peñalolén                        0
shoppers_number                               0
city_destiny_Lo Prado                         0
city_origin_La Serena                         0
city_destiny_Renca                            0
found_rate                              

In [25]:
# Getting the data:
X = modeling.drop(['total_minutes', 'order_id'], axis = 1)

y = modeling['total_minutes']

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 451, test_size= .25)

In [27]:
StdSca = StandardScaler()  
X_train[var_num] = pd.DataFrame(StdSca.fit_transform(X_train[var_num]), columns = var_num, index = X_train.index)
X_test[var_num] = pd.DataFrame(StdSca.transform(X_test[var_num]), columns = var_num, index = X_test.index)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [28]:
ofs[var_num] = StdSca.transform(ofs[var_num])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


## Model

In [29]:
CV = cross_validate(estimator = ElasticNet(alpha=0.025, l1_ratio=0.5), 
                    cv = 10, 
                    scoring = ["r2", 'neg_median_absolute_error', 'max_error', 'neg_mean_squared_error'], 
                    return_estimator = True,
                    return_train_score= False,
                    X = X_train,
                    y = y_train)
cv_df = pd.DataFrame(CV)
cv_df

Unnamed: 0,fit_time,score_time,estimator,test_r2,test_neg_median_absolute_error,test_max_error,test_neg_mean_squared_error
0,0.031419,0.008023,ElasticNet(alpha=0.025),0.456722,-14.397391,-143.849562,-622.624676
1,0.027079,0.008005,ElasticNet(alpha=0.025),0.453719,-14.305165,-183.920437,-651.609495
2,0.031709,0.005429,ElasticNet(alpha=0.025),0.485983,-14.754296,-114.080775,-538.693607
3,0.02424,0.008894,ElasticNet(alpha=0.025),0.459445,-15.195084,-128.652253,-616.127837
4,0.033369,0.0,ElasticNet(alpha=0.025),0.456278,-14.743521,-136.69577,-615.137882
5,0.031318,0.00818,ElasticNet(alpha=0.025),0.47608,-14.079491,-89.814005,-557.089285
6,0.024687,0.009544,ElasticNet(alpha=0.025),0.510836,-14.126066,-187.945275,-599.048253
7,0.033228,0.0,ElasticNet(alpha=0.025),0.498694,-15.434192,-187.442585,-609.969516
8,0.026996,0.00801,ElasticNet(alpha=0.025),0.4849,-14.0337,-143.6855,-587.369873
9,0.024868,0.008172,ElasticNet(alpha=0.025),0.47373,-15.058386,-114.894631,-519.96748


In [30]:
CV = CV["estimator"]

In [31]:
cv_df['test_max_error'] = abs(cv_df['test_max_error'])
index = cv_df['fit_time'][cv_df.test_max_error == min(cv_df.test_max_error)].index.values[0]
index

5

In [32]:
X_ofs = ofs.drop(['total_minutes', 'order_id'], axis = 1)
ofs['prediction'] = CV[index].predict(X_ofs)
ofs = ofs[['order_id','prediction']]
ofs.to_csv(os.path.join(path0, "submitted.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ofs['prediction'] = CV[index].predict(X_ofs)


In [33]:
#Fim