In [1]:
# Data manipulation
import numpy as np
import pandas as pd
from math import *
import seaborn as sns
import networkx as nx
import os
import scipy.stats as stats

# Visualization.
import matplotlib.pyplot as plt

# Saving models
from datetime import datetime
import joblib

# Display all columns
pd.set_option('display.max_columns', 150,
             'display.max_rows', 150)

# ML
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, RepeatedKFold, train_test_split, cross_validate, cross_val_score 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, median_absolute_error#, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn import ensemble
from sklearn.ensemble import RandomForestRegressor


import requests, json
from sqlalchemy import create_engine

#### Load helper functions

In [2]:
%run ./functions.ipynb

In [3]:
path0 = os.getcwd()

In [4]:
full = pd.read_csv(os.path.join(path0, "full_new.csv"))

In [5]:
temp = pd.read_csv(os.path.join(path0, "chile_temp.csv"),  index_col=0)
pre = pd.read_csv(os.path.join(path0, "chile_pre.csv"), index_col=0)

In [6]:
full = pd.merge(full, temp, how='left', on=['county_origin','Date'])
full = pd.merge(full, pre, how='left', on=['county_origin','Date'])

In [7]:
full = zscore(full, cols = ['quantity_KG','quantity_UN', 'distance_car', 'distance_havesine'])

In [8]:
var_cat = ['on_demand', 'seniority', 'Hour', 'Day_of_Week', 'period',
           'county_origin', 'county_destiny', 'state_origin', 'state_destiny',
           'city_origin', 'city_destiny',
           'path_city',
           'path_state', 
           'path_county',
           'same_city', 'same_state', 'same_county',
           'same_neighbourhood'
          ]

var_num = ['quantity_UN', 'quantity_KG', 'UN_plus_KG', 'UN_mult_KG',
           'distance_havesine',
           'found_rate', 'picking_speed','accepted_rate', 'rating',
           'distance_car', 
           'duration',
           'shoppers_number',
           'store_branch_number', 'temperature', 'precipitation'
          ]

In [9]:
dft = pd.get_dummies(full[var_cat], columns= var_cat, drop_first=True)
cat_var = dft.columns

In [10]:
full = pd.get_dummies(full, columns = var_cat)

In [11]:
corr_var = corrX_orig(full[list(cat_var)+list(var_num)], cut = 0.8)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  up = corr_mtx.where(np.triu(np.ones(corr_mtx.shape), k=1).astype(np.bool))


In [12]:
target = ['total_minutes', 'order_id']
full = full[list(set(full[list(cat_var) + list(var_num)].columns) - set(corr_var)) + target]

In [13]:
var_num = list(set(var_num) - set(corr_var))

### Out of Sample

In [14]:
ofs = full[full.total_minutes.isnull()]

In [15]:
ofs.head(3)

Unnamed: 0,city_origin_Providencia,path_city_Viña del Mar_X_undefined,path_city_San Joaquín_X_Ñuñoa,path_city_undefined_X_Conchalí,Hour_3,Hour_13,path_city_Viña del Mar_X_Viña del Mar,path_city_Peñalolén_X_Ñuñoa,path_city_Estación Central_X_Renca,path_city_Lo Barnechea_X_Las Condes,city_origin_undefined,city_destiny_San Ramón,path_city_Vitacura_X_Ñuñoa,period_night,path_city_Providencia_X_Ñuñoa,path_city_Providencia_X_Huechuraba,same_county_1.0,Hour_22,Hour_21,path_county_Provincia de Cordillera_X_Provincia de Santiago,path_city_Las Condes_X_Huechuraba,path_city_Las Condes_X_Lo Barnechea,path_city_Providencia_X_Las Condes,on_demand_True,path_city_Las Condes_X_Santiago,Hour_15,accepted_rate,path_city_Colina_X_Colina,county_destiny_Provincia de Cordillera,path_city_Vitacura_X_Santiago,city_destiny_Peñalolén,path_city_undefined_X_Pudahuel,path_city_Estación Central_X_Independencia,path_city_Macul_X_Providencia,path_city_Estación Central_X_Recoleta,path_city_Ñuñoa_X_Peñalolén,path_city_Ñuñoa_X_Las Condes,period_dawn,path_city_undefined_X_Viña del Mar,path_city_Las Condes_X_Peñalolén,rating,city_destiny_La Granja,path_city_Conchalí_X_Colina,city_origin_Santiago,city_destiny_Providencia,path_city_La Florida_X_undefined,picking_speed,path_city_Santiago_X_San Miguel,path_city_Peñalolén_X_Peñalolén,seniority_50e13ee63f086c2fe84229348bc91b5b,city_destiny_Lo Espejo,path_city_Ñuñoa_X_Ñuñoa,city_destiny_Renca,city_origin_Macul,city_origin_San Joaquín,city_origin_Huechuraba,path_city_San Joaquín_X_San Miguel,city_origin_Recoleta,path_city_Valparaíso_X_Viña del Mar,city_destiny_El Bosque,city_destiny_La Cisterna,path_city_Colina_X_Lo Barnechea,path_city_Vitacura_X_Las Condes,path_city_undefined_X_undefined,path_city_Macul_X_Ñuñoa,path_city_undefined_X_Providencia,city_destiny_Quinta Normal,found_rate,path_city_Concepción_X_San Pedro de la Paz,path_city_Providencia_X_Vitacura,path_city_La Florida_X_Peñalolén,path_city_San Miguel_X_San Joaquín,path_city_Ñuñoa_X_Santiago,path_city_Vitacura_X_Recoleta,city_destiny_Las Condes,...,path_city_Cerrillos_X_undefined,path_city_Vitacura_X_Providencia,path_city_undefined_X_Vitacura,Hour_16,distance_havesine,path_city_Macul_X_Peñalolén,path_city_Vitacura_X_Huechuraba,same_neighbourhood_1.0,path_city_Estación Central_X_Santiago,path_city_La Florida_X_La Florida,city_origin_Estación Central,path_city_San Joaquín_X_Santiago,path_city_Conchalí_X_Conchalí,path_city_Las Condes_X_Providencia,city_destiny_Ñuñoa,path_city_undefined_X_La Florida,path_city_Providencia_X_Macul,Hour_17,path_city_Concepción_X_Concepción,path_city_undefined_X_Cerrillos,path_city_Conchalí_X_Recoleta,Hour_2,path_city_Conchalí_X_Independencia,path_city_Providencia_X_undefined,path_city_Conchalí_X_Renca,path_city_Colina_X_Santiago,path_city_La Serena_X_Coquimbo,path_city_Macul_X_San Miguel,path_city_Peñalolén_X_undefined,path_city_Pudahuel_X_undefined,city_origin_Peñalolén,path_city_undefined_X_Santiago,path_city_Ñuñoa_X_Providencia,path_city_Santiago_X_Providencia,path_city_La Florida_X_Macul,path_city_San Joaquín_X_Providencia,city_origin_Vitacura,Hour_1,path_city_undefined_X_Estación Central,path_city_Lo Barnechea_X_Vitacura,path_county_Provincia de Cordillera_X_Provincia de Cordillera,city_origin_Ñuñoa,path_city_Ñuñoa_X_undefined,path_city_undefined_X_Las Condes,path_city_Macul_X_Macul,path_city_undefined_X_El Bosque,path_city_Vitacura_X_Vitacura,path_city_Peñalolén_X_La Florida,city_destiny_Huechuraba,path_city_undefined_X_San Pedro de la Paz,path_city_undefined_X_Renca,city_origin_La Serena,city_origin_Concepción,path_city_San Joaquín_X_Macul,path_city_Santiago_X_Ñuñoa,city_destiny_Talcahuano,quantity_KG,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,Hour_20,city_origin_Coquimbo,path_city_Valparaíso_X_Valparaíso,shoppers_number,path_city_La Florida_X_San Miguel,city_origin_Talcahuano,path_city_undefined_X_Colina,seniority_6c90661e6d2c7579f5ce337c3391dbb9,Hour_14,city_destiny_Cerro Navia,path_city_Providencia_X_Recoleta,county_origin_Provincia de Santiago,path_city_Las Condes_X_Vitacura,path_city_Las Condes_X_Las Condes,path_city_Estación Central_X_Estación Central,total_minutes,order_id
2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0.76,0,0,0,0,0,0,0,0,0,0,0,0,0,4.92,0,0,0,0,0,2.57,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8313,0,0,0,0,0,0,0,...,0,0,0,0,2.358128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,7698,0,0,0,0,1,0,0,0,0,0,0,,3a226ea48debc0a7ae9950d5540f2f34
5,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.84,0,0,0,0,0,1.82,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8946,0,0,0,0,0,0,0,...,0,0,0,0,4.190793,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,699,0,0,0,1,0,0,0,1,0,0,0,,9bf29b56619fcaf60b52690a848e10bb
9,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.84,0,0,0,0,0,1.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.9363,0,0,0,0,0,0,0,...,0,0,0,0,2.512359,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.852,0,0,0,0,7698,0,0,0,1,0,0,0,1,0,0,0,,299d948a5fd2cf2a921894b9bd24b94e


In [16]:
ofs.isnull().sum()

city_origin_Providencia                             0
path_city_Viña del Mar_X_undefined                  0
path_city_San Joaquín_X_Ñuñoa                       0
path_city_undefined_X_Conchalí                      0
Hour_3                                              0
                                                 ... 
path_city_Las Condes_X_Vitacura                     0
path_city_Las Condes_X_Las Condes                   0
path_city_Estación Central_X_Estación Central       0
total_minutes                                    1880
order_id                                            0
Length: 191, dtype: int64

### Modeling data

In [17]:
modeling = full[~full.total_minutes.isnull()]

In [18]:
modeling.head(3)

Unnamed: 0,city_origin_Providencia,path_city_Viña del Mar_X_undefined,path_city_San Joaquín_X_Ñuñoa,path_city_undefined_X_Conchalí,Hour_3,Hour_13,path_city_Viña del Mar_X_Viña del Mar,path_city_Peñalolén_X_Ñuñoa,path_city_Estación Central_X_Renca,path_city_Lo Barnechea_X_Las Condes,city_origin_undefined,city_destiny_San Ramón,path_city_Vitacura_X_Ñuñoa,period_night,path_city_Providencia_X_Ñuñoa,path_city_Providencia_X_Huechuraba,same_county_1.0,Hour_22,Hour_21,path_county_Provincia de Cordillera_X_Provincia de Santiago,path_city_Las Condes_X_Huechuraba,path_city_Las Condes_X_Lo Barnechea,path_city_Providencia_X_Las Condes,on_demand_True,path_city_Las Condes_X_Santiago,Hour_15,accepted_rate,path_city_Colina_X_Colina,county_destiny_Provincia de Cordillera,path_city_Vitacura_X_Santiago,city_destiny_Peñalolén,path_city_undefined_X_Pudahuel,path_city_Estación Central_X_Independencia,path_city_Macul_X_Providencia,path_city_Estación Central_X_Recoleta,path_city_Ñuñoa_X_Peñalolén,path_city_Ñuñoa_X_Las Condes,period_dawn,path_city_undefined_X_Viña del Mar,path_city_Las Condes_X_Peñalolén,rating,city_destiny_La Granja,path_city_Conchalí_X_Colina,city_origin_Santiago,city_destiny_Providencia,path_city_La Florida_X_undefined,picking_speed,path_city_Santiago_X_San Miguel,path_city_Peñalolén_X_Peñalolén,seniority_50e13ee63f086c2fe84229348bc91b5b,city_destiny_Lo Espejo,path_city_Ñuñoa_X_Ñuñoa,city_destiny_Renca,city_origin_Macul,city_origin_San Joaquín,city_origin_Huechuraba,path_city_San Joaquín_X_San Miguel,city_origin_Recoleta,path_city_Valparaíso_X_Viña del Mar,city_destiny_El Bosque,city_destiny_La Cisterna,path_city_Colina_X_Lo Barnechea,path_city_Vitacura_X_Las Condes,path_city_undefined_X_undefined,path_city_Macul_X_Ñuñoa,path_city_undefined_X_Providencia,city_destiny_Quinta Normal,found_rate,path_city_Concepción_X_San Pedro de la Paz,path_city_Providencia_X_Vitacura,path_city_La Florida_X_Peñalolén,path_city_San Miguel_X_San Joaquín,path_city_Ñuñoa_X_Santiago,path_city_Vitacura_X_Recoleta,city_destiny_Las Condes,...,path_city_Cerrillos_X_undefined,path_city_Vitacura_X_Providencia,path_city_undefined_X_Vitacura,Hour_16,distance_havesine,path_city_Macul_X_Peñalolén,path_city_Vitacura_X_Huechuraba,same_neighbourhood_1.0,path_city_Estación Central_X_Santiago,path_city_La Florida_X_La Florida,city_origin_Estación Central,path_city_San Joaquín_X_Santiago,path_city_Conchalí_X_Conchalí,path_city_Las Condes_X_Providencia,city_destiny_Ñuñoa,path_city_undefined_X_La Florida,path_city_Providencia_X_Macul,Hour_17,path_city_Concepción_X_Concepción,path_city_undefined_X_Cerrillos,path_city_Conchalí_X_Recoleta,Hour_2,path_city_Conchalí_X_Independencia,path_city_Providencia_X_undefined,path_city_Conchalí_X_Renca,path_city_Colina_X_Santiago,path_city_La Serena_X_Coquimbo,path_city_Macul_X_San Miguel,path_city_Peñalolén_X_undefined,path_city_Pudahuel_X_undefined,city_origin_Peñalolén,path_city_undefined_X_Santiago,path_city_Ñuñoa_X_Providencia,path_city_Santiago_X_Providencia,path_city_La Florida_X_Macul,path_city_San Joaquín_X_Providencia,city_origin_Vitacura,Hour_1,path_city_undefined_X_Estación Central,path_city_Lo Barnechea_X_Vitacura,path_county_Provincia de Cordillera_X_Provincia de Cordillera,city_origin_Ñuñoa,path_city_Ñuñoa_X_undefined,path_city_undefined_X_Las Condes,path_city_Macul_X_Macul,path_city_undefined_X_El Bosque,path_city_Vitacura_X_Vitacura,path_city_Peñalolén_X_La Florida,city_destiny_Huechuraba,path_city_undefined_X_San Pedro de la Paz,path_city_undefined_X_Renca,city_origin_La Serena,city_origin_Concepción,path_city_San Joaquín_X_Macul,path_city_Santiago_X_Ñuñoa,city_destiny_Talcahuano,quantity_KG,seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f,Hour_20,city_origin_Coquimbo,path_city_Valparaíso_X_Valparaíso,shoppers_number,path_city_La Florida_X_San Miguel,city_origin_Talcahuano,path_city_undefined_X_Colina,seniority_6c90661e6d2c7579f5ce337c3391dbb9,Hour_14,city_destiny_Cerro Navia,path_city_Providencia_X_Recoleta,county_origin_Provincia de Santiago,path_city_Las Condes_X_Vitacura,path_city_Las Condes_X_Las Condes,path_city_Estación Central_X_Estación Central,total_minutes,order_id
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0.92,0,0,0,1,0,0,0,0,0,0,0,0,0,4.76,0,0,0,0,0,1.3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.9024,0,0,0,0,0,0,0,...,0,0,0,0,1.823597,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.756,0,1,0,0,7698,0,0,0,1,0,0,0,1,0,0,0,67.684264,e750294655c2c7c34d83cc3181c09de4
1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0.92,0,0,0,0,0,0,0,0,0,0,1,0,0,4.96,0,0,0,0,0,2.54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.761,0,0,0,0,0,0,0,...,0,0,0,0,1.935026,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,7698,0,0,0,0,0,0,0,1,0,0,0,57.060632,6581174846221cb6c467348e87f57641
3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0.96,0,0,0,0,0,0,0,0,0,0,0,0,0,4.76,0,0,0,0,0,2.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.8776,0,0,0,0,0,0,0,...,0,0,0,0,3.820244,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,51,0,0,0,0,0,0,0,1,0,0,0,52.067742,7d2ed03fe4966083e74b12694b1669d8


In [19]:
modeling.isnull().sum()

city_origin_Providencia                          0
path_city_Viña del Mar_X_undefined               0
path_city_San Joaquín_X_Ñuñoa                    0
path_city_undefined_X_Conchalí                   0
Hour_3                                           0
                                                ..
path_city_Las Condes_X_Vitacura                  0
path_city_Las Condes_X_Las Condes                0
path_city_Estación Central_X_Estación Central    0
total_minutes                                    0
order_id                                         0
Length: 191, dtype: int64

In [20]:
# Getting the data:
X = modeling.drop(['total_minutes', 'order_id'], axis = 1)

y = modeling['total_minutes']

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 451, test_size= .25)

In [22]:
StdSca = StandardScaler()  
X_train[var_num] = pd.DataFrame(StdSca.fit_transform(X_train[var_num]), columns = var_num, index = X_train.index)
X_test[var_num] = pd.DataFrame(StdSca.transform(X_test[var_num]), columns = var_num, index = X_test.index)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [23]:
ofs[var_num] = StdSca.transform(ofs[var_num])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


## Model

In [24]:
CV = cross_validate(estimator = ElasticNet(alpha=0.025, l1_ratio=0.5), 
                    cv = 5, 
                    scoring = ["r2", 'neg_median_absolute_error', 'max_error', 'neg_mean_squared_error'], 
                    return_estimator = True,
                    return_train_score= False,
                    X = X_train,
                    y = y_train)
cv_df = pd.DataFrame(CV)
cv_df

Unnamed: 0,fit_time,score_time,estimator,test_r2,test_neg_median_absolute_error,test_max_error,test_neg_mean_squared_error
0,0.054878,0.002989,ElasticNet(alpha=0.025),0.456175,-14.301226,-181.51915,-636.190347
1,0.024418,0.0,ElasticNet(alpha=0.025),0.471481,-14.689994,-135.184829,-578.183877
2,0.032008,0.0,ElasticNet(alpha=0.025),0.462688,-14.07046,-137.949907,-589.936607
3,0.04773,0.0,ElasticNet(alpha=0.025),0.501535,-14.472364,-191.081681,-610.460041
4,0.031247,0.0,ElasticNet(alpha=0.025),0.480891,-14.56991,-140.957023,-552.165928


In [25]:
CV = CV["estimator"]

In [26]:
X_ofs = ofs.drop(['total_minutes', 'order_id'], axis = 1)
ofs['prediction'] = CV[3].predict(X_ofs)
ofs = ofs[['order_id','prediction']]
ofs.to_csv(os.path.join(path0, "submitted.csv"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ofs['prediction'] = CV[3].predict(X_ofs)


In [27]:
#Fim