In [4]:
# Data manipulation
import numpy as np
import pandas as pd
from math import *
import seaborn as sns
import networkx as nx
import osmnx as ox
import folium

# Visualization.
import matplotlib.pyplot as plt

# Saving models
from datetime import datetime
import joblib

# Display all columns
pd.set_option('display.max_columns', 150,
             'display.max_rows', 150)

# ML
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, RepeatedKFold, train_test_split, cross_validate, cross_val_score 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, explained_variance_score, mean_squared_error, median_absolute_error
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn import ensemble

import requests, json
from sqlalchemy import create_engine

In [5]:
full = pd.read_csv("/Users/aurelianosancho/Documents/GitHub/teste_cornershop/full.csv")

In [None]:
var_cat = ['on_demand', 'seniority', 'Hour', 'Day_of_Week', 'period']
var_num = ['quantity_UN', 'quantity_KG', 'UN_plus_KG', 'UN_mult_KG',
           'distance_havesine','found_rate', 'picking_speed','accepted_rate', 'rating',
           'distance_car', 'weight_car', 'duration', 'dif_duration']

In [None]:
full = pd.get_dummies(full, columns = var_cat)

In [None]:
cdrop = ['order_id', 'shopper_id', 'store_branch_id', 'store_id',
         'Month', 'Year', 'Date']

In [None]:
full = full.drop(cdrop, axis = 1)

In [None]:
var = ['total_minutes', 'quantity_UN',
       'quantity_KG', 'is_more_UN', 'UN_plus_KG', 'UN_mult_KG', 'found_rate',
       'picking_speed', 'accepted_rate', 'rating',
       'distance', 
       'on_demand_False', 'on_demand_True',
       'seniority_41dc7c9e385c4d2b6c1f7836973951bf',
       'seniority_50e13ee63f086c2fe84229348bc91b5b',
       'seniority_6c90661e6d2c7579f5ce337c3391dbb9',
       'seniority_bb29b8d0d196b5db5a5350e5e3ae2b1f', 'Hour_0', 'Hour_1',
       'Hour_2', 'Hour_3', 'Hour_11', 'Hour_12', 'Hour_13', 'Hour_14',
       'Hour_15', 'Hour_16', 'Hour_17', 'Hour_18', 'Hour_19', 'Hour_20',
       'Hour_21', 'Hour_22', 'Hour_23', 'Day_of_Week_4', 'Day_of_Week_5',
       'Day_of_Week_6', 'period_afternoon', 'period_dawn', 'period_morning',
       'period_night']

full = full[var]

### Out of Sample

In [None]:
ofs = full[full.total_minutes.isnull()]

In [None]:
ofs.head(3)

In [None]:
ofs.isnull().sum()

### Modeling data

In [None]:
modeling = full[~full.total_minutes.isnull()]

In [None]:
modeling.head(3)

In [None]:
modeling.isnull().sum()

In [None]:
# Getting the data:
X = modeling.drop(['total_minutes'], axis = 1)

y = modeling['total_minutes']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 451, test_size= .25)

In [None]:
StdSca = StandardScaler() ## or standerscaler 
X_train[var_num] = pd.DataFrame(StdSca.fit_transform(X_train[var_num]), columns = var_num, index = X_train.index)
X_test[var_num] = pd.DataFrame(StdSca.transform(X_test[var_num]), columns = var_num, index = X_test.index)

In [None]:
ofs[var_num] = StdSca.transform(ofs[var_num])

## Model

In [None]:
CV = cross_validate(estimator = LinearRegression(), 
                    cv = 5, 
                    #scoring = "r2", 
                    scoring = ["r2", "neg_median_absolute_error"], 
                    X = X_train,
                    y = y_train)

cv_df = pd.DataFrame(CV)
cv_df

In [None]:
CV = cross_validate(estimator = ElasticNet(alpha=0.025, l1_ratio=0.5), 
                    cv = 5, 
                    #scoring = "r2", 
                    scoring = ["r2", "neg_median_absolute_error"], 
                    X = X_train,
                    y = y_train)

cv_df = pd.DataFrame(CV)
cv_df

In [None]:
model = ElasticNet(alpha=1.0, l1_ratio=0.5).fit(X = X_train, y = y_train)

In [None]:
ofs = ofs.drop(['total_minutes'], axis = 1)
predict = model.predict(ofs)

In [18]:
full.columns

Index(['Unnamed: 0', 'order_id', 'lat_destination', 'lng_destination',
       'promised_time', 'on_demand', 'shopper_id', 'store_branch_id',
       'total_minutes', 'quantity_UN', 'quantity_KG', 'is_more_UN',
       'UN_plus_KG', 'UN_mult_KG', 'seniority', 'found_rate', 'picking_speed',
       'accepted_rate', 'rating', 'store_id', 'lat_origin', 'lng_origin',
       'Hour', 'Month', 'Day_of_Week', 'Year', 'Date', 'period', 'distance'],
      dtype='object')

In [93]:
new = full[['lng_origin', 'lat_origin', 'lng_destination', 'lat_destination']]
new

Unnamed: 0,lng_origin,lat_origin,lng_destination,lat_destination
0,-70.579250,-33.485280,-70.579369,-33.501675
1,-70.535450,-33.441246,-70.556283,-33.440584
2,-71.545615,-33.008213,-71.544842,-32.987022
3,-70.537787,-33.355258,-70.512659,-33.328075
4,-70.568075,-33.386547,-70.564020,-33.403239
...,...,...,...,...
9973,-70.516727,-33.402024,-70.508377,-33.417677
9974,-70.692160,-33.451695,-70.646795,-33.432885
9975,-70.568075,-33.386547,-70.582943,-33.424562
9976,-70.568075,-33.386547,-70.598500,-33.413806


In [None]:
'lng_origin', 'lat_origin', 'lng_destination', 'lat_destination'

In [249]:
df = full[['lng_origin', 'lat_origin', 'lng_destination', 'lat_destination']]

In [252]:
import pandas as pd
import numpy as np
import requests
import json

path = [
-70.57925,
 -33.48528,
 -70.5793685999999,
 -33.5016745551528,
 -70.53545,
 -33.441246,
 -70.5562828195387,
 -33.4405840012576,
]
path = [ {'t': t, 'lat1': c[1], 'lon1': c[2], 'lat2': c[3], 'lon2': c[2]} for t, c in enumerate(zip(*[path[i::4] for i in range(4)]))]
df = pd.DataFrame(path)

path = '&'.join(list(df[pd.notnull(df.lat1)].apply(lambda x: str(x.lon1) + ',' + str(x.lat1) + ';' + str(x.lon2) + ',' + str(x.lat2), axis=1)))
osrm = 'http://router.project-osrm.org' # currently down
#osrm = 'http://localhost:5000'
url = osrm + '/route/v1/car/' + path + '?overview=false'

# OSRM is down now but this return [ "Metamorfosenallee", "Burgemeester Matsersingel", "Burgemeester Matsersingel", "Batavierenweg" ]
#matched = [tp['waypoints'] for tp in requests.get(url).json().get("routes")[0]['distance']]
matched = [tp['distance'] for tp in requests.post(url).json()['routes'][0]['legs']]


KeyError: 'routes'

In [253]:
url

'http://router.project-osrm.org/route/v1/car/-70.5793685999999,-33.48528;-70.5793685999999,-33.5016745551528&-70.5562828195387,-33.441246;-70.5562828195387,-33.4405840012576?overview=false'

In [205]:
matched

[3364.4, 8857, 397.3]

In [188]:
requests.get(url).json().get("routes")[0]['legs']

[{'steps': [],
  'weight': 380.1,
  'distance': 3364.4,
  'summary': '',
  'duration': 380.1},
 {'steps': [],
  'weight': 644.7,
  'distance': 8857,
  'summary': '',
  'duration': 644.7},
 {'steps': [],
  'weight': 86.7,
  'distance': 397.3,
  'summary': '',
  'duration': 86.7}]

In [143]:
r = requests.get(f"http://router.project-osrm.org/route/v1/car/{df[long1][i]},{df[lat1][i]};{df[long2][i]},{df[lat2][i]}?overview=false""")
routes = json.loads(r.content)
route_1 = routes.get("routes")[0]
distance.append(route_1['distance'])

NameError: name 'distance' is not defined

In [125]:
r = requests.get(url)
routes = json.loads(r.content)
route_1 = routes.get("routes")[0]
route_1


{'legs': [{'steps': [],
   'weight': 380.1,
   'distance': 3364.4,
   'summary': '',
   'duration': 380.1}],
 'weight_name': 'routability',
 'weight': 380.1,
 'distance': 3364.4,
 'duration': 380.1}

In [104]:
r = requests.get(f"http://router.project-osrm.org/route/v1/car/{-70.57925},{-33.48528};{-70.579369},{-33.501675}?overview=false""")
routes = json.loads(r.content)
route_1 = routes.get("routes")[0]
route_1

{'legs': [{'steps': [],
   'weight': 380.8,
   'distance': 3367.1,
   'summary': '',
   'duration': 380.8}],
 'weight_name': 'routability',
 'weight': 380.8,
 'distance': 3367.1,
 'duration': 380.8}

In [None]:
path = [
  ,
 ,
 ,
 ,
]

In [82]:
r = requests.get('http://router.project-osrm.org/route/v1/car/-33.48528,-70.57925;-33.5016745551528,-70.5793685999999?overview=false')
routes = json.loads(r.content)
route_1 = routes.get("routes")[0]
route_1


{'legs': [{'steps': [],
   'weight': 0,
   'distance': 0,
   'summary': '',
   'duration': 0}],
 'weight_name': 'routability',
 'weight': 0,
 'distance': 0,
 'duration': 0}

In [158]:
import pandas as pd
import numpy as np
import requests
import json

path = [
  51.954974, 5.857131,
  51.955014, 5.860725,
  51.954168, 5.866390,
  51.954889, 5.868611,
]
path = [ {'t': t, 'lat': c[0], 'lon': c[1]} for t, c in enumerate(zip(*[path[i::2] for i in range(2)]))]
df = pd.DataFrame(path)

path = ';'.join(list(df[pd.notnull(df.lat)].apply(lambda x: str(x.lon) + ',' + str(x.lat), axis=1)))
osrm = 'http://router.project-osrm.org' # currently down
#osrm = 'http://localhost:5000'
url = osrm + '/match/v1/car/' + path + '?overview=full&annotations=nodes&tidy=true'

# OSRM is down now but this return [ "Metamorfosenallee", "Burgemeester Matsersingel", "Burgemeester Matsersingel", "Batavierenweg" ]
matched = [tp['name'] for tp in requests.post(url).json()['tracepoints']]

In [159]:
requests.post(url).json()

{'code': 'Ok',
 'tracepoints': [{'alternatives_count': 0,
   'location': [5.857128, 51.954998],
   'distance': 2.678366,
   'hint': 'B3rKjP___38tAAAAYQAAAPIAAAApAAAA5xtMQisvZUIIrYZD3b6-QS0AAABhAAAA8gAAACkAAABO6AAAaF9ZADbFGANrX1kAHsUYAwQAXwXAC-yX',
   'name': 'Metamorfosenallee',
   'matchings_index': 0,
   'waypoint_index': 0},
  {'alternatives_count': 0,
   'location': [5.860727, 51.955036],
   'distance': 2.45174,
   'hint': 's8Qzgf___38eAAAAOgAAAGkAAAC4AAAALGIIQhXe9kFDO-pC1K42Qx4AAAA6AAAAaQAAALgAAABO6AAAd21ZAFzFGAN1bVkARsUYAwIA3wXAC-yX',
   'name': 'Burgemeester Matsersingel',
   'matchings_index': 0,
   'waypoint_index': 1},
  {'alternatives_count': 0,
   'location': [5.866385, 51.954161],
   'distance': 0.851292,
   'hint': 'u8Qzgf___38XAAAAIgAAAJMAAABbAAAAJwfPQYRRO0G9fSND7UieQhcAAAAiAAAAkwAAAFsAAABO6AAAkYNZAPHBGAOWg1kA-MEYAwUAPwfAC-yX',
   'name': 'Burgemeester Matsersingel',
   'matchings_index': 0,
   'waypoint_index': 2},
  {'alternatives_count': 1,
   'location': [5.868628, 5

In [None]:
r = requests.get(f"http://router.project-osrm.org/route/v1/car/{df[long1][i]},{df[lat1][i]};{df[long2][i]},{df[lat2][i]}?overview=false""")
        routes = json.loads(r.content)
        route_1 = routes.get("routes")[0]