In [None]:
import os

import pandas as pd
import numpy as np

import dask.dataframe as dd
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
import hvplot.dask

from folium import Map
from folium.plugins import HeatMap, HeatMapWithTime

from distributed.utils import tmpfile


In [None]:
# Shell escape: show the SLURM queue entries belonging to user `aschade`.
!squeue -u aschade

In [2]:
# Shell escape: cancel the SLURM jobs of user `aschade` before starting a new
# cluster. NOTE(review): `-u` is not limited to the dask jobs launched from
# this notebook -- confirm nothing else of this user should keep running.
!scancel -u aschade

In [3]:
# Address the dask scheduler is bound to, and the address substituted into the
# dashboard URL so it can be opened from a browser.
# NOTE(review): presumably two interfaces of the same login node -- confirm.
schedulerHost = '10.30.50.163'
dashboardHost = '10.60.110.163'

# Extra sbatch directives passed to every SLURM job that hosts dask workers.
# A reservation ('--reservation=lab_rdurante_304') was used previously and is
# currently disabled.
slurmDirectives = [
    '--time=08:00:00',
    '--partition=haswell',
    '--nodes=1',
    '--job-name=dask',
    '--output=dask.out',
    '--error=dask.error',
    '--mail-user=aaron.schade@upf.edu',
    '--mail-type=NONE',
]

cluster = SLURMCluster(
    cores=12,
    memory='200GB',                # a larger setting ('1400GB') was tried before
    local_directory='~/scratch',
    job_extra=slurmDirectives,
    n_workers=1,                   # open question: is this per job or per node?
    # Interfaces tried so far --
    #   workers start, but no diagnostics: em1, em2, ib0
    #   no workers at all: lo, em1.851, idrac, em3 & em4 (no ipv4)
    interface='ib0',
    # scheduler_options does not accept an interface and a host address at the
    # same time, so only the host is given here.
    scheduler_options={'host': schedulerHost},
)
cluster.scale(jobs=1)

scheduler = Client(cluster)
print(scheduler)

# Rewrite the dashboard URL to the browser-facing address, then print the
# status, workers and graph pages.
dashboardLink = scheduler.dashboard_link.replace(schedulerHost, dashboardHost)
for page in ('status', 'workers', 'graph'):
    print(dashboardLink.replace('status', page))

<Client: 'tcp://10.30.50.163:46200' processes=0 threads=0, memory=0 B>
http://10.60.110.163:35587/status
http://10.60.110.163:35587/workers
http://10.60.110.163:35587/graph


Perhaps you already have a cluster running?
Hosting the HTTP server on port 35587 instead


In [4]:
# Shell escape: check the SLURM queue again now that the cluster has been asked
# to scale, to see the state of the submitted dask job.
!squeue -u aschade

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
          19245734   haswell     dask  aschade CG       6:02      1 mr-00-14
          19245735   haswell     dask  aschade PD       0:00      1 (None)


In [5]:
# Open the parquet dataset of orders as a dask DataFrame and preview its
# first rows.
ordersPath = '~/scratch/glovo/parquet/orders/'
orders = dd.read_parquet(ordersPath)
orders.head()

Unnamed: 0_level_0,indexNumber,order_category,order_description,purchases_total_price,total_delivery_time_minutes,courier_total_distance_driven,pickup_latitude,pickup_longitude,delivery_latitude,delivery_longitude,...,order_weather_revenue_without_tax_eur,order_basket_revenue_without_tax_eur,order_total_partner_commission_eur,order_is_prime,order_bonus_coefficient,order_bonus_reason,order_base_cost_eur,order_bonus_cost_eur,order_courier_rain_bonus_eur,order_total_cost_eur
order_date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-02-01 00:00:01,116,RESTAURANT,1 x Quesadilla de Jamón y Queso - Salsa de Qu...,35.4,37.3,1.134,40.439811,-3.682568,40.447795,-3.679396,...,1.23967,0.0,7.08,False,130.0,BAD_WEATHER,3.2362,0.96,0.96,4.1962
2019-02-01 00:00:02,119,RESTAURANT,1 x Ensalada Mediterránea \n1 x Hamburguesa de...,7.0,34.7333,6.037,39.491367,-0.376666,39.463103,-0.380344,...,0.0,0.0,2.1,False,100.0,,4.44888,0.0,0.0,4.44888
2019-02-01 00:00:07,121,MCDONALDSES,1 x McMenú® Grand McExtreme™ de McDonald's Bac...,16.5,37.0333,4.093,41.403863,2.17347,41.412427,2.163945,...,0.0,0.0,0.0,False,100.0,,4.44623,0.0,0.0,4.44623
2019-02-01 00:00:13,123,RESTAURANT,2 x TGB Burger\n2 x Patatas Bastón Medianas - ...,16.8,42.0,6.539,40.429598,-3.714873,40.406064,-3.679678,...,0.0,0.0,5.04,False,130.0,BAD_WEATHER,4.72326,1.41,1.41,6.13326
2019-02-01 00:00:19,125,RESTAURANT,1 x Deluxe Mix (12 uds.)\n1 x Maki Mix (24 uds.),22.8,56.75,14.671,40.431684,-3.709129,40.381163,-3.743104,...,0.0,0.0,9.12,False,130.0,BAD_WEATHER,7.50981,2.25,2.25,9.75981


In [6]:
# Total number of rows in the orders dataset.
len(orders)

24697040

In [9]:
# Pull a 40% random sample of the orders into local memory as a pandas
# DataFrame. The seed is fixed so that re-running the notebook yields the same
# sample, and therefore the same heatmap.
sampleSeed = 42
ordersSample = orders.sample(frac=0.4, random_state=sampleSeed).compute()
len(ordersSample)

9878815

In [None]:
# Heatmap of delivery locations from the sampled orders.
# NOTE: the name `map` shadows the Python builtin; it is kept because the
# cell that saves the map refers to it by this name.
map = Map(location=[39.9941042, -2.8447872], zoom_start=6)

# Build [latitude, longitude] pairs, dropping rows where either coordinate is
# missing: folium rejects NaN locations, so one incomplete order would
# otherwise abort the whole plot.
deliveryPoints = (
    ordersSample[['delivery_latitude', 'delivery_longitude']]
    .dropna()
    .values
    .tolist()
)

heatmap = HeatMap(
    data=deliveryPoints,
    min_opacity=0.3,
    radius=10,
    blur=10,
    max_zoom=1,
).add_to(map)

map

In [11]:
# Write the rendered map to a standalone HTML file. The target directory is
# created first so the save does not fail on a fresh checkout that has no
# `outputs/` folder yet.
outputPath = 'outputs/orders_spatial_distribution.html'
os.makedirs(os.path.dirname(outputPath), exist_ok=True)
map.save(outputPath)