In [323]:


import pandas as pd
import dask.dataframe as dd
import dask.array as da
import numpy as np
from dask.distributed import Client, LocalCluster
import replicaEVSE.sql_wrapper_functions as sql
import replicaEVSE.load_curve as sim
import replicaEVSE.geography as geo
import os
import requests
import geopandas as gpd
import datetime
from dask.diagnostics import ProgressBar
import dask

%reload_ext autoreload
%autoreload 2

# pbar = ProgressBar()
# pbar.register()

pd.set_option('display.max_columns', None)

datadir = '../../data'

In [324]:
def assign_charger_available(df):
    df['charger_power_kW'] = 0
    df['charger_power_kW'] = df['charger_power_kW'].mask(df['charge_type'] == 'HOME', 7.2)
    df['charger_power_kW'] = df['charger_power_kW'].mask(df['charge_type'] == 'WORK', 7.2)
    df['charger_power_kW'] = df['charger_power_kW'].mask(df['charge_type'] == 'PUBLIC', 150)
    return df


def assign_charger_opportunity(ddf, person_id_list, weekday='thursday', outtype='dask'):
    ddf = ddf.sort_values(by=['person_id', 'start_time', 'weekday'])
    ddf = ddf.reset_index(drop=True)

    # calculate stop duration and add to df
    if type(ddf) == pd.core.frame.DataFrame:
        ddf = sim.calculate_stop_duration(ddf)
    else:
        ddf = ddf.map_partitions(sim.calculate_stop_duration)

    # add total miles per person
    total_distance_df = ddf.groupby('person_id')['distance_miles'].sum().to_frame('total_distance')
    ddf = ddf.merge(total_distance_df, on='person_id', how='left')

    # get charger availability
    ddf = assign_charger_available(ddf)

    # Calculate total charge opportunity using stop duration and charger power
    ddf['charge_opportunity_remaining_kWh'] = 0
    ddf['charge_opportunity_remaining_kWh'] = \
        ddf['stop_duration'].dt.seconds/60/60*ddf['charger_power_kW']

    # Initialize total energy to consume
    if person_id_list is not None:
        cond1 = ddf['person_id'].isin(person_id_list)
    cond = cond1 & (ddf['weekday'] == weekday) 

    trips = ddf.loc[cond].sort_values(by=['person_id', 'start_time'])
    
    # calculate stop duration and add to df
    # if type(pd.core.frame.DataFrame)
    # trips = trips.map_partitions(sim.calculate_stop_duration)
    if outtype != 'dask':
        trips = trips.compute()

    return trips

In [325]:
# read in the data and take a subset for testing
merged_ddf = dd.read_parquet(os.path.join(datadir, 'wa_pop_and_trips.parquet'))
ddf = merged_ddf.head(10000)
ddf = dd.from_pandas(ddf, chunksize=1000)
# right now, only look at private auto trips
ddf = ddf.loc[ddf['mode'] == 'PRIVATE_AUTO']

In [326]:
person_id_list = ['12464838112032829470']
weekday = 'thursday'
consumption_kWh_per_mi = 0.3
trips = assign_charger_opportunity(ddf, person_id_list, weekday)
total_energy = trips['total_distance']*consumption_kWh_per_mi


In [327]:
# who has the most trips
cond = (ddf['weekday'] == weekday) 
ddf.loc[cond]['person_id'].value_counts().compute().head(5)

12464838112032829470    4
3739083454226448410     4
6843763911659648578     4
8653789151746634547     4
16894664332853945147    4
Name: person_id, dtype: int64

In [328]:
trips[[
        'person_id',
        'mode',
        'travel_purpose',
        'previous_activity_type',
        'start_time',
        'end_time',
        'stop_duration',
        'charge_type',
        'charger_power_kW',
    ]].compute()


Unnamed: 0,person_id,mode,travel_purpose,previous_activity_type,start_time,end_time,stop_duration,charge_type,charger_power_kW
336,12464838112032829470,PRIVATE_AUTO,SOCIAL,HOME,0 days 05:32:28,0 days 06:32:25,0 days 04:11:52,PUBLIC,150.0
337,12464838112032829470,PRIVATE_AUTO,EAT,SHOP,0 days 10:44:17,0 days 10:51:25,0 days 03:20:38,PUBLIC,150.0
338,12464838112032829470,PRIVATE_AUTO,MAINTENANCE,OTHER_ACTIVITY_TYPE,0 days 14:12:03,0 days 14:19:03,0 days 03:15:41,PUBLIC,150.0
340,12464838112032829470,PRIVATE_AUTO,RECREATION,HOME,0 days 17:37:51,0 days 17:43:41,0 days 11:48:47,PUBLIC,150.0


In [329]:
def while_loop(trips: dd) -> dd:
    # Initialize count variables
    i = 0
    j = 0
    opportunities = True

    charger_availability = sim.determine_charger_availability(pd.DataFrame(), trips)

    charge_set = list(set(trips.charge_type))
    charge_dict = {}
    # Before adding the options to the dictionary, check if the stop type exists in the trip data
    #  there are ranges over level 2, and DCFC are all public charging options
    if 'HOME' in charge_set:
        charge_dict.update({'HOME': 7.2})
    if 'WORK' in charge_set:
        charge_dict.update({'WORK': 7.2})
    if 'PUBLIC' in charge_set:
        charge_dict.update({'PUBLIC': 150})



    print(charger_availability)
    print(len(charger_availability))
    consumption_kWh_per_mi = 0.3
    total_energy = trips['total_distance']*consumption_kWh_per_mi
    print(total_energy)


    total_energy = total_energy.iloc[0]
    # Initialize total energy to consume
    remaining_energy = total_energy

    # Note: charge priority should favor home charging
    # Allocate charge energy across available charge opportunities until all energy is
    # recharged or opportunities run out
    while (remaining_energy > 0) & (opportunities is True):
        print(i, j, remaining_energy)
        charge_location = list(charger_availability.keys())[i]
        print(list(charge_dict.keys())[i])

        # this prioritizes longest stop (i.e. home charging) and then shortest stop 
        stops_sub = trips.loc[trips.charge_type == charge_location].sort_values(
            by='stop_duration', ascending=False)
        ind = stops_sub.index[j]
        charge_energy = np.min(
            [trips.loc[ind, 'charge_opportunity_remaining_kWh'], remaining_energy])
        trips.loc[ind, 'charge_energy_used_kWh'] = charge_energy
        trips.loc[ind, 'charge_opportunity_remaining_kWh'] -= charge_energy


        remaining_energy = np.max([remaining_energy-charge_energy, 0])
        j += 1
        if j == len(stops_sub):
            j = 0
            i += 1
        if i == len(charger_availability):
            opportunities = False
        print(i, j, remaining_energy)

In [317]:
sim.determine_charger_availability(pd.DataFrame(), trips).keys()

dict_keys(['PUBLIC'])

In [321]:
ddf = assign_charger_opportunity(ddf, person_id_list, weekday)
print(type(ddf))
out = ddf.map_partitions(while_loop)

<class 'dask.dataframe.core.DataFrame'>
{}
0


  meta = left._meta_nonempty.merge(right._meta_nonempty, **kwargs)


ValueError: Metadata inference failed in `while_loop`.

You have supplied a custom function and Dask is unable to 
determine the type of output that that function returns. 

To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.

Original error is below:
------------------------
KeyError('total_distance')

Traceback:
---------
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
    yield
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/dask/dataframe/core.py", line 6804, in _emulate
    return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/yj/8z5y9f4j1d7gq0dz2s07gc840000gp/T/ipykernel_91226/43490972.py", line 25, in while_loop
    total_energy = trips['total_distance']*consumption_kWh_per_mi
                   ~~~~~^^^^^^^^^^^^^^^^^^
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/pandas/core/frame.py", line 3807, in __getitem__
    indexer = self.columns.get_loc(key)
              ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/pandas/core/indexes/base.py", line 3804, in get_loc
    raise KeyError(key) from err


In [336]:
ddf.loc[ddf['end_time'] == ddf['end_time'].min()]['person_id'].compute()

766    17250274462130214555
Name: person_id, dtype: object

In [339]:
merged_ddf.loc[merged_ddf['person_id'] == '17250274462130214555'].compute()

Unnamed: 0,activity_id,person_id,mode,travel_purpose,previous_activity_type,start_time,end_time,distance_miles,vehicle_type,origin_bgrp,origin_bgrp_lat,origin_bgrp_lng,destination_bgrp,destination_bgrp_lat,destination_bgrp_lng,origin_land_use_l1,origin_land_use_l2,origin_building_use_l1,origin_building_use_l2,destination_land_use_l1,destination_land_use_l2,destination_building_use_l1,destination_building_use_l2,origin_lat,origin_lng,destination_lat,destination_lng,weekday,household_id,BLOCKGROUP,BLOCKGROUP_work,BLOCKGROUP_school,TRACT,TRACT_work,TRACT_school,age_group,age,sex,race,ethnicity,individual_income_group,individual_income,employment,education,school_grade_attending,industry,household_role,subfamily_number,subfamily_relationship,commute_mode,tenure,migration,household_size,household_income_group,household_income,family_structure,vehicles,building_type,resident_type,language,lat,lng,lat_work,lng_work,lat_school,lng_school,wfh,charge_type
766,9394107680084557282,17250274462130214555,PRIVATE_AUTO,EAT,HOME,0 days 18:49:00,0 days 00:00:56,252.977203,,410599514002,45.271844,-119.013362,530630120002,47.665866,-117.277437,residential,single_family,residential,single_family,commercial,retail,commercial,retail,45.137132,-118.929489,47.657774,-117.271889,thursday,16140367170371447841,410599514002,410599514002,,41059951400,41059951400,,35_64,63.0,M,white,not_hispanic_or_latino,40000_80000,54666.0,employed,bachelors_degree,not_attending_school,naics11,head_of_household,0,,driving,owner,same_house,3_person,75000_125000,123567.0,married_couple,3_plus,single_family,core,english,45.137132,-118.929489,45.529362,-118.792404,,,worked_in_person,PUBLIC


In [315]:
out.compute()

0    {'PUBLIC': 150}
dtype: object

In [247]:
stops_sub[[
        'person_id',
        'mode',
        'travel_purpose',
        'previous_activity_type',
        'start_time',
        'end_time',
        'stop_duration',
        'charge_type',
        'charger_power_kW',
    ]]

Unnamed: 0,person_id,mode,travel_purpose,previous_activity_type,start_time,end_time,stop_duration,charge_type,charger_power_kW
3,12464838112032829470,PRIVATE_AUTO,RECREATION,HOME,0 days 17:37:51,0 days 17:43:41,0 days 11:48:47,PUBLIC,150.0
0,12464838112032829470,PRIVATE_AUTO,SOCIAL,HOME,0 days 05:32:28,0 days 06:32:25,0 days 04:11:52,PUBLIC,150.0
1,12464838112032829470,PRIVATE_AUTO,EAT,SHOP,0 days 10:44:17,0 days 10:51:25,0 days 03:20:38,PUBLIC,150.0
2,12464838112032829470,PRIVATE_AUTO,MAINTENANCE,OTHER_ACTIVITY_TYPE,0 days 14:12:03,0 days 14:19:03,0 days 03:18:48,PUBLIC,150.0


In [158]:
charger_availability.keys()

dict_keys(['PUBLIC'])

In [207]:
trips[[
        'person_id',
        'mode',
        'start_time',
        'end_time',
        'stop_duration',
        'charger_power_kW',
        'charge_energy_used_kWh',
        'charge_opportunity_remaining_kWh'
    ]]

Unnamed: 0,person_id,mode,start_time,end_time,stop_duration,charger_power_kW,charge_energy_used_kWh,charge_opportunity_remaining_kWh
405,3742555751551719909,PRIVATE_AUTO,0 days 04:55:25,0 days 05:36:56,0 days 00:49:42,150.0,,124.25
406,3742555751551719909,PRIVATE_AUTO,0 days 06:26:38,0 days 07:57:29,0 days 01:23:44,150.0,80.305858,129.027475
408,3742555751551719909,PRIVATE_AUTO,0 days 13:28:20,0 days 13:43:16,0 days 00:54:30,150.0,,136.25


In [159]:
trips['charge_type']

[autoreload of replicaEVSE.load_curve failed: Traceback (most recent call last):
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 471, in superreload
    module = reload(module)
             ^^^^^^^^^^^^^^
  File "/Users/matthew.wilde/mambaforge/envs/py311/lib/python3.11/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/Users/matthew.wilde/Projects/WATES/replicaEVSE/src/replicaEVSE/load_curve.py", line 74, in <module>
    df: pd.dataframe,) -> dict:
        ^^^^^^^^^^^^
  File "/Users/matthew.wi

339    PUBLIC
340    PUBLIC
341    PUBLIC
Name: charge_type, dtype: object