In [1]:
import pandas as pd
import numpy as np
import replicaEVSE.load_curve as sim
import replicaEVSE.datautils as simdu
import os
import joblib
import dask.dataframe as dd

%reload_ext autoreload
%autoreload 2

# Never truncate the column list when a wide frame is displayed.
pd.options.display.max_columns = None

# Run configuration shared by the cells below.
datadir = '../../data/'   # directory the input/output files are read from / written to
mode = 'PRIVATE_AUTO'     # value of the trips 'mode' column to keep
test = False              # True -> work on a small head() of the trips table

In [2]:


# Existing demand summary; created in the EIA_data_download.ipynb notebook.
existing_load = pd.read_csv(datadir+'EIA_demand_summary.csv')
if test:
    # Development run: only the first 10,000 rows of the joined table are used.
    # (alternative input: 'wa_pop_and_trips_subsample.parquet')
    df = pd.read_parquet(os.path.join(datadir, 'wa_pop_and_trips_sorted_county.parquet'))
    df = df.head(10000)
    df = df.loc[df['mode'] == mode]
    simulation_id = 'dev'

else:
    # read in the joined trips and population data sets
    merged_df = pd.read_parquet(os.path.join(datadir, 'wa_pop_and_trips_sorted_county.parquet'))

    # right now, only look at private auto trips
    df = merged_df.loc[merged_df['mode'] == mode]

# Drop rows whose building type is 'mobile' or missing.
# `series != None` is evaluated elementwise by pandas and is True for every
# row (missing values never compare equal), so it filtered nothing; `.notna()`
# is the test that actually removes the missing entries.
df = df[(df['building_type'] != 'mobile') & df['building_type'].notna()]



### Sample by county: for now, use a percentage until we have the stock rollover model

`ev_df` should be provided to us as an input for the run.

In [3]:
# Vehicle stock by year (one column per year) and per-vehicle efficiency inputs.
stock_rollover = pd.read_csv(os.path.join(datadir, 'LDV_pop_adjusted.csv'))
efficiency = pd.read_csv(os.path.join(datadir, 'vehicle_inputs.csv'))

In [40]:
# Print the 2022-vintage 'Efficiency' value for every vehicle segment and for
# the EV and PHEV powertrains.
personal = ['Personal Sedan', 'Personal Crossover', 'Personal Truck/SUV']
commercial = ['Commercial Sedan', 'Commercial Crossover', 'Commercial Truck/SUV']

is_2022 = efficiency['Vintage'] == 2022
for cartype in personal + commercial:
    is_cartype = efficiency['Vehicle_type'] == cartype
    for powertrain in ['EV', 'PHEV']:
        # Matching the powertrain exactly already excludes ICE-G, ICE-D and FCEV rows.
        cond = is_cartype & (efficiency['Powertrain'] == powertrain) & is_2022
        # First matching row; raises IndexError when no row matches.
        ef = efficiency.loc[cond, 'Efficiency'].values[0]
        print(cartype, powertrain, ef)

Personal Sedan EV 0.25
Personal Sedan PHEV 80.0
Personal Crossover EV 0.3
Personal Crossover PHEV 80.0
Personal Truck/SUV EV 0.49
Personal Truck/SUV PHEV 80.0
Commercial Sedan EV 0.25
Commercial Sedan PHEV 80.0
Commercial Crossover EV 0.3
Commercial Crossover PHEV 80.0
Commercial Truck/SUV EV 0.49
Commercial Truck/SUV PHEV 80.0


In [4]:
# Stock-rollover rows with Powertrain == 'EV', excluding the 'other' domicile.
# No per-segment ('Vehicle_type') filter is applied here.
is_ev = stock_rollover['Powertrain'] == 'EV'
not_other_domicile = stock_rollover['domicile'] != 'other'
nev_df = stock_rollover.loc[is_ev & not_other_domicile].copy()


In [30]:
# Side-by-side look at the first and last year columns of the EV stock table.
nev_df.loc[:, ['2022', '2035']]

Unnamed: 0,2022,2035
0,0,1
5,0,5
9,0,0
15,1,13
19,0,77
...,...,...
2363,1,2239
2367,49,6109
2374,190,21705
2378,34,22073


In [13]:
# One row per person; people are drawn from this frame.
unique_df = df.drop_duplicates(subset=['person_id'])[['person_id', 'destination_county', 'building_type']]

# For each model year, draw as many people as there are EVs in the stock table
# for that year and keep all of their trips, tagged with the year.
reduced_df = []
for year in [2023, 2025, 2030, 2035]:
    num_to_select = nev_df[str(year)].sum()
    print(num_to_select)

    # Sampling without replacement, with the same seed for every year.
    selected = unique_df.person_id.sample(n=num_to_select, replace=False, random_state=42)

    # grab only those selected people from the original dataframe
    year_df = df.loc[df['person_id'].isin(selected)].assign(year=year)
    reduced_df.append(year_df)

final_df = pd.concat(reduced_df)
    

235891
235891
616258
616258
1619946
1619946
3030386
3030386


In [31]:
final_df[final_df['year'] == 2035]['person_id'].nunique()

3030386

In [32]:
final_df.columns

Index(['activity_id', 'person_id', 'mode', 'travel_purpose',
       'previous_activity_type', 'start_time', 'end_time', 'distance_miles',
       'vehicle_type', 'origin_bgrp', 'origin_bgrp_lat', 'origin_bgrp_lng',
       'destination_bgrp', 'destination_bgrp_lat', 'destination_bgrp_lng',
       'origin_land_use_l1', 'origin_land_use_l2', 'origin_building_use_l1',
       'origin_building_use_l2', 'destination_land_use_l1',
       'destination_land_use_l2', 'destination_building_use_l1',
       'destination_building_use_l2', 'origin_lat', 'origin_lng',
       'destination_lat', 'destination_lng', 'weekday', 'household_id',
       'BLOCKGROUP', 'BLOCKGROUP_work', 'BLOCKGROUP_school', 'TRACT',
       'TRACT_work', 'TRACT_school', 'age_group', 'age', 'sex', 'race',
       'ethnicity', 'individual_income_group', 'individual_income',
       'employment', 'education', 'school_grade_attending', 'industry',
       'household_role', 'subfamily_number', 'subfamily_relationship',
       'commute_mo

In [38]:
# Run the charging simulation once per model year and write the resulting
# charge events and load curves to parquet, one pair of files per year.
seg_list = ['Personal Sedan',
 'Personal Crossover',
 'Personal Truck/SUV',
 'Commercial Sedan',
 'Commercial Crossover',
 'Commercial Truck/SUV']

datadir = '../../data/'

existing_load = pd.read_csv(datadir+'EIA_demand_summary.csv')
# Retained for compatibility; nothing in this cell appends to them.
charge_df_list = []
loads_df_list = []

# Each year is paired (via zip) with one workplace-charging fraction and one
# multi-unit-charging fraction, both stepping from 0.2 to 0.6.
workfrac_arr = np.linspace(0.2, 0.6, 4)
multiunitfrac_arr = np.linspace(0.2, 0.6, 4)
years = [2023, 2025, 2030, 2035]

# Upper bound on the number of work items handed to joblib per year.
number_of_chunks = 10000

# Make sure the output directory exists before the first (expensive) year finishes.
os.makedirs(os.path.join(datadir, 'loads_charges'), exist_ok=True)

for year, workfrac, multiunitfrac in zip(years, workfrac_arr, multiunitfrac_arr):
    # Build the id inside the loop: it was previously computed once, before the
    # loop, from whatever value `year` happened to hold from an earlier cell,
    # so every year was tagged with the same simulation id.
    simulation_id = f'by_year_{year}'

    df_year = final_df[final_df['year'] == year]
    if df_year.empty:
        print(f'no trips for {year}; skipping')
        continue

    # Split by person rather than by row so that all trips of one person stay
    # in the same chunk, and so that no empty chunks are produced when there
    # are fewer people than `number_of_chunks`.
    # NOTE(review): assumes simulate_person_load expects every trip of a person
    # in a single frame -- confirm against replicaEVSE.load_curve.
    # (np.array_split on a DataFrame also emits a deprecation warning on
    # recent pandas versions.)
    person_codes, person_ids = pd.factorize(df_year['person_id'])
    n_chunks = min(number_of_chunks, len(person_ids))
    chunk_ids = person_codes.astype(np.int64) * n_chunks // len(person_ids)
    df_list = [chunk for _, chunk in df_year.groupby(chunk_ids, sort=False)]

    charge_sims = joblib.Parallel(verbose=10, n_jobs=-1)(
        joblib.delayed(sim.simulate_person_load)(
            trips_df=df_i,
            existing_load=existing_load,
            simulation_id=simulation_id,
            managed=False,
            efficiency=0.3,
            frac_work_charging=workfrac,
            frac_non_office_charging=0.1,
            frac_civic_charging=0.5,
            frac_multiunit_charging=multiunitfrac,
            frac_singleunit_charging=1.0,
            frac_public_dcfc=1.0)
        for df_i in df_list)

    print('creating charge and loads')
    charges_list = [x['charges'] for x in charge_sims]
    loads_list = [x['loads'] for x in charge_sims]

    # restack the dataframes
    charges_df = pd.concat(charges_list)
    loads_df = pd.concat(loads_list) # huge ~200 million rows

    # Tag both outputs with the scenario parameters used for this year.
    for out_df in (charges_df, loads_df):
        out_df['year'] = year
        out_df['segment'] = 'all'
        out_df['work_frac'] = workfrac
        out_df['multiunit_frac'] = multiunitfrac

    charges_df.to_parquet(os.path.join(datadir, f'loads_charges/charges_{year}_by_year_2023-06-06.parquet'))
    loads_df.to_parquet(os.path.join(datadir, f'loads_charges/loads_{year}_by_year_2023-06-06.parquet'))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 96 concurrent workers.


In [19]:
# Load the personal-sedan county samples for 2023 and 2025 for comparison.
test_2023, test_2025 = (
    pd.read_parquet(os.path.join(datadir, f'county_samples/county_sample_personal_sedan_{sample_year}.parquet'))
    for sample_year in (2023, 2025)
)

In [28]:
# Number of distinct activities in each of the two samples.
tuple(sample['activity_id'].nunique() for sample in (test_2023, test_2025))

(1273081, 1273081)

In [26]:
# Number of distinct people in each of the two samples.
tuple(sample['person_id'].nunique() for sample in (test_2023, test_2025))

(252329, 252329)

In [23]:
# One row per person (first occurrence), restricted to the id, destination
# county and building type columns.
unique_df = (
    df[['person_id', 'destination_county', 'building_type']]
    .drop_duplicates(subset=['person_id'])
)

In [24]:
# (rows, columns) of the one-row-per-person frame.
unique_df.shape

(5046603, 3)