# This is a sandbox for setting up a run of the simulation on LDV. 

1. Load data: this is where you decide which mode (`'PRIVATE_AUTO'`, `'COMMERCIAL'`, etc.) to run on.
2. Sample each county to match the number of vehicles from the stock rollover model. Alternatively, when that information is not available, sample a fixed fraction of each county.
3. run the simulation in parallel.
4. restack the outputs into charges and loads dataframes. 


In [4]:
import pandas as pd
import numpy as np
import replicaEVSE.load_curve as sim
import replicaEVSE.datautils as simdu
import os
import joblib
import dask.dataframe as dd

%reload_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', None)

# --- run configuration -------------------------------------------------
datadir = '../../data/'
mode = 'PRIVATE_AUTO'   # trip mode to simulate
simulation_id = 'base_dev_with_ldv_logic_2021_20230528'
test = False            # True -> quick development run on the first rows only

# Existing load summary, created in the EIA_data_download.ipynb notebook
existing_load = pd.read_csv(datadir + 'EIA_demand_summary.csv')

# Joined trips + population data set, sorted by county.
# ('wa_pop_and_trips_subsample.parquet' was used previously for test runs.)
trips_path = os.path.join(datadir, 'wa_pop_and_trips_sorted_county.parquet')

if not test:
    # full run: read the joined trips and population data sets
    merged_df = pd.read_parquet(trips_path)

    # right now, only look at trips made with the selected mode
    df = merged_df.loc[merged_df['mode'] == mode]
else:
    # development run: keep the first 10000 rows, then filter on mode
    df = pd.read_parquet(trips_path)
    df = df.head(10000)
    df = df.loc[df['mode'] == mode]
    simulation_id = 'dev'

###  sample by county: for now use a percent until we have the stock rollover model

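`ev_df` should eventually be given to us as the output of the stock rollover model; until then, the cell below mocks it with made-up numbers.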

In [6]:
county_list = df.destination_county.unique()

# Placeholder for the stock rollover model output: the real per-county
# numbers are not available yet, so draw a made-up count in [1, 1000) for
# each county. A fixed seed keeps this mock identical across kernel restarts
# (the previous unseeded np.random.randint call changed on every run).
placeholder_rng = np.random.default_rng(42)
people_to_select = placeholder_rng.integers(1, 1000, size=len(county_list))

# Data frame mimicking the output of the stock rollover model.
# NOTE(review): 'ef' is a constant placeholder; its meaning is not shown here.
ev_df = pd.DataFrame({'county': county_list, 'nEV': people_to_select, 'ef': 0.3})

Alternatively, just run on a fixed fraction of each county as a first guess.

In [8]:
# Fraction passed to the county sampler in lieu of real stock-rollover counts.
# NOTE(review): how `fraction` interacts with the nEV column of ev_df is
# decided inside sample_people_by_county -- confirm there.
county_sample_fraction = 0.05
df_county_subset = simdu.sample_people_by_county(
    df,
    ev_df,
    fraction=county_sample_fraction,
)

Embarrassingly parallel: split the data into chunks and run the simulation on each chunk in parallel.

In [9]:
if not test:
    number_of_chunks = 10000

    # Split by row position instead of calling np.array_split on the frame
    # itself: that path goes through DataFrame.swapaxes, which is deprecated
    # in recent pandas. The section sizes below are the ones np.array_split
    # uses: the first (n_rows % number_of_chunks) chunks get one extra row.
    n_rows = len(df_county_subset)
    base_size, n_larger = divmod(n_rows, number_of_chunks)
    chunk_sizes = np.full(number_of_chunks, base_size, dtype=int)
    chunk_sizes[:n_larger] += 1
    chunk_stops = np.cumsum(chunk_sizes)
    chunk_starts = chunk_stops - chunk_sizes

    # As before, chunks are empty when there are fewer rows than chunks.
    df_list = [
        df_county_subset.iloc[start:stop]
        for start, stop in zip(chunk_starts, chunk_stops)
    ]

In [10]:
if not test:
    # Run the simulation in parallel: one task per chunk in df_list.
    # Each chunk must be a pandas dataframe.
    charge_sims = joblib.Parallel(verbose=10, n_jobs=-1)(
        joblib.delayed(sim.simulate_person_load)(
            trips_df=df_i,
            existing_load=existing_load,
            simulation_id=simulation_id,
            managed=False)
        for df_i in df_list)
else:
    # Test mode: one serial run on the small test frame. The result is kept
    # in a one-element list so that charge_sims has the same shape as in the
    # parallel branch; previously it was discarded and the restack cells
    # failed with a NameError on charge_sims.
    charge_sims = [
        sim.simulate_person_load(
            trips_df=df,
            existing_load=existing_load,
            simulation_id=simulation_id,
            managed=False)
    ]

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   13.5s
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   13.6s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   13.7s
[Parallel(n_jobs=-1)]: Done  72 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done  93 tasks      | elapsed:   14.3s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   14.9s
[Parallel(n_jobs=-1)]: Done 160 tasks      | elapsed:   15.3s
[Parallel(n_jobs=-1)]: Done 185 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done 210 tasks      | elapsed:   16.0s
[Parallel(n_jobs=-1)]: Done 237 tasks      | elapsed:   16.3s
[Parallel(n_jobs=-1)]: Done 264 tasks      | elapsed:   16.7s
[Parallel(n_jobs=-1)]: Done 293 tasks      | elapsed:   17.1s
[Parallel(n_jobs=-1)]: Done 322 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 353 tasks      | elapsed:  

### Restack the DataFrames

In [11]:
# Pull the per-run 'charges' and 'loads' entries out of the simulation
# results, keeping the order of charge_sims.
charges_list, loads_list = [], []
for sim_result in charge_sims:
    charges_list.append(sim_result['charges'])
    loads_list.append(sim_result['loads'])

In [12]:
# Stack the per-run frames row-wise into one charges frame and one loads
# frame; the original row indices are kept (no re-indexing).
charges_df = pd.concat(charges_list, axis=0, ignore_index=False)
# the stacked loads frame is huge (~200 million rows)
loads_df = pd.concat(loads_list, axis=0, ignore_index=False)

In [14]:
# Persist both result frames next to the input data; the file names carry
# the trip mode and the simulation id.
charges_path = os.path.join(datadir, f'charges_{mode}_{simulation_id}.parquet')
loads_path = os.path.join(datadir, f'loads_{mode}_{simulation_id}.parquet')

charges_df.to_parquet(charges_path)
loads_df.to_parquet(loads_path)