In [1]:
import pandas as pd
import numpy as np
import replicaEVSE.load_curve as sim
import replicaEVSE.datautils as simdu
import os
import joblib
import dask.dataframe as dd
from tqdm import tqdm

# Enable autoreload so edits to imported modules are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Notebook configuration.
# datadir: relative path to the data directory. Some cells concatenate file
#          names directly onto it, so the trailing slash matters.
# mode:    trip mode kept when filtering the trips table.
# test:    when True, the load cell works on the first 10,000 rows only.
datadir = '../../data/'
mode = 'PRIVATE_AUTO'
test = False

# Sample the number of people/vehicles by county segment and year

See `scripts/sample_counties.py` to run this sampling as a script.

In [2]:


# EIA demand summary, created in the EIA_data_download.ipynb notebook.
existing_load = pd.read_csv(datadir+'EIA_demand_summary.csv')

# Read the joined trips and population data set. `merged_df` is loaded in both
# modes because a later cell rebuilds `full_df` from it; previously it was
# undefined when `test` was True.
merged_df = pd.read_parquet(os.path.join(datadir, 'wa_pop_and_trips_sorted_county.parquet'))

if test:
    # Development run: keep only the first 10,000 rows so cells execute quickly.
    merged_df = merged_df.head(10000)
    simulation_id = 'dev'

# Right now, only look at private auto trips (`mode` is set in the config cell).
df = merged_df.loc[merged_df['mode'] == mode]

### TODO: revisit taking out mobile home owners
# Drop mobile-home residents and rows with a missing building type.
# `series != None` is evaluated element-wise by pandas and is True for every
# row (missing values included), so the null check must be `.notna()` to
# actually remove anything.
df = df[(df['building_type'] != 'mobile') & df['building_type'].notna()]



In [3]:
# Per-person county table. The left join keeps every trip row, including
# those whose person_id has no match in `counties`.
counties_path = datadir+'/population_counties_dataset.parquet'
counties = pd.read_parquet(counties_path, engine='pyarrow')
df = pd.merge(left=df, right=counties, how='left', on='person_id')

In [4]:
# Yakima County residents' private-auto trips, rebuilt from `merged_df`
# (so the building_type filter applied to `df` is NOT applied here).
auto_trips = merged_df.loc[merged_df['mode'] == mode]
auto_trips_with_county = pd.merge(auto_trips, counties, on='person_id', how='left')
in_yakima = auto_trips_with_county['home_cty'] == 'Yakima County, WA'
full_df = auto_trips_with_county[in_yakima]

In [5]:
# Collapse to one row per person (the first row seen for each person_id).
full_df = full_df.drop_duplicates(subset=['person_id'])

In [6]:
# List the columns available after joining the trips, population and county tables.
df.columns

Index(['activity_id', 'person_id', 'mode', 'travel_purpose',
       'previous_activity_type', 'start_time', 'end_time', 'distance_miles',
       'vehicle_type', 'origin_bgrp', 'origin_bgrp_lat', 'origin_bgrp_lng',
       'destination_bgrp', 'destination_bgrp_lat', 'destination_bgrp_lng',
       'origin_land_use_l1', 'origin_land_use_l2', 'origin_building_use_l1',
       'origin_building_use_l2', 'destination_land_use_l1',
       'destination_land_use_l2', 'destination_building_use_l1',
       'destination_building_use_l2', 'origin_lat', 'origin_lng',
       'destination_lat', 'destination_lng', 'weekday', 'household_id',
       'BLOCKGROUP', 'BLOCKGROUP_work', 'BLOCKGROUP_school', 'TRACT',
       'TRACT_work', 'TRACT_school', 'age_group', 'age', 'sex', 'race',
       'ethnicity', 'individual_income_group', 'individual_income',
       'employment', 'education', 'school_grade_attending', 'industry',
       'household_role', 'subfamily_number', 'subfamily_relationship',
       'commute_mo

### Stock rollover model (from Gerard) of EVs by county, year, vehicle segment, engine (EV, PHEV) and housing (sfh, mfh).

`ev_df`

In [7]:
# Stock-rollover output (vehicle counts per county/segment/domicile/powertrain
# and year) and the per-vehicle efficiency inputs.
stock_rollover_path = datadir+'ldv_population_output_adjusted.csv'
efficiency_path = datadir+'vehicle_inputs.csv'
stock_rollover = pd.read_csv(stock_rollover_path)
efficiency = pd.read_csv(efficiency_path)

In [8]:
# Print the 2022-vintage efficiency of every light-duty segment for both
# plug-in powertrains, as a sanity check on the efficiency table.
personal = ['Personal Sedan', 'Personal Crossover', 'Personal Truck/SUV']
commercial = ['Commercial Sedan', 'Commercial Crossover', 'Commercial Truck/SUV']

# Masks that do not depend on the loop variables are built once.
is_2022_vintage = efficiency['Vintage'] == 2022
is_not_combustion = ~efficiency['Powertrain'].isin(['ICE-G', 'ICE-D', 'FCEV'])

for cartype in personal + commercial:
    is_cartype = efficiency['Vehicle_type'] == cartype
    for powertrain in ['EV', 'PHEV']:
        is_powertrain = efficiency['Powertrain'] == powertrain
        cond = is_cartype & is_not_combustion & is_powertrain & is_2022_vintage
        # First matching row; raises IndexError if the combination is absent.
        ef = efficiency[cond]['Efficiency'].values[0]
        print(cartype, powertrain, ef)

Personal Sedan EV 0.25
Personal Sedan PHEV 80.0
Personal Crossover EV 0.3
Personal Crossover PHEV 80.0
Personal Truck/SUV EV 0.49
Personal Truck/SUV PHEV 80.0
Commercial Sedan EV 0.25
Commercial Sedan PHEV 80.0
Commercial Crossover EV 0.3
Commercial Crossover PHEV 80.0
Commercial Truck/SUV EV 0.49
Commercial Truck/SUV PHEV 80.0


# extract the population ldv data from the stock rollover model 
 - What to do about housing == 'other'? 

### include both EV and PHEV

In [9]:
# Keep only plug-in powertrains, renumber the rows, then discard the 'other'
# domicile and the leftover CSV index column.
ev_cond = stock_rollover['Powertrain'].isin(['EV', 'PHEV'])
plug_in_stock = stock_rollover.loc[ev_cond].reset_index(drop=True)
has_known_domicile = plug_in_stock['domicile'] != 'other'
nev_df = plug_in_stock.loc[has_known_domicile].drop(columns=['Unnamed: 0'])


See the change in stock over the years

We ingest the number of vehicles in each segment for each year, and for each county, segment and year we sample that many people from the corresponding subset of the trips table. This takes more than 300 minutes for all the years and segments.

In [10]:
# Display the EV/PHEV stock table: one row per county, vehicle type, domicile
# and powertrain, with one column of vehicle counts per year.
nev_df

Unnamed: 0,County,Vehicle_type,domicile,Powertrain,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,2035
0,Adams,Commercial Crossover,mfh,EV,0,0,0,0,0,0,0,1,1,1,1,1,1,1
1,Adams,Commercial Crossover,mfh,PHEV,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Adams,Commercial Crossover,sfh,EV,0,0,1,1,1,2,2,2,3,3,3,5,5,5
3,Adams,Commercial Crossover,sfh,PHEV,0,1,1,1,1,1,1,1,1,1,1,1,1,1
4,Adams,Commercial Sedan,mfh,EV,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,Yakima,Personal Sedan,sfh,PHEV,98,2433,4815,6453,7994,9117,10046,10703,11606,12252,12842,13552,14206,14859
920,Yakima,Personal Truck/SUV,mfh,EV,34,1780,3504,5143,6768,8393,9963,11285,12851,14524,16279,18064,19987,22314
921,Yakima,Personal Truck/SUV,mfh,PHEV,18,1063,1757,2265,2617,2945,3170,3367,3593,3845,4102,4380,4667,5020
922,Yakima,Personal Truck/SUV,sfh,EV,133,6521,12813,18796,24713,30637,36335,41141,46932,53211,59811,66703,74132,83107


In [11]:
# Preview one row per person (the first row seen for each person_id).
# The result is only displayed here, not stored.
df.drop_duplicates(subset=['person_id'])

Unnamed: 0,activity_id,person_id,mode,travel_purpose,previous_activity_type,start_time,end_time,distance_miles,vehicle_type,origin_bgrp,origin_bgrp_lat,origin_bgrp_lng,destination_bgrp,destination_bgrp_lat,destination_bgrp_lng,origin_land_use_l1,origin_land_use_l2,origin_building_use_l1,origin_building_use_l2,destination_land_use_l1,destination_land_use_l2,destination_building_use_l1,destination_building_use_l2,origin_lat,origin_lng,destination_lat,destination_lng,weekday,household_id,BLOCKGROUP,BLOCKGROUP_work,BLOCKGROUP_school,TRACT,TRACT_work,TRACT_school,age_group,age,sex,race,ethnicity,individual_income_group,individual_income,employment,education,school_grade_attending,industry,household_role,subfamily_number,subfamily_relationship,commute_mode,tenure,migration,household_size,household_income_group,household_income,family_structure,vehicles,building_type,resident_type,language,lat,lng,lat_work,lng_work,lat_school,lng_school,wfh,charge_type,destination_county,home_cty,home_st
0,18336634547462657450,10000002909730821040,PRIVATE_AUTO,SOCIAL,HOME,0 days 08:20:42,0 days 08:39:00,2.897196,,530250111002,47.098457,-119.319023,530250111003,47.117839,-119.303234,residential,single_family,residential,single_family,residential,single_family,residential,single_family,47.085370,-119.319090,47.118737,-119.298334,thursday,17571687911180552742,530250111002,530250110001,,53025011100,53025011000,,35_64,64.0,M,white,hispanic_or_latino,100000_plus,103145.0,employed,some_college,not_attending_school,naics53,spouse,0,,driving,owner,same_house,2_person,125000_plus,211447.0,married_couple,3_plus,single_family,core,english,47.085370,-119.319090,47.132725,-119.213000,,,worked_in_person,PUBLIC,"Grant County, WA","Grant County, WA",Washington
9,15710281585223970551,10000003183049770926,PRIVATE_AUTO,EAT,SHOP,0 days 11:35:32,0 days 11:44:09,2.751763,,530110408031,45.700708,-122.647018,530110404112,45.714915,-122.645220,commercial,retail,commercial,retail,mixed_use,retail,commercial,retail,45.697545,-122.653456,45.716686,-122.650199,saturday,17212045348023710843,530110408032,,,53011040803,,,65_plus,68.0,F,white,not_hispanic_or_latino,20000_40000,32310.0,notinlf,some_college,not_attending_school,not_working,head_of_household,0,,not_working,owner,same_house,2_person,75000_125000,104124.0,married_couple,2,single_family,core,english,45.702720,-122.632958,,,,,under_16_or_notinlf,PUBLIC,"Clark County, WA","Clark County, WA",Washington
13,6016987872286925293,10000005160936665771,PRIVATE_AUTO,HOME,SOCIAL,0 days 17:47:00,0 days 19:23:02,74.042647,,530330098001,47.578449,-122.383179,530350913013,47.624988,-122.779109,residential,single_family,residential,single_family,residential,single_family,residential,single_family,47.576362,-122.388981,47.656602,-122.778479,saturday,9613290883819190758,530350913013,530350904002,530330054002,53035091301,53035090400,53033005400,18_24,21.0,F,white,not_hispanic_or_latino,0_20000,6977.0,employed,high_school,not_attending_school,naics446191,child,0,,driving,owner,same_house,4_person,75000_125000,86842.0,married_couple,3_plus,single_family,core,english,47.656602,-122.778479,,,47.647402,-122.340894,worked_in_person,HOME,"Kitsap County, WA","Kitsap County, WA",Washington
14,15027999648931808966,10000013294719422231,PRIVATE_AUTO,SOCIAL,HOME,0 days 09:28:46,0 days 09:31:14,0.580337,,530670118221,46.974385,-122.957918,530670118221,46.974385,-122.957918,residential,single_family,residential,single_family,residential,single_family,residential,single_family,46.960341,-122.970871,46.964192,-122.971776,thursday,10349175711236544344,530670118221,,,53067011822,,,65_plus,78.0,M,white,not_hispanic_or_latino,40000_80000,68992.0,notinlf,bachelors_degree,not_attending_school,not_working,head_of_household,0,,not_working,owner,same_house,6_person,125000_plus,144450.0,married_couple,3_plus,single_family,core,english,46.960341,-122.970871,,,,,under_16_or_notinlf,PUBLIC,"Thurston County, WA","Thurston County, WA",Washington
16,15467883780431239164,10000015626584625931,PRIVATE_AUTO,RECREATION,HOME,0 days 09:04:05,0 days 09:12:00,4.264307,,530610519233,47.829657,-122.194194,530610519161,47.798476,-122.224617,residential,single_family,residential,single_family,open_space,open_space,open_space,open_space,47.834402,-122.195658,47.794730,-122.219105,saturday,8829979842484623950,530610519233,530610413014,530330218042,53061051923,53061041301,53033021804,18_24,22.0,F,white,not_hispanic_or_latino,40000_80000,73822.0,employed,some_college,undergraduate,naics62,child,0,,driving,owner,same_house,4_person,125000_plus,241504.0,married_couple,3_plus,single_family,core,english,47.834402,-122.195658,,,47.762263,-122.195394,worked_in_person,PUBLIC,"Snohomish County, WA","Snohomish County, WA",Washington
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25405970,2747739846740175595,12976261446612648060,PRIVATE_AUTO,WORK,HOME,0 days 04:30:30,0 days 05:05:40,18.415900,,530770008001,46.596166,-120.538232,530770029006,46.694867,-120.884526,residential,single_family,residential,single_family,residential,single_family,residential,single_family,46.598331,-120.541278,46.721036,-120.768243,thursday,8778745205760894874,530770008001,,,53077000800,,,65_plus,74.0,M,white,not_hispanic_or_latino,20000_40000,29107.0,notinlf,advanced_degree,not_attending_school,not_working,head_of_household,0,,not_working,owner,same_house,2_person,125000_plus,280103.0,married_couple,2,single_family,core,english,46.598331,-120.541278,,,,,under_16_or_notinlf,WORK,"Yakima County, WA","Yakima County, WA",Washington
25405977,9073209261666799727,1297626145485026953,PRIVATE_AUTO,SCHOOL,HOME,0 days 07:44:43,0 days 08:25:01,9.906512,,530330322113,47.571750,-122.019051,530330323131,47.672274,-122.070886,residential,single_family,residential,single_family,civic_institutional,education,civic_institutional,education,47.569167,-122.020931,47.681105,-122.064953,thursday,4335948110736381613,530330322113,,530330323131,53033032211,,53033032313,15_17,17.0,F,asian,not_hispanic_or_latino,lte_0,0.0,notinlf,k_12,school,not_working,child,0,,not_working,owner,same_house,4_person,125000_plus,1082875.0,married_couple,2,single_family,core,asian_pacific,47.569167,-122.020931,,,47.681105,-122.064953,under_16_or_notinlf,PUBLIC,"King County, WA","King County, WA",Washington
25405979,4964576320702228569,129762617484631390,PRIVATE_AUTO,SHOP,SHOP,0 days 12:33:21,0 days 12:49:19,8.654900,,530770019011,46.265455,-119.897168,530770020014,46.320212,-120.001964,commercial,retail,commercial,retail,commercial,retail,commercial,retail,46.255529,-119.892505,46.322000,-120.014010,thursday,10704282497388641031,530770019012,,,53077001901,,,65_plus,69.0,F,white,hispanic_or_latino,0_20000,7779.0,notinlf,no_school,not_attending_school,not_working,head_of_household,0,,not_working,owner,same_house,4_person,125000_plus,135274.0,family_single,3_plus,single_family,core,spanish,46.251095,-119.893585,,,,,under_16_or_notinlf,PUBLIC,"Yakima County, WA","Yakima County, WA",Washington
25405982,457120398484039395,129762667123447258,PRIVATE_AUTO,HOME,SOCIAL,0 days 08:54:44,0 days 08:56:06,0.299416,,530330303103,47.294703,-122.352428,530330303102,47.287137,-122.347325,residential,single_family,residential,single_family,residential,single_family,residential,single_family,47.294563,-122.345137,47.291426,-122.343578,thursday,1617728171307410716,530330303102,530330303103,,53033030310,53033030310,,35_64,53.0,M,white,hispanic_or_latino,100000_plus,142297.0,employed,high_school,not_attending_school,naics23,spouse,0,,driving,owner,same_house,2_person,125000_plus,161746.0,married_couple,1,single_family,core,english,47.291426,-122.343578,47.300378,-122.359055,,,worked_from_home,HOME,"King County, WA","King County, WA",Washington


In [12]:
# One row per person, carrying only the attributes the sampler needs.
person_columns = ['person_id', 'home_cty', 'building_type']
pop_df = df[person_columns].drop_duplicates(subset=['person_id'])

In [13]:
# check outputs
#df_2035 = pd.read_parquet(os.path.join(datadir, 'county_samples/county_sample_2035.parquet'))
# df_2034 = pd.read_parquet(os.path.join(datadir, 'county_samples/county_sample_2034.parquet'))
# len(df_2035)

# this is the most recent model that should be working on full dataset.

try on a single (Yakima) county

In [14]:
# Restrict the people, trips and vehicle-count tables to a single county.
county = 'Yakima'
county_str = f'{county} County, WA'

county_cond = pop_df['home_cty'] == county_str
county_pop_df = pop_df.loc[county_cond].copy()
county_trip_df = df.loc[df['home_cty'] == county_str].copy()
nvehicles_sub = nev_df.loc[nev_df['County'] == county].copy()

# Table sizes before and after the county cut.
df.shape, county_trip_df.shape, pop_df.shape, county_pop_df.shape

((25405990, 71), (930192, 71), (5046603, 3), (148278, 3))

clone people in the county

In [37]:
# Double the county: every resident and each of their trips is duplicated
# under a new person_id made by appending 'clone' to the original id.
county_trip_df = df[df['person_id'].isin(county_pop_df['person_id'])].copy()

clone = county_pop_df.assign(person_id=county_pop_df['person_id']+'clone')
county_trip_clone = county_trip_df.assign(person_id=county_trip_df['person_id']+'clone')

county_pop_extended = pd.concat([county_pop_df, clone])
county_trip_extended = pd.concat([county_trip_df, county_trip_clone])




In [39]:
# Display the cloned people table; person_id values carry the 'clone' suffix.
clone

Unnamed: 0,person_id,home_cty,building_type
282,10000157299148475764clone,"Yakima County, WA",single_family
340,10000187826355988094clone,"Yakima County, WA",single_family
361,10000191068586386178clone,"Yakima County, WA",single_family
990,10000673915974666862clone,"Yakima County, WA",single_family
1031,10000704400120532797clone,"Yakima County, WA",single_family
...,...,...,...
25405638,12976034979162133229clone,"Yakima County, WA",GQ_structure
25405932,12976240533849475456clone,"Yakima County, WA",single_family
25405946,12976249970443606240clone,"Yakima County, WA",single_family
25405970,12976261446612648060clone,"Yakima County, WA",single_family


There are more vehicles in 2035 than people living in the county. Because we assume "one person = one vehicle", we can't model the surplus vehicles directly. We can either multiply the load and the number of chargers by a scalar so they match the total vehicle count, or clone people so there are enough to sample from.

In [15]:
# Projected 2035 vehicles versus people available to carry them:
# (vehicles, filtered county population, unfiltered `full_df` population,
#  vehicles minus filtered population).
total_vehicles_in_yakima = nvehicles_sub['2035'].sum()
n_county_people = county_pop_df.shape[0]
n_full_people = full_df.shape[0]
vehicle_shortfall = total_vehicles_in_yakima - n_county_people
total_vehicles_in_yakima, n_county_people, n_full_people, vehicle_shortfall

(184420, 148278, 167119, 36142)

In [32]:
def exclusionary_sampler(df: pd.DataFrame, population_df: pd.DataFrame, nev_df: pd.DataFrame, county: str, year: str) -> pd.DataFrame:
    """Assign each projected vehicle to a distinct person and return their trips.

    The rows of ``nev_df`` (one per county / vehicle type / domicile /
    powertrain) are processed in the order given. For each row,
    ``row[year]`` people are drawn without replacement from the part of
    ``population_df`` that matches the row's domicile and has not been drawn
    for an earlier row. Those people's trips are copied out of ``df`` and
    tagged with the row's segment information.

    When a row asks for more people than are left, everyone left is used and
    the shortfall is added to a running ``remainder`` that is printed at the end.

    Args:
        df: Trips table with a ``person_id`` column.
        population_df: One row per person with ``person_id`` and
            ``building_type`` columns. It is not filtered by county here, so
            callers must pass the population of the county being sampled.
        nev_df: Vehicle counts with ``County``, ``Vehicle_type``, ``domicile``
            and ``Powertrain`` columns plus one column per year.
        county: Not used for filtering; kept for interface compatibility. The
            county shown in log messages is read from each ``nev_df`` row.
        year: Name of the ``nev_df`` column to read (coerced to ``str``).

    Returns:
        The sampled trips of all segments stacked together, with ``engine``,
        ``segment``, ``efficiency`` and ``year`` columns added and
        ``charge_type`` recomputed by ``simdu.map_charge_type``. If no segment
        yields any people, an empty frame with the columns of ``df`` is returned.
    """
    year = str(year)

    # Segments are processed in the order they appear in nev_df, so earlier
    # rows get first pick of the population.
    nvehicles_sub = nev_df

    # person_ids already assigned to a vehicle; nobody is sampled twice.
    already_sampled_people = []

    # One trips frame per segment, concatenated at the end.
    cnty_df_list = []

    # Total number of vehicles that could not be matched to a person.
    remainder = 0

    for _, row in nvehicles_sub.iterrows():
        row_county = row['County']
        vehicle_type = row['Vehicle_type']
        domicile = row['domicile']
        powertrain = row['Powertrain']

        # Negative projections are treated as zero vehicles.
        count = max(row[year], 0)

        # 'sfh' maps to single-family buildings; every other domicile value is
        # matched against all non-single-family building types.
        if domicile == 'sfh':
            domicile_cond = population_df['building_type'] == 'single_family'
        else:
            domicile_cond = population_df['building_type'] != 'single_family'

        filtered_population = population_df[domicile_cond]

        # Exclude people already assigned to an earlier segment.
        already_sampled_cond = filtered_population['person_id'].isin(
            already_sampled_people)
        filtered_people = filtered_population[~already_sampled_cond]

        tot_people_left_sub = filtered_people.shape[0]
        if tot_people_left_sub == 0:
            print(vehicle_type, domicile, powertrain)

        # More vehicles than people left: take everyone and log the shortfall.
        # Strict '>' so an exact fit is not reported as a shortfall, and the
        # fraction is skipped when nobody is left (it would divide by zero).
        if count > tot_people_left_sub:
            segment_remainder = count - tot_people_left_sub
            remainder += segment_remainder
            count = tot_people_left_sub
            print(f"segment_remainder for {vehicle_type} ={segment_remainder} for {tot_people_left_sub} people in {row_county}")
            if tot_people_left_sub > 0:
                print(f"fraction of segment remainder = {segment_remainder/tot_people_left_sub}")

        # Nothing to sample for this segment; an empty frame adds nothing to
        # the result, so skip the per-row helper calls entirely.
        if count == 0:
            continue

        # Sample 'count' individuals. The seed is fixed, so repeated runs on
        # the same inputs select the same people.
        sampled_population = filtered_people.sample(
            n=count, replace=False, random_state=42)
        sampled_individuals = sampled_population['person_id'].to_list()
        already_sampled_people.extend(sampled_individuals)

        # Trips of the selected people, tagged with this segment's attributes.
        cnty_df = df[df['person_id'].isin(sampled_individuals)].copy()
        cnty_df['engine'] = powertrain
        cnty_df['segment'] = vehicle_type
        cnty_df['efficiency'] = simdu.segment_efficiency(vehicle_type)
        cnty_df['year'] = year

        # Keep the frame returned by the helper. The original bound it to a
        # misspelled name (`ctny_df`) and appended the pre-helper frame, which
        # loses the adjustments if the helper returns a new object.
        cnty_df = simdu.phev_efficiency_milage(cnty_df, powertrain)
        cnty_df['charge_type'] = cnty_df.apply(simdu.map_charge_type, axis=1)
        cnty_df_list.append(cnty_df)

    print("remainder", remainder)

    if not cnty_df_list:
        # pd.concat raises on an empty list; return an empty trips frame instead.
        return df.iloc[0:0].copy()

    full_county_df = pd.concat(cnty_df_list)
    return full_county_df

In [38]:
# Sample Yakima County for 2035 using the cloned (doubled) people and trips tables.
full_county_df = exclusionary_sampler(county_trip_extended, county_pop_extended, nvehicles_sub, county, 2035)

remainder 0


In [18]:
# Vehicle shortfall as a fraction of the filtered county population.
# 36142 is the last value shown in the totals cell output
# (2035 vehicles minus filtered county population).
36142/county_pop_df.shape[0]


0.2437448576322853

In [19]:
# Shapes of: unique sampled people, the county population table, and all sampled trips.
full_county_df.drop_duplicates(subset=['person_id']).shape, county_pop_df.shape, full_county_df.shape

((148278, 75), (148278, 3), (930192, 75))

In [42]:
def clone_people(df, clonestr='clone', pop_df=None):
    """Duplicate a set of people and their trips under suffixed person_ids.

    Args:
        df: Trips table with a ``person_id`` column.
        clonestr: Suffix appended to each ``person_id`` to form the clone's id.
        pop_df: One row per person with a ``person_id`` column. Defaults to
            the notebook-level ``county_pop_df`` so existing calls keep
            working; pass it explicitly to avoid relying on that global.

    Returns:
        ``(people, trips)``: the people table followed by its clones, and the
        trips of those people followed by the cloned trips. Note the order is
        people first, trips second.
    """
    if pop_df is None:
        # Backward-compatible fallback to the global the original body used.
        pop_df = county_pop_df

    # Only trips belonging to the people being cloned are kept.
    people_trips = df[df['person_id'].isin(pop_df['person_id'])].copy()

    # `assign` returns new frames, so the inputs are left untouched.
    cloned_trips = people_trips.assign(person_id=people_trips['person_id'] + clonestr)
    cloned_people = pop_df.assign(person_id=pop_df['person_id'] + clonestr)

    county_trip_extended = pd.concat([people_trips, cloned_trips])
    county_pop_extended = pd.concat([pop_df, cloned_people])
    return county_pop_extended, county_trip_extended

In [47]:
def clone_dfs(df: pd.DataFrame, clonestr: str='clone'):
    """Double the trips table so there are more people to sample from.

    Future projections contain more vehicles than there are people in a
    county. As a workaround, every trip is duplicated under a new
    ``person_id`` formed by appending ``clonestr`` to the original id.

    Args:
        df (pd.DataFrame): full trips table; needs ``person_id``,
            ``home_cty`` and ``building_type`` columns.
        clonestr (str, optional): suffix appended to each ``person_id`` to
            form the clone's id. Defaults to 'clone'.

    Returns:
        tuple: ``(trip_extended, pop_df)`` where ``trip_extended`` is the
        original trips followed by the cloned trips, and ``pop_df`` holds one
        row per person (originals and clones) with the ``person_id``,
        ``home_cty`` and ``building_type`` columns.
    """
    # `assign` builds the cloned copy without touching the caller's frame.
    cloned_trips = df.assign(person_id=df['person_id']+clonestr)
    trip_extended = pd.concat([df, cloned_trips])

    # First row per person, reduced to the attributes the sampler uses.
    one_row_per_person = trip_extended.drop_duplicates(subset=['person_id'])
    pop_df = one_row_per_person[['person_id', 'home_cty', 'building_type']]
    return trip_extended, pop_df

In [49]:
# Scratch arithmetic. NOTE(review): presumably a unit conversion (e.g. a run
# time divided by 60) — confirm what 1500 refers to, or delete this cell.
1500/60

25.0

In [48]:
# Double the full trips table and derive the matching one-row-per-person table.
# The county loop below needs both names to exist before it runs.
df_extended, pop_df_extended = clone_dfs(df)

In [46]:
# Every county present in the EV stock table, and the projection year to sample.
county_list = nev_df['County'].unique()
year = '2035'

def make_sampled_county_df(df, pop_df, nev_df, county: str, year: str):
    """Sample one county's trips for one projection year.

    Cuts the people, trips and vehicle-count tables down to ``county`` and
    hands them to ``exclusionary_sampler``.

    Args:
        df: Trips table with ``person_id`` and ``home_cty`` columns.
        pop_df: One row per person with ``person_id``, ``home_cty`` and
            ``building_type`` columns.
        nev_df: Vehicle counts with a ``County`` column holding bare county
            names and one column per year.
        county: Bare county name as used in ``nev_df['County']``, e.g.
            'Yakima'. It is expanded to '<county> County, WA' to match ``home_cty``.
        year: Name of the ``nev_df`` year column to sample.

    Returns:
        The frame returned by ``exclusionary_sampler`` for this county and year.
    """
    county_str = county + ' County, WA'

    # Build the mask from the `pop_df` argument. The original used the global
    # `pop_df_extended`, which only worked when the caller happened to pass
    # that same frame.
    county_cond = pop_df['home_cty'] == county_str
    county_pop_df = pop_df[county_cond].copy()
    county_trip_df = df[df['home_cty'] == county_str].copy()
    nvehicles_sub = nev_df[nev_df['County'] == county].copy()

    full_county_df = exclusionary_sampler(county_trip_df, county_pop_df, nvehicles_sub, county, year)
    return full_county_df

# Sample every county for `year` and collect the per-county trip frames.
# Requires `df_extended` and `pop_df_extended` from the clone_dfs cell. The
# NameError recorded in this cell's output is what happens when this cell is
# run before that one.
all_county_in_year_list = []
for county in county_list:
    # here we filter each df to only include people in that county
    # and randomly sample from that county
    full_county_df = make_sampled_county_df(df_extended, pop_df_extended, nev_df, county, year)

    # add that county df to the list
    all_county_in_year_list.append(full_county_df)

# stack all counties together to make a full df of trips for the year
all_county_in_year_df = pd.concat(all_county_in_year_list)


NameError: name 'df_extended' is not defined