In [1]:
import pandas as pd
import numpy as np
import math
import toml
import os
import input_data_processing


# Load the raw-data configuration (TOML) that sits in the current working directory.
config_path = os.path.join(os.getcwd(), "config_raw_data.toml")
data_config = toml.load(config_path)


## Read from spreadsheet and create dataframe inputs


## PV Factor and CPI
   - used to calculate constant local transit revenue

In [15]:
# Read the yearly parameter table from the path given in the config, then
# force 'Year' to int64 (it is used as a merge key further down the notebook).
parameter_raw = pd.read_csv(data_config['parameter'])
parameter = parameter_raw.astype({'Year': 'int64'})
parameter

Unnamed: 0,Year,PV factor,CPI,indecies
0,1975,5.312,0.5066,
1,1976,5.047,0.5332,1.052507
2,1977,4.658,0.5778,1.083646
3,1978,4.241,0.6345,1.098131
4,1979,3.864,0.6965,1.097715
...,...,...,...,...
71,2046,0.605,4.4493,1.025941
72,2047,0.590,4.5578,1.024386
73,2048,0.578,4.6596,1.022335
74,2049,0.565,4.7590,1.021332


# Population
  - use helper functions to calculate population for all years

In [3]:
# Build one long table of yearly population for every (county, population area)
# pair that appears in the subarea population input.
subarea_population = pd.read_csv(data_config['data_subarea_population'])

# Per-group frames are collected here and concatenated once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier rows.
population_frames = []
for i_county in data_config['all_counties']:
    for i_pop_area in subarea_population.PopulationArea.unique():

        df_pop = subarea_population[
            (subarea_population['County'] == i_county)
            & (subarea_population['PopulationArea'] == i_pop_area)
        ].copy()

        # Not every county has rows for every population area: nothing to do.
        if df_pop.empty:
            continue

        start_year = int(df_pop['Year'].min())
        end_year = int(df_pop['Year'].max())

        # fill in missing years (helper lives in input_data_processing; presumably
        # it adds one row per year between start_year and end_year -- confirm there)
        df_pop = input_data_processing.fill_year(df_pop, ['County', 'PopulationArea'], start_year, end_year)
        # calculate interpolated population for the filled-in years
        df_pop['Population'] = input_data_processing.interpolate_population(df_pop, 'Population', start_year, end_year)

        population_frames.append(df_pop[['County', 'PopulationArea', 'Year', 'Population']])

# Keep the original behaviour of yielding an empty DataFrame when no group matched
# (pd.concat raises on an empty list).
result_subarea_population = (
    pd.concat(population_frames, ignore_index=True) if population_frames else pd.DataFrame()
)

result_subarea_population

Unnamed: 0,County,PopulationArea,Year,Population
0,King County,Cities,2000,1387812.0
1,King County,Cities,2001,1404829.0
2,King County,Cities,2002,1427263.0
3,King County,Cities,2003,1436622.0
4,King County,Cities,2004,1443623.0
...,...,...,...,...
1015,Snohomish County,City Transit,2046,182008.0
1016,Snohomish County,City Transit,2047,184374.0
1017,Snohomish County,City Transit,2048,186772.0
1018,Snohomish County,City Transit,2049,189200.0


### Actual transit boardings (000s) from Local Transit Tab
- Year 2018 and before

In [4]:
# Local Transit - Total Fixed-Route boardings
# For each agency: extend the boardings series through data_config['end_year'] and
# fill every missing year by scaling the previous year's boardings with population growth.
transit_boardings = pd.read_csv(data_config['data_boardings_local_transit'])

# Per-agency frames, concatenated once after the loop.
boarding_frames = []
for agency in data_config['county_transit']:

    df_agency = transit_boardings[transit_boardings['Transit Agency'] == agency].copy()

    # Cast to a plain int (as the population cell does) so range() below always
    # receives an integer, even if the Year column was parsed as float.
    start_year = int(df_agency['Year'].min())

    # fill in missing years (helper from input_data_processing)
    df_agency = input_data_processing.fill_year(df_agency, 'Transit Agency', start_year, data_config['end_year'])

    # Everett Transit uses the City Transit population for boarding estimation;
    # every other agency uses the PTBA population of its county.
    pop_area = 'City Transit' if agency == "Everett Transit" else 'PTBA'
    agency_population = result_subarea_population[
        (result_subarea_population['County'] == data_config['county_transit'][agency])
        & (result_subarea_population['PopulationArea'] == pop_area)
    ]
    df_agency1 = pd.merge(df_agency, agency_population, how="left", on="Year")

    # Walk forward in time so each estimated year can build on the year before it.
    for year in range(start_year, data_config['end_year'] + 1):
        is_year = df_agency1['Year'] == year

        # Years that already carry a boardings value are left untouched.
        if not np.isnan(df_agency1.loc[is_year, 'Boardings'].item()):
            continue

        is_prev_year = df_agency1['Year'] == year - 1
        boarding_prev_year = df_agency1.loc[is_prev_year, 'Boardings'].item()
        pop = df_agency1.loc[is_year, 'Population'].item()
        pop_prev_year = df_agency1.loc[is_prev_year, 'Population'].item()

        # boardings grow in proportion to population
        df_agency1.loc[is_year, 'Boardings'] = boarding_prev_year * pop / pop_prev_year

    boarding_frames.append(df_agency1)

result_boarding = pd.concat(boarding_frames, ignore_index=True) if boarding_frames else pd.DataFrame()
result_boarding = result_boarding[['Transit Agency', 'Year', 'Boardings']]

result_boarding

Unnamed: 0,Transit Agency,Year,Boardings
0,Community Transit,1989,4.022714e+06
1,Community Transit,1990,4.138693e+06
2,Community Transit,1991,4.916127e+06
3,Community Transit,1992,5.057887e+06
4,Community Transit,1993,5.289549e+06
...,...,...,...
305,Pierce Transit,2046,1.390027e+07
306,Pierce Transit,2047,1.403806e+07
307,Pierce Transit,2048,1.417722e+07
308,Pierce Transit,2049,1.431775e+07


# Fare per Boarding


In [6]:
# Average Fixed-Route fare per boarding with periodic increases
# Step 1: grow each agency's fare every year by its configured annual rate.
# Step 2: hold the fare flat inside each fare-change period by taking the
#         period's minimum, so the fare only steps up once per period.
fare_per_boarding = pd.read_csv(data_config["data_fare_per_boarding_local_transit"])

fare_col = 'Average Fare per Boarding ($)'
first_year = 1989                      # first year of the filled series (value used by the original cell)
final_year = data_config['end_year']

# last year with an actual value (taken over all agencies), used for creating periodic increases;
# it does not depend on the agency, so it is computed once outside the loop
last_value_year = int(fare_per_boarding['Year'].max())
# number of rows, from last_value_year through final_year, that receive a period group id
n_grouped_years = final_year - last_value_year + 1

# Per-agency frames, concatenated once after the loop.
fare_frames = []
for agency in data_config['county_transit']:
    change_period = data_config['transit_fare_change_period'][agency]

    # Group ids 1, 1, ..., 2, 2, ... with each id repeated `change_period` times,
    # trimmed to the needed length. The base list is sized from n_grouped_years so it is
    # long enough even when the period is 1 (a shorter base list makes the column
    # assignment below fail with a length mismatch).
    list_grp = sorted(list(range(1, n_grouped_years + 1)) * change_period)[:n_grouped_years]

    fare_per_boarding1 = input_data_processing.fill_year(
        fare_per_boarding[fare_per_boarding['Transit Agency'] == agency].copy(),
        'Transit Agency', first_year, final_year)

    # Average Fixed-Route fare per boarding by Annual Growth Rate: a missing year is the
    # previous year's fare grown by the agency's rate from the config (per the original
    # note, that rate reflects the agency's previous 20 year history).
    annual_increase = data_config['transit_annual_fare_increase'][agency]
    for year in range(first_year, final_year + 1):
        is_year = fare_per_boarding1['Year'] == year

        # years with an actual fare are kept as they are
        if not np.isnan(fare_per_boarding1.loc[is_year, fare_col].item()):
            continue

        fare_prev_year = fare_per_boarding1.loc[fare_per_boarding1['Year'] == year - 1, fare_col].item()
        fare_per_boarding1.loc[is_year, fare_col] = fare_prev_year + (fare_prev_year * annual_increase)

    # add grouping column: 0 for the years before last_value_year, period ids afterwards
    # (positional assignment -- assumes fill_year returns one row per year in ascending order)
    fare_per_boarding1['list_grp'] = [0] * (last_value_year - first_year) + list_grp

    # Average Fixed-Route fare per boarding with periodic increases.
    # The string alias 'min' is used instead of the builtin `min`, which pandas deprecates
    # as a groupby callable; the computed values are the same.
    in_period = fare_per_boarding1['list_grp'] > 0
    fare_per_boarding1.loc[in_period, fare_col] = (
        fare_per_boarding1.loc[in_period, ['list_grp', fare_col]]
        .groupby(['list_grp'])[fare_col]
        .transform('min')
    )

    fare_frames.append(fare_per_boarding1[['Year', 'Transit Agency', fare_col]])

result_fare_per_boarding = pd.concat(fare_frames, ignore_index=True) if fare_frames else pd.DataFrame()
result_fare_per_boarding

Unnamed: 0,Year,Transit Agency,Average Fare per Boarding ($)
0,1989,Community Transit,0.811890
1,1990,Community Transit,0.854134
2,1991,Community Transit,0.880978
3,1992,Community Transit,0.952374
4,1993,Community Transit,0.984394
...,...,...,...
305,2046,Pierce Transit,2.934295
306,2047,Pierce Transit,2.934295
307,2048,Pierce Transit,3.489484
308,2049,Pierce Transit,3.489484


# Actual transit revenues (Nominal $millions) from Local Transit Tab


In [18]:
# Nominal revenue by revenue type, agency and year, as read from the config-specified CSV.
revenue_nominal = pd.read_csv(data_config['data_revenue_local_transit'])

# Attach each year's PV factor (left join keeps every revenue row), then
# convert nominal dollars to constant dollars: Constant = Nominal * PV factor.
pv_factor_by_year = parameter[['Year', 'PV factor']]
transit_revenue = pd.merge(revenue_nominal, pv_factor_by_year, how="left", on="Year")
transit_revenue["Constant"] = transit_revenue['Nominal'] * transit_revenue['PV factor']
transit_revenue

Unnamed: 0,Revenue Type,Transit Agency,Year,Nominal,PV factor,Constant
0,Sales & Use Tax,Community Transit,1989,8088000.0,2.288,1.850534e+07
1,MVET,Community Transit,1989,8088000.0,2.288,1.850534e+07
2,Fares,Community Transit,1989,3266000.0,2.288,7.472608e+06
3,Sales & Use Tax,Everett Transit,1989,4177000.0,2.288,9.556976e+06
4,MVET,Everett Transit,1989,0.0,2.288,0.000000e+00
...,...,...,...,...,...,...
625,Non-PSRC FHWA,Pierce Transit,2020,0.0,0.963,0.000000e+00
626,PSRC FTA,Pierce Transit,2020,8351312.0,0.963,8.042313e+06
627,Non-PSRC FTA,Pierce Transit,2020,0.0,0.963,0.000000e+00
628,State,Pierce Transit,2020,0.0,0.963,0.000000e+00


### Fare revenue calculation
- Year 2018 and before

In [14]:
# Fare revenue per agency and year = boardings * average fare per boarding.
boardings_by_year = result_boarding[['Year', 'Transit Agency', 'Boardings']]
fares_by_year = result_fare_per_boarding[['Year', 'Transit Agency', 'Average Fare per Boarding ($)']]

# Default (inner) merge: only agency/year pairs present in both tables survive.
fare_revenue_merged = pd.merge(boardings_by_year, fares_by_year, on=['Year', 'Transit Agency'])
fare_revenue_merged['Nominal'] = (
    fare_revenue_merged['Boardings'] * fare_revenue_merged['Average Fare per Boarding ($)']
)
fare_revenue_merged['Revenue Type'] = "Fares"

# Keep the same column layout as the revenue input table.
transit_fare_revenue = fare_revenue_merged[['Revenue Type', 'Transit Agency', 'Year', 'Nominal']]

transit_fare_revenue

Unnamed: 0,Revenue Type,Transit Agency,Year,Nominal
0,Fares,Community Transit,1989,3.266000e+06
1,Fares,Community Transit,1990,3.535000e+06
2,Fares,Community Transit,1991,4.331000e+06
3,Fares,Community Transit,1992,4.817000e+06
4,Fares,Community Transit,1993,5.207000e+06
...,...,...,...,...
305,Fares,Pierce Transit,2046,4.078749e+07
306,Fares,Pierce Transit,2047,4.119180e+07
307,Fares,Pierce Transit,2048,4.947117e+07
308,Fares,Pierce Transit,2049,4.996156e+07


In [9]:
# Fresh copy of the local transit revenue table, used as the basis for the funding projections.
revenue_csv_path = data_config['data_revenue_local_transit']
df_transit_revenue_funding = pd.read_csv(revenue_csv_path)
df_transit_revenue_funding

Unnamed: 0,Revenue Type,Transit Agency,Year,Nominal
0,Sales & Use Tax,Community Transit,1989,8088000.0
1,MVET,Community Transit,1989,8088000.0
2,Fares,Community Transit,1989,3266000.0
3,Sales & Use Tax,Everett Transit,1989,4177000.0
4,MVET,Everett Transit,1989,0.0
...,...,...,...,...
625,Non-PSRC FHWA,Pierce Transit,2020,0.0
626,PSRC FTA,Pierce Transit,2020,8351312.0
627,Non-PSRC FTA,Pierce Transit,2020,0.0
628,State,Pierce Transit,2020,0.0


## Local Transit Revenue: PSRC FHWA, PSRC FTA

In [11]:
# Full list of revenue types, kept for reference:
# predict_list = ['PSRC FHWA','Non-PSRC FHWA','PSRC FTA','Non-PSRC FTA','State','Other Federal']

# Revenue types projected as "previous year times 1.025".
predict_list_psrc = ['PSRC FHWA', 'PSRC FTA']
max_year = max(df_transit_revenue_funding['Year'])

# The first predicted year uses the average of the previous five years, so keep
# only the PSRC rows that fall inside the last five years of data.
is_psrc_type = df_transit_revenue_funding['Revenue Type'].isin(predict_list_psrc)
in_last_five_years = df_transit_revenue_funding['Year'] > max_year - 5
df = df_transit_revenue_funding[is_psrc_type & in_last_five_years].copy()

df

Unnamed: 0,Revenue Type,Transit Agency,Year,Nominal
408,PSRC FHWA,Community Transit,2016,608182.0
410,PSRC FTA,Community Transit,2016,11341165.0
417,PSRC FHWA,Everett Transit,2016,422469.0
419,PSRC FTA,Everett Transit,2016,2202226.0
426,PSRC FHWA,King County Metro,2016,2165900.0
428,PSRC FTA,King County Metro,2016,79768173.0
435,PSRC FHWA,Kitsap Transit,2016,2309321.0
437,PSRC FTA,Kitsap Transit,2016,4165730.0
444,PSRC FHWA,Pierce Transit,2016,373401.0
446,PSRC FTA,Pierce Transit,2016,7999060.0


In [13]:
# Project PSRC FHWA / PSRC FTA funding for every agency through data_config['end_year'].
#   - first projected year (max_year + 1): mean of the retained actual years times the growth factor
#   - every later year: previous year times the growth factor
annual_growth = 1.025
# Earliest year kept by the "Year > max_year - 5" filter that built `df`
# (2016 for data ending in 2020, the value previously hard-coded here).
window_start_year = max_year - 4

# Per-series frames, concatenated once after the loop.
funding_frames = []
for predict in predict_list_psrc:
    for agency in data_config['county_transit'].keys():
        # A dedicated name is used for the per-series frame so the raw revenue table
        # `df_transit_revenue_funding` loaded earlier is not overwritten by this loop.
        df_funding = df[(df['Transit Agency'] == agency) & (df['Revenue Type'] == predict)]
        df_funding = input_data_processing.fill_year(
            df_funding, ['Revenue Type', 'Transit Agency'], window_start_year, data_config['end_year'])

        # Seed the first projected year. Series.mean() skips NaN, so (assuming fill_year
        # leaves the added years empty) this is the mean of the actual values only.
        df_funding.loc[df_funding['Year'] == max_year + 1, 'Nominal'] = df_funding['Nominal'].mean() * annual_growth

        # Compound forward one year at a time.
        for year in range(max_year + 2, data_config['end_year'] + 1):
            funding_prev_year = df_funding.loc[df_funding['Year'] == year - 1, 'Nominal'].item()
            df_funding.loc[df_funding['Year'] == year, 'Nominal'] = funding_prev_year * annual_growth

        funding_frames.append(df_funding)

transit_revenue_funding = pd.concat(funding_frames, ignore_index=True) if funding_frames else pd.DataFrame()

transit_revenue_funding

Unnamed: 0,Year,Revenue Type,Transit Agency,Nominal
0,2016,PSRC FHWA,Community Transit,6.081820e+05
1,2017,PSRC FHWA,Community Transit,6.112090e+05
2,2018,PSRC FHWA,Community Transit,6.197090e+05
3,2019,PSRC FHWA,Community Transit,6.940990e+05
4,2020,PSRC FHWA,Community Transit,7.137080e+05
...,...,...,...,...
345,2046,PSRC FTA,Pierce Transit,1.591272e+07
346,2047,PSRC FTA,Pierce Transit,1.631053e+07
347,2048,PSRC FTA,Pierce Transit,1.671830e+07
348,2049,PSRC FTA,Pierce Transit,1.713625e+07


### Annual Tax Base [Scenario: Base Model - Updated February, 2019]
- Year: 1975 - 2050


In [15]:
# Read the wide tax-base table: the first two columns identify the series,
# every remaining column is treated as one year of values.
df = pd.read_csv("data_actual/tax_base_actual.csv")

# Reshape to long form (one row per county / category / year) and drop rows with missing entries.
year_columns = df.columns[2:]
tax_base = pd.melt(
    df,
    id_vars=['County', 'Tax Base Category'],
    value_vars=year_columns,
    var_name='Year',
    value_name='Values',
).dropna()

# Scale factor per category: 1e6 by default, 1e3 when the category name contains "000s",
# and 1 when it contains "Diesel" (applied last, so it wins if both match).
category = tax_base["Tax Base Category"]
in_thousands = category.str.contains("000s")
is_diesel = category.str.contains("Diesel")

tax_base["Multiplier"] = 1e6
tax_base.loc[in_thousands, "Multiplier"] = 1e3
tax_base.loc[is_diesel, "Multiplier"] = 1

tax_base["True Value"] = tax_base["Multiplier"] * tax_base["Values"]
tax_base