# Initial Data Processing
- Read in raw data from Excel spreadsheets (from Excel_data folder)
- Reformat raw data to input for financial tool (saved to Input folder)


## Replace datasets

In [1]:
# Replace datasets
# One switch per dataset processed below. Every processing cell always builds
# and displays its reformatted table; it additionally overwrites the matching
# output file (path looked up in data_config) only when its flag is True.
replace_parameter = False
replace_subarea_population = False
replace_boardings_local_transit = False
replace_fare_per_boarding_local_transit = False
replace_revenue_local_transit = True
replace_tax_base = False

## Paths
Paths to the raw data from the Excel spreadsheets (CSV files in the Excel_data folder).

In [2]:
# Raw input files, grouped by the spreadsheet tab they belong to.
# All paths are relative and point into script_data/Excel_data.

# Local Transit tab
# Transit revenues (nominal $ millions)
Excel_data_revenue_local_transit = "script_data/Excel_data/local_transit_actual_revenue.csv"
# Total fixed-route boardings (the processing cell treats the values as thousands)
Excel_data_boardings_local_transit = "script_data/Excel_data/local_transit_actual_boardings.csv"         # FIXME: consider RTP forecast data
# Fixed-route average fare per boarding
Excel_data_fare_per_boarding_local_transit = "script_data/Excel_data/local_transit_fare_per_boarding.csv"

# Tax Base tab
Excel_data_tax_base = "script_data/Excel_data/tax_base_actual.csv"
# Yearly tax parameters; the processing cell reads its 'Year' and 'CPI' columns
Excel_data_parameter = "script_data/Excel_data/parameters_tax.csv"

# Subarea Allocation Bases tab
Excel_data_subarea_population = "script_data/Excel_data/subarea_allocation_bases_population_actual.csv"

In [4]:
import pandas as pd
import numpy as np
import math
import toml
import os
import forecasting_tool


# Load the project configuration. The file is resolved relative to the current
# working directory (one level up), so the notebook must be run from its own
# folder. Later cells look up their output paths in data_config.
config_path = os.path.join(os.getcwd(), "../configuration.toml")
data_config = toml.load(config_path)

In [20]:
# Tax parameters: load the raw table and make sure Year is an integer column.
parameter = pd.read_csv(Excel_data_parameter)
parameter = parameter.astype({'Year': 'int64'})

# Inflation index per year = CPI of that year / CPI of the previous year.
# The loop starts one year after the earliest one, so the earliest year gets
# no index value (it has no predecessor to compare against).
first_year = min(parameter['Year'])
last_year = max(parameter['Year'])
for year in range(first_year + 1, last_year + 1):
    current_rows = parameter['Year'] == year
    previous_rows = parameter['Year'] == year - 1
    # .item() requires exactly one row for the previous year; a missing or
    # duplicated year raises here rather than producing a wrong index.
    previous_cpi = parameter.loc[previous_rows, 'CPI'].item()
    current_cpi = parameter.loc[current_rows, 'CPI']
    parameter.loc[current_rows, 'indecies'] = current_cpi / previous_cpi

# Overwrite the stored input file only when explicitly requested.
if replace_parameter:
    parameter.to_csv(data_config['input_parameter'], index=False)

parameter

Unnamed: 0,Year,PV factor,CPI,indecies
0,1975,5.312,0.5066,
1,1976,5.047,0.5332,1.052507
2,1977,4.658,0.5778,1.083646
3,1978,4.241,0.6345,1.098131
4,1979,3.864,0.6965,1.097715
...,...,...,...,...
71,2046,0.605,4.4493,1.025941
72,2047,0.590,4.5578,1.024386
73,2048,0.578,4.6596,1.022335
74,2049,0.565,4.7590,1.021332


In [12]:
# Subarea Allocation Bases - Population
df = pd.read_csv(Excel_data_subarea_population)

# Reshape wide -> long: the first two columns identify the row, every
# remaining column label becomes a 'Year' value with its cell as 'Population'.
subarea_population = df.melt(
    id_vars=['County', 'PopulationArea'],
    value_vars=df.columns[2:],
    var_name='Year',
    value_name='Population',
)

# Drop incomplete rows first so the integer casts below cannot hit NaN.
subarea_population = subarea_population.dropna()
subarea_population = subarea_population.astype({
    'Year': 'int64',
    'County': str,
    'PopulationArea': str,
    'Population': 'int64',
})

# Overwrite the stored input file only when explicitly requested.
if replace_subarea_population:
    subarea_population.to_csv(data_config['input_subarea_population'], index=False)

subarea_population

Unnamed: 0,County,PopulationArea,Year,Population
0,King County,Cities,2000,1387812
1,King County,Unincorp.,2000,349234
2,King County,Total,2000,1737046
3,King County,PTBA,2000,1737046
4,King County,Sound Transit,2000,1466331
...,...,...,...,...
515,Snohomish County,Unincorp.,2050,480505
516,Snohomish County,Total,2050,1206021
517,Snohomish County,PTBA,2050,861111
518,Snohomish County,Sound Transit,2050,737650


In [11]:
# Local Transit - Total Fixed-Route boardings
df = pd.read_csv(Excel_data_boardings_local_transit)

# Reshape wide -> long: one row per agency and year; every column after
# 'Transit Agency' is treated as a year. Rows without a value are dropped.
transit_boardings = pd.melt(df,
                            id_vars='Transit Agency',
                            value_vars=df.columns[1:],
                            var_name='Year',
                            value_name='Boardings (000s)').dropna().astype({'Year': 'int64'})

# Values are in thousands and may be text with thousands separators
# (e.g. "4,022.714"). Going through str first also accepts cells that pandas
# already parsed as numbers, which would otherwise fail on .strip().
boardings_thousands = (
    transit_boardings['Boardings (000s)']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.strip()
    .astype(float)
)

# Round before the integer cast: multiplying a float by 1e3 can land a hair
# below the intended whole number, and a plain cast would truncate it and lose
# one boarding. 'int64' keeps the width explicit and platform-independent.
transit_boardings['Boardings'] = (boardings_thousands * 1e3).round().astype('int64')
transit_boardings = transit_boardings[['Transit Agency', 'Year', 'Boardings']]

# Overwrite the stored input file only when explicitly requested.
if replace_boardings_local_transit:
    transit_boardings.to_csv(data_config['input_local_transit_boardings'], index=False)

transit_boardings

Unnamed: 0,Transit Agency,Year,Boardings
0,Community Transit,1989,4022714
1,Everett Transit,1989,1293449
2,King County Metro,1989,77434636
3,Kitsap Transit,1989,2397979
4,Pierce Transit,1989,10531566
...,...,...,...
145,Community Transit,2018,14374078
146,Everett Transit,2018,1929323
147,King County Metro,2018,128389832
148,Kitsap Transit,2018,3828754


In [10]:
# Average Fixed-Route fare per boarding with periodic increases
df = pd.read_csv(Excel_data_fare_per_boarding_local_transit)

fare_per_boarding = pd.melt(df, id_vars='Transit Agency', value_vars=df.columns[1:], var_name = 'Year', value_name='Average Fare per Boarding ($)').dropna().astype({'Year': 'int64','Average Fare per Boarding ($)':'float'})


if replace_fare_per_boarding_local_transit:
    fare_per_boarding.to_csv(data_config['input_local_transit_fare_per_boarding'],index=False)

fare_per_boarding

Unnamed: 0,Transit Agency,Year,Average Fare per Boarding ($)
0,Community Transit,1989,0.811890
1,Everett Transit,1989,0.214929
2,King County Metro,1989,0.474413
3,Kitsap Transit,1989,0.309010
4,Pierce Transit,1989,0.334518
...,...,...,...
145,Community Transit,2018,1.558291
146,Everett Transit,2018,0.634419
147,King County Metro,2018,1.370389
148,Kitsap Transit,2018,1.719619


In [5]:
# Local Transit Revenue
df = pd.read_csv(Excel_data_revenue_local_transit)

# wide to long
transit_revenue = pd.melt(df, id_vars=['Revenue Type', 'Transit Agency'], value_vars=df.columns[2:], var_name = 'Year', value_name='Nominal $millions').dropna().astype({'Year':'int64'})
# calculate normal value
transit_revenue['Nominal'] = transit_revenue['Nominal $millions']*1e6
transit_revenue = transit_revenue[['Revenue Type', 'Transit Agency', 'Year', 'Nominal']]


if replace_revenue_local_transit:
 transit_revenue.to_csv(data_config['data_revenue_local_transit'],index=False)

transit_revenue

Unnamed: 0,Revenue Type,Transit Agency,Year,Nominal
0,Sales & Use Tax,Community Transit,1989,8088000.0
1,MVET,Community Transit,1989,8088000.0
2,Fares,Community Transit,1989,3266000.0
9,Sales & Use Tax,Everett Transit,1989,4177000.0
10,MVET,Everett Transit,1989,0.0
...,...,...,...,...
1435,Non-PSRC FHWA,Pierce Transit,2020,0.0
1436,PSRC FTA,Pierce Transit,2020,8351312.0
1437,Non-PSRC FTA,Pierce Transit,2020,0.0
1438,State,Pierce Transit,2020,0.0


In [31]:
# Tax Base
df = pd.read_csv(Excel_data_tax_base)

# Reshape wide -> long: one row per county, category and year; every column
# after the two identifier columns is treated as a year.
# NOTE(review): 'Year' is left as the original column label here (not cast to
# int64 as in the other cells) - confirm whether that is intended.
tax_base = df.melt(
    id_vars=['County', 'Tax Base Category'],
    value_vars=df.columns[2:],
    var_name='Year',
    value_name='Values',
).dropna()

# Scale factor per row, derived from the unit embedded in the category label:
# millions by default, thousands when the label contains "000s", and no
# scaling for "Diesel" rows. Diesel is applied last so it wins if both match.
category = tax_base["Tax Base Category"]
in_thousands = category.str.contains("000s")
is_diesel = category.str.contains("Diesel")
tax_base["Multiplier"] = 1e6
tax_base.loc[in_thousands, "Multiplier"] = 1e3
tax_base.loc[is_diesel, "Multiplier"] = 1
tax_base["Value"] = tax_base["Values"] * tax_base["Multiplier"]

# Strip the unit suffix from the category labels now that values are scaled.
# Keys must match the raw labels exactly, including the trailing space most of
# them carry. Labels not listed here (e.g. 'Diesel') pass through unchanged.
category_labels = {
    'Personal Income (nominal $mil) ': 'Personal Income (nominal)',
    'Population (000s) ': 'Population',
    'Employment (000s) ': 'Employment',
    'Retail Sales (nominal $mil) ': 'Retail Sales (nominal)',
    'Motor Fuel (gal. Mil.) ': 'Motor Fuel (gal.)',
    'Cars and Gas Trucks (000s)': 'Cars and Gas Trucks',
}
tax_base["Tax Base Category"] = tax_base["Tax Base Category"].map(
    lambda label: category_labels.get(label, label)
)
tax_base = tax_base[['County', 'Tax Base Category', 'Year', 'Value']]

# Overwrite the stored data file only when explicitly requested.
if replace_tax_base:
    tax_base.to_csv(data_config['data_tax_base'], index=False)

tax_base

Unnamed: 0,County,Tax Base Category,Year,Value
0,4-County regional Totals,Personal Income (nominal),1975,1.333858e+10
1,4-County regional Totals,Population,1975,1.963090e+06
2,4-County regional Totals,Employment,1975,7.192383e+05
3,4-County regional Totals,Retail Sales (nominal),1975,7.428915e+09
4,4-County regional Totals,Motor Fuel (gal.),1975,9.415027e+08
...,...,...,...,...
1819,Pierce County,Cars and Gas Trucks,2050,1.160648e+06
1820,Snohomish County,Population,2050,1.215036e+06
1821,Snohomish County,Employment,2050,5.055630e+05
1822,Snohomish County,Retail Sales (nominal),2050,3.795396e+10


In [7]:
tax_base

Unnamed: 0,County,Tax Base Category,Year,Values,Multiplier,True Value
0,4-County regional Totals,Personal Income (nominal $mil),1975,13338.578310,1000000.0,1.333858e+10
1,4-County regional Totals,Population (000s),1975,1963.090000,1000.0,1.963090e+06
2,4-County regional Totals,Employment (000s),1975,719.238335,1000.0,7.192383e+05
3,4-County regional Totals,Retail Sales (nominal $mil),1975,7428.915395,1000000.0,7.428915e+09
4,4-County regional Totals,Motor Fuel (gal. Mil.),1975,941.502657,1000000.0,9.415027e+08
...,...,...,...,...,...,...
1819,Pierce County,Cars and Gas Trucks (000s),2050,1160.648093,1000.0,1.160648e+06
1820,Snohomish County,Population (000s),2050,1215.036446,1000.0,1.215036e+06
1821,Snohomish County,Employment (000s),2050,505.563009,1000.0,5.055630e+05
1822,Snohomish County,Retail Sales (nominal $mil),2050,37953.962990,1000000.0,3.795396e+10
