# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest PennDOT bridge data

In [2]:
# Numeric owner codes -> owner labels. Codes 3 and 4 map to the same label;
# any code missing from this mapping is left as its numeric value.
OWNER_LABELS = {
    1: 'STATE HIGHWAY AGENCY',
    2: 'COUNTY HIGHWAY AGENCY',
    3: 'BOROUGH-CITY-TOWNSHIP',
    4: 'BOROUGH-CITY-TOWNSHIP',
    25: 'OTHER LOCAL AGENCIES',
    27: 'RAILROAD'
}

# Single-letter condition codes -> labels. Values not listed here pass through
# unchanged.
CONDITION_LABELS = {
    'G': 'Good',
    'F': 'Fair',
    'P': 'Poor'
}

# Load only the columns this analysis uses.
bridges = pd.read_csv('input/onemap.csv', usecols=['BRKEY', 'REPORTGROUP', 'OWNER', 'CONDITION'])

# Keep the listed report groups, plus bridge key 744 whatever its report group.
bridges = bridges[
    bridges.REPORTGROUP.isin(['L1', 'L2', 'L3', 'S1', 'S2', 'S3', 'T1']) |
    bridges.BRKEY.isin([744])
]

# Replace the numeric OWNER column with a labelled Owner column. `assign`
# returns a new frame, so nothing is written into the filtered slice above.
bridges = (
    bridges
    .assign(Owner=bridges.OWNER.replace(OWNER_LABELS))
    .drop(columns=['OWNER'])
)

bridges['CONDITION'] = bridges.CONDITION.replace(CONDITION_LABELS)

In [3]:
# Summarise the cleaned bridge table: row count, per-column non-null counts and dtypes.
bridges.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10034 entries, 0 to 13146
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   REPORTGROUP  10034 non-null  object
 1   CONDITION    9992 non-null   object
 2   BRKEY        10034 non-null  int64 
 3   Owner        10034 non-null  object
dtypes: int64(1), object(3)
memory usage: 650.0+ KB


##### Manually set some bridge keys

In [4]:
### Bridge keys to replace: (key in the bridge data, replacement key,
### CONDITION override or None to leave the condition untouched).
### Presumably the replacement keys are the ones the project list uses -- confirm.
REKEYED_BRIDGES = [
    (68423, 1942, 'Poor'),
    (69253, 7772, 'Poor'),
    (68617, 19148, 'Fair'),
    (67464, 36697, 'Poor'),
    (67810, 22413, None),
]

for data_key, replacement_key, condition in REKEYED_BRIDGES:
    bridges.loc[bridges.BRKEY == data_key, 'BRKEY'] = replacement_key
    if condition is not None:
        # Applies to every row now carrying the replacement key, including any
        # row that already had that key before the rename.
        bridges.loc[bridges.BRKEY == replacement_key, 'CONDITION'] = condition

# Rows are positional: they rely on bridges.columns being
# REPORTGROUP, CONDITION, BRKEY, Owner (in that order).
hardcodedbridges = pd.DataFrame([
    ### Add Fern Hollow Bridge -- old & new bridge keys don't exist in database
    ['L1', 'Poor', 2410, 'BOROUGH-CITY-TOWNSHIP'],

    ### Add US 422 Cunningham Culverts -- old & new bridge keys don't exist in database
    ['S3', 'Poor', 19147, 'STATE HIGHWAY AGENCY'],

    ### Add Overland Avenue Bridge -- demolished without replacement
    ['L1', 'Poor', 2651, 'BOROUGH-CITY-TOWNSHIP'],
], columns=bridges.columns)

# ignore_index=True gives the combined frame a fresh 0..n-1 index, so the
# appended rows do not reuse index labels that already exist in `bridges`.
bridges = pd.concat([bridges, hardcodedbridges], ignore_index=True)

### Duplicate the Butler Street Bridge -- work performed on old & new bridge keys
bridges.loc[bridges.BRKEY == 48912, 'CONDITION'] = 'Poor'

# Copy the first matching row under the second key. Selecting by column name
# (rather than array position) keeps dtypes intact, and appending via concat
# cannot overwrite an existing row the way `.loc[len(bridges)]` can when that
# label is already present in a non-contiguous index.
butlerst = bridges[bridges.BRKEY == 48912].head(1).assign(BRKEY=487)
assert not butlerst.empty, 'bridge key 48912 not found in bridge data'
bridges = pd.concat([bridges, butlerst], ignore_index=True)

### Import construction projects

In [5]:
# Read the project list. The CSV header pads the total column's name with
# spaces, so rename it to plain 'Total'.
rawprojects = (
    pd.read_csv('input/projects.csv')
    .rename(columns={' Total ': 'Total'})
)

In [6]:
# Summarise the raw project table: row count, per-column non-null counts and dtypes.
rawprojects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048 entries, 0 to 1047
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1048 non-null   int64  
 1   Planning Partner  1048 non-null   object 
 2   District          1048 non-null   int64  
 3   County            1048 non-null   object 
 4   Project           1048 non-null   int64  
 5   Title             1048 non-null   object 
 6   SR                1009 non-null   float64
 7   Sec               1035 non-null   object 
 8   Project Class     1048 non-null   object 
 9   BRIDGE KEY        1047 non-null   float64
 10  UDF Narrative     11 non-null     object 
 11  Phase             1048 non-null   object 
 12  Fund              1048 non-null   object 
 13   2023 Tot         1048 non-null   object 
 14   2024 Tot         1048 non-null   object 
 15   2025 Tot         1048 non-null   object 
 16   2026 Tot         1048 non-null   object 


In [7]:
# Convert the currency strings in Total to whole-dollar integers.
# Both replacements are literal (regex=False): '$' is a regex metacharacter,
# and spelling it out for ',' as well keeps the two calls consistent and
# independent of the pandas default for `regex`.
rawprojects['Total'] = (
    rawprojects.Total
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)   # parse values that carry a decimal part
    .astype(int)     # truncates any fractional dollars toward zero
)

### Merge projects and PennDOT bridge data

In [8]:
# Attach bridge attributes to every project row. A left join keeps projects
# whose BRIDGE KEY has no match; the `_merge` indicator column marks those
# rows as 'left_only'.
rawprojects = rawprojects.merge(
    bridges,
    how='left',
    left_on='BRIDGE KEY',
    right_on='BRKEY',
    indicator=True,
)

In [9]:
# Show the project rows whose BRIDGE KEY found no match in the bridge table.
rawprojects[rawprojects._merge == 'left_only'].sort_values(by='BRIDGE KEY')

Unnamed: 0,ID,Planning Partner,District,County,Project,Title,SR,Sec,Project Class,BRIDGE KEY,...,2028 Tot,2029 Tot,2030 Tot,2031-34 Tot,Total,REPORTGROUP,CONDITION,BRKEY,Owner,_merge
1044,663,Interstate,10,Clarion,109300,Clarion River Bridge,80.0,351.0,Bridge Preservation - Federal,10951.0,...,0,0,0,0,8200000,,,,,left_only
887,513,SPC,12,Fayette,81229,D12 Bridge Preservation Design,,,Bridge Restoration,,...,$0.00,2000000,1000000,20000000,23000000,,,,,left_only


In [10]:
# Collapse to one row per bridge with the summed project cost. Rows with a
# missing value in any grouping column are left out by groupby.
projects = (
    rawprojects
    .groupby(['BRKEY', 'Owner', 'CONDITION', 'REPORTGROUP'])['Total']
    .sum()
    .reset_index()
)

### Look at bridges by owner, condition

In [11]:
# Count bridges per owner (rows) and condition (columns).
bridgepiv = bridges[['Owner', 'CONDITION']].pivot_table(
    index=['Owner'],
    columns='CONDITION',
    values='CONDITION',
    aggfunc='size',
)
# Row total across all condition columns.
bridgepiv['Total'] = bridgepiv.sum(axis=1)

In [12]:
# Share of each owner's bridges that are Poor, in percent.
bridgepiv['P%'] = bridgepiv['Poor'].div(bridgepiv['Total']).mul(100)
# Each owner's share of all Poor bridges, in percent.
bridgepiv['P%_total'] = bridgepiv['Poor'].div(bridgepiv['Poor'].sum()).mul(100)
# Each owner's share of all bridges, in percent.
bridgepiv['%_total'] = bridgepiv['Total'].div(bridgepiv['Total'].sum()).mul(100)

In [13]:
# Display bridge counts by owner and condition, with the percentage columns.
bridgepiv

CONDITION,Fair,Good,N,Poor,Total,P%,P%_total,%_total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BOROUGH-CITY-TOWNSHIP,356,156,2826,234,3572,6.550952,23.901941,35.734294
COUNTY HIGHWAY AGENCY,494,269,133,154,1050,14.666667,15.730337,10.504202
OTHER LOCAL AGENCIES,17,10,10,5,42,11.904762,0.510725,0.420168
RAILROAD,10,4,8,15,37,40.540541,1.532176,0.370148
STATE HIGHWAY AGENCY,2986,1728,10,571,5295,10.783758,58.324821,52.971188


### Look at projects by bridge owner, condition

In [14]:
# Count bridges that have projects, per owner (rows) and condition (columns).
# Owner/condition combinations with no rows show up as NaN.
projects[['Owner', 'CONDITION']].pivot_table(
    index=['Owner'],
    columns='CONDITION',
    values='CONDITION',
    aggfunc='size',
)

CONDITION,Fair,Good,N,Poor
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BOROUGH-CITY-TOWNSHIP,8.0,,,39.0
COUNTY HIGHWAY AGENCY,23.0,1.0,1.0,26.0
RAILROAD,1.0,,,1.0
STATE HIGHWAY AGENCY,234.0,71.0,1.0,132.0


### Look at projects by owner, NBI status

In [15]:
def poor_counts_by_owner(frame, suffix):
    """Count Poor-condition rows per owner, split on the REPORTGROUP suffix.

    Rows whose REPORTGROUP ends in '1' are counted in the 'NBI' column and all
    other rows in the 'nonNBI' column (a missing REPORTGROUP counts as nonNBI).

    Parameters
    ----------
    frame : pandas.DataFrame
        Must have 'Owner', 'CONDITION' and 'REPORTGROUP' columns.
    suffix : str
        Appended to the output column names, e.g. 'proj' -> 'NBIproj'.

    Returns
    -------
    pandas.DataFrame
        Indexed by Owner, with integer columns 'nonNBI<suffix>' and
        'NBI<suffix>'. Every owner present in `frame` gets a row, with zeros
        where it has no Poor rows.
    """
    is_poor = frame.CONDITION == 'Poor'
    is_nbi = frame.REPORTGROUP.str.endswith('1', na=False)
    flags = frame[['Owner']].assign(**{
        'nonNBI' + suffix: (is_poor & ~is_nbi).astype(int),
        'NBI' + suffix: (is_poor & is_nbi).astype(int),
    })
    # Summing the 0/1 flags per owner gives the counts without running a
    # Python function for each group.
    return flags.groupby('Owner').sum()


poorproj = poor_counts_by_owner(projects, 'proj')
poorbrid = poor_counts_by_owner(bridges, 'br')

# Inner join: an owner that has bridges but no project rows is dropped here.
comparison = pd.merge(poorproj, poorbrid, on='Owner')

# These three columns are fractions (0-1), not percentages, despite the '%'
# in their names. A zero denominator yields NaN.
comparison['%_NBI'] = comparison.NBIproj / comparison.NBIbr
comparison['%_nonNBI'] = comparison.nonNBIproj / comparison.nonNBIbr
comparison['%_Total'] = (comparison.NBIproj + comparison.nonNBIproj) / (comparison.NBIbr + comparison.nonNBIbr)

comparison

Unnamed: 0_level_0,nonNBIproj,NBIproj,nonNBIbr,NBIbr,%_NBI,%_nonNBI,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BOROUGH-CITY-TOWNSHIP,0,39,24,210,0.185714,0.0,0.166667
COUNTY HIGHWAY AGENCY,0,26,27,127,0.204724,0.0,0.168831
RAILROAD,0,1,0,15,0.066667,,0.066667
STATE HIGHWAY AGENCY,29,103,303,268,0.384328,0.09571,0.231173


### Sum project costs by bridge owner

In [16]:
# Total project cost per bridge owner, plus each owner's fraction of the sum.
projcost = projects.groupby('Owner')[['Total']].sum()
projcost['%_Total'] = projcost['Total'].div(projcost['Total'].sum())
projcost

Unnamed: 0_level_0,Total,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
BOROUGH-CITY-TOWNSHIP,203475464,0.084515
COUNTY HIGHWAY AGENCY,182526224,0.075813
RAILROAD,2680000,0.001113
STATE HIGHWAY AGENCY,2018889891,0.838559


### Look at projects by fund

In [17]:
# Total cost per funding source across all project rows (matched to a bridge
# or not), plus each fund's fraction of the overall sum.
projectsbyfund = rawprojects.groupby('Fund')[['Total']].sum()
projectsbyfund['pct'] = projectsbyfund['Total'].div(projectsbyfund['Total'].sum())
projectsbyfund

Unnamed: 0_level_0,Total,pct
Fund,Unnamed: 1_level_1,Unnamed: 2_level_1
BOF,305533864,0.125282
BRIP,1000943697,0.410429
HSIP,548500,0.000225
NFP,56879000,0.023323
NHPP,434157733,0.178023
STP,173096935,0.070977
STU,466916778,0.191456
SXF,695072,0.000285


In [18]:
# Restrict to rows funded from 'BRIP', then total the cost per bridge owner
# and compute each owner's fraction of that total.
brip_rows = rawprojects.loc[rawprojects['Fund'].eq('BRIP')]
bripprojects = brip_rows.groupby(['Owner'])[['Total']].sum()
bripprojects['pct'] = bripprojects['Total'].div(bripprojects['Total'].sum())
bripprojects

Unnamed: 0_level_0,Total,pct
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
BOROUGH-CITY-TOWNSHIP,44954000,0.045968
COUNTY HIGHWAY AGENCY,56292000,0.057562
RAILROAD,1600000,0.001636
STATE HIGHWAY AGENCY,875097697,0.894834
