# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest PennDOT bridge data

In [2]:
# Load the bridge condition report, then fold the two municipal owner labels
# into a single 'BOROUGH-CITY-TOWNSHIP' category.
bridges = pd.read_csv('input/BridgeCondRpt.csv')

municipal_owner_labels = [
    'CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH',
    'TOWN OR TOWNSHIP HIGHWAY AGENCY',
]
is_municipal_owner = bridges['Owner'].isin(municipal_owner_labels)
bridges.loc[is_municipal_owner, 'Owner'] = 'BOROUGH-CITY-TOWNSHIP'

In [3]:
# Print the bridge table's row count, column names, non-null counts and dtypes.
bridges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6649 entries, 0 to 6648
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   State/Local               6649 non-null   object 
 1   Condition                 6649 non-null   object 
 2   BR Key                    6649 non-null   int64  
 3   Bridge Id                 6649 non-null   int64  
 4   County                    6649 non-null   object 
 5   Municipality              6621 non-null   object 
 6   Location/Structure Name   6649 non-null   object 
 7   Feature Carried           6649 non-null   object 
 8   Feature Intersected       6646 non-null   object 
 9   Owner                     6649 non-null   object 
 10  Length (ft)               6649 non-null   int64  
 11  Deck Area (sq ft)         6649 non-null   float64
 12  Number of Spans           6649 non-null   int64  
 13  Material                  6649 non-null   object 
 14  Structur

##### Manually set some bridge keys

In [4]:
# Hand-maintained key corrections. Each entry is
# (key currently in the table, key to store instead, condition to assign).
# The condition is applied to every row that carries the replacement key
# after the swap, not only to the row that was just re-keyed.
rekeyed_bridges = [
    (68423, 1942, 'Poor'),
    (69253, 7772, 'Poor'),
    (68617, 19148, 'Fair'),
    (67464, 36697, 'Poor'),
]
for current_key, replacement_key, condition in rekeyed_bridges:
    bridges.loc[bridges['BR Key'] == current_key, 'BR Key'] = replacement_key
    bridges.loc[bridges['BR Key'] == replacement_key, 'Condition'] = condition


def _placeholder_bridge_row(state_local, condition, br_key, owner, length_ft):
    """Return a 25-value row for a structure missing from the bridge table.

    Only State/Local, Condition, BR Key, Owner and Length (ft) carry real
    values. The remaining positions are filled with 0 or '' and the
    second-to-last position is always 'SPC'.
    """
    return [
        state_local, condition, br_key, 0, '', '', '', '', '',
        owner, length_ft, 0, 0, '', '', 0,
        '', 0, 0, '', '', '', '', 'SPC', 0,
    ]


# Structures appended by hand, in order:
# (State/Local, Condition, BR Key, Owner, Length (ft)).
missing_structures = [
    # North Tower Road Bridge -- old (22413) & new (67810) bridge keys don't exist in database
    ('S', 'Poor', 22413, 'STATE HIGHWAY AGENCY', 20),
    # Fern Hollow Bridge -- old & new bridge keys don't exist in database
    ('L', 'Poor', 2410, 'BOROUGH-CITY-TOWNSHIP', 136.2),
    # US 422 Cunningham Culverts -- old & new bridge keys don't exist in database
    ('L', 'Poor', 19147, 'STATE HIGHWAY AGENCY', 0),
    # Overland Avenue Bridge -- demolished without replacement
    ('L', 'Poor', 2651, 'BOROUGH-CITY-TOWNSHIP', 22.9),
    # Rt 51 Bridge -- doesn't exist in database
    ('L', 'Poor', 744, 'BOROUGH-CITY-TOWNSHIP', 67),
    # Armstrong Tunnel -- tunnels don't exist in state database
    ('L', '', 2502, 'COUNTY HIGHWAY AGENCY', 1298),
    # Corliss Tunnel -- tunnels don't exist in state database
    ('L', '', 2401, 'BOROUGH-CITY-TOWNSHIP', 451),
]
for structure in missing_structures:
    # len(bridges) is used as the label of the new row, so each pass appends one row.
    bridges.loc[len(bridges)] = _placeholder_bridge_row(*structure)

# Duplicate the Butler Street Bridge -- work performed on old & new bridge keys.
is_butler_st = bridges['BR Key'] == 48912
bridges.loc[is_butler_st, 'Condition'] = 'Poor'

# Take the values of the first matching row and relabel the copy by position:
# slot 1 is Condition and slot 2 is BR Key in the column listing above.
butlerst = bridges[is_butler_st].values[0]
butlerst[1] = 'Poor'
butlerst[2] = 487
bridges.loc[len(bridges)] = butlerst

### Import construction projects

In [5]:
# Read the project list and strip the padding spaces from the ' Total ' header.
rawprojects = (
    pd.read_csv('input/projects.csv')
    .rename(columns={' Total ': 'Total'})
)

In [6]:
# Print the project table's row count, column names, non-null counts and dtypes.
rawprojects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048 entries, 0 to 1047
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1048 non-null   int64  
 1   Planning Partner  1048 non-null   object 
 2   District          1048 non-null   int64  
 3   County            1048 non-null   object 
 4   Project           1048 non-null   int64  
 5   Title             1048 non-null   object 
 6   SR                1009 non-null   float64
 7   Sec               1035 non-null   object 
 8   Project Class     1048 non-null   object 
 9   BRIDGE KEY        1047 non-null   float64
 10  UDF Narrative     11 non-null     object 
 11  Phase             1048 non-null   object 
 12  Fund              1048 non-null   object 
 13   2023 Tot         1048 non-null   object 
 14   2024 Tot         1048 non-null   object 
 15   2025 Tot         1048 non-null   object 
 16   2026 Tot         1048 non-null   object 


In [7]:
# Turn the text 'Total' column into whole numbers by removing '$' and ','
# and casting through float to int (the int cast truncates any fraction).
#
# Casting to str first keeps this cell re-runnable: on a second run 'Total'
# is already int, and calling .str on an int column would raise.
# Both symbols are replaced literally (regex=False); '$' is a regex
# metacharacter, and treating ',' the same way keeps the two steps consistent.
total_text = rawprojects['Total'].astype(str)
for symbol in ('$', ','):
    total_text = total_text.str.replace(symbol, '', regex=False)
rawprojects['Total'] = total_text.astype(float).astype(int)

### Merge projects and PennDOT bridge data

In [8]:
# Left-join each project row to the bridge row whose 'BR Key' equals the
# project's 'BRIDGE KEY'. indicator=True adds a '_merge' column recording
# whether a bridge row was found.
rawprojects = rawprojects.merge(
    bridges,
    how='left',
    left_on='BRIDGE KEY',
    right_on='BR Key',
    indicator=True,
)

In [9]:
# Show the project rows that found no bridge record, ordered by the requested key.
no_bridge_match = rawprojects['_merge'] == 'left_only'
rawprojects.loc[no_bridge_match].sort_values(by='BRIDGE KEY')

Unnamed: 0,ID,Planning Partner_x,District,County_x,Project,Title,SR,Sec,Project Class,BRIDGE KEY,...,Posting Status,Weight Limit Single,Weight Limit Combination,Deck Condition,Superstructure Condition,Substructure Condition,Culvert Condition,Planning Partner_y,Average Daily Traffic,_merge
1044,663,Interstate,10,Clarion,109300,Clarion River Bridge,80.0,351.0,Bridge Preservation - Federal,10951.0,...,,,,,,,,,,left_only
887,513,SPC,12,Fayette,81229,D12 Bridge Preservation Design,,,Bridge Restoration,,...,,,,,,,,,,left_only


In [10]:
# Collapse the project rows to one row per distinct combination of the key
# columns below, summing 'Total' within each combination.
project_key_columns = ['Project', 'BR Key', 'Title', 'Owner', 'Condition', 'Length (ft)']
projects = (
    rawprojects
    .groupby(project_key_columns)['Total']
    .sum()
    .reset_index()
)

### Look at bridges by owner, condition

In [11]:
# Count bridges for every owner/condition pair (owners as rows, conditions as
# columns), then add a per-owner total across all condition columns.
owner_condition = bridges[['Owner', 'Condition']]
bridgepiv = owner_condition.pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)
bridgepiv['Total'] = bridgepiv.sum(axis=1)

In [12]:
# Derived percentages (all scaled by 100):
#   P%        poor bridges as a share of the owner's own bridges
#   P%_total  the owner's poor bridges as a share of all poor bridges
#   %_total   the owner's bridges as a share of all bridges
poor_count = bridgepiv['Poor']
owner_total = bridgepiv['Total']

bridgepiv['P%'] = 100 * (poor_count / owner_total)
bridgepiv['P%_total'] = 100 * (poor_count / poor_count.sum())
bridgepiv['%_total'] = 100 * (owner_total / owner_total.sum())

In [13]:
# Display bridge counts by owner and condition together with the derived percentage columns.
bridgepiv

Condition,Unnamed: 1_level_0,Fair,Good,Poor,Total,P%,P%_total,%_total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BOROUGH-CITY-TOWNSHIP,1.0,316.0,140.0,210.0,667.0,31.484258,22.678186,10.019528
COUNTY HIGHWAY AGENCY,1.0,349.0,200.0,127.0,677.0,18.759232,13.714903,10.169746
OTHER LOCAL AGENCIES,,12.0,6.0,5.0,23.0,21.73913,0.539957,0.345501
RAILROAD,,10.0,4.0,15.0,29.0,51.724138,1.61987,0.435632
STATE HIGHWAY AGENCY,,2976.0,1716.0,569.0,5261.0,10.815434,61.447084,79.029593


### Look at projects by bridge owner, condition

In [14]:
# Count aggregated projects for every owner/condition pair
# (owners as rows, conditions as columns).
projects[['Owner', 'Condition']].pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)

Condition,Unnamed: 1_level_0,Fair,Good,Poor
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BOROUGH-CITY-TOWNSHIP,1.0,7.0,,39.0
COUNTY HIGHWAY AGENCY,1.0,23.0,1.0,26.0
RAILROAD,,1.0,,1.0
STATE HIGHWAY AGENCY,,276.0,75.0,142.0


### Look at projects by owner, NBI status

In [15]:
def _poor_counts_by_nbi(group, suffix):
    """Count the 'Poor' rows of one owner group, split at a length of 20 ft.

    Rows with length > 20 are counted under 'NBI' + suffix and rows with
    length <= 20 under 'nonNBI' + suffix. A row whose length is missing
    satisfies neither comparison and is counted in neither bucket.
    """
    is_poor = group['Condition'] == 'Poor'
    length_ft = group['Length (ft)']
    return pd.Series({
        'nonNBI' + suffix: int((is_poor & (length_ft <= 20)).sum()),
        'NBI' + suffix: int((is_poor & (length_ft > 20)).sum()),
    })


# Only the two columns the helper reads are handed to apply(). Applying to the
# whole frame would also pass the grouping column, which pandas 2.2+ deprecates.
nbi_columns = ['Condition', 'Length (ft)']
poorproj = projects.groupby('Owner')[nbi_columns].apply(_poor_counts_by_nbi, 'proj')
poorbrid = bridges.groupby('Owner')[nbi_columns].apply(_poor_counts_by_nbi, 'br')

# Inner join on the 'Owner' index: owners absent from either side are dropped.
comparison = pd.merge(poorproj, poorbrid, on='Owner')

# Despite the '%' in their names these columns hold fractions (not scaled by 100).
# An owner with no bridges in a bucket yields NaN from the 0/0 division.
comparison['%_NBI'] = comparison['NBIproj'] / comparison['NBIbr']
comparison['%_nonNBI'] = comparison['nonNBIproj'] / comparison['nonNBIbr']
comparison['%_Total'] = (
    (comparison['NBIproj'] + comparison['nonNBIproj'])
    / (comparison['NBIbr'] + comparison['nonNBIbr'])
)

comparison

Unnamed: 0_level_0,nonNBIproj,NBIproj,nonNBIbr,NBIbr,%_NBI,%_nonNBI,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BOROUGH-CITY-TOWNSHIP,0,39,1,209,0.186603,0.0,0.185714
COUNTY HIGHWAY AGENCY,0,26,0,127,0.204724,,0.204724
RAILROAD,0,1,0,15,0.066667,,0.066667
STATE HIGHWAY AGENCY,33,109,298,271,0.402214,0.110738,0.249561


### Sum project costs by bridge owner

In [16]:
# Total project cost per bridge owner, plus each owner's fraction of the overall cost.
projcost = projects.groupby('Owner')[['Total']].sum()
overall_cost = projcost['Total'].sum()
projcost['%_Total'] = projcost['Total'] / overall_cost
projcost

Unnamed: 0_level_0,Total,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
BOROUGH-CITY-TOWNSHIP,203475464,0.084515
COUNTY HIGHWAY AGENCY,182526224,0.075813
RAILROAD,2680000,0.001113
STATE HIGHWAY AGENCY,2018889891,0.838559


### Look at projects by fund

In [17]:
# Total cost per funding source over the merged project rows, plus each
# fund's fraction of the overall cost.
projectsbyfund = rawprojects.groupby('Fund')[['Total']].sum()
all_funds_total = projectsbyfund['Total'].sum()
projectsbyfund['pct'] = projectsbyfund['Total'] / all_funds_total
projectsbyfund

Unnamed: 0_level_0,Total,pct
Fund,Unnamed: 1_level_1,Unnamed: 2_level_1
BOF,305533864,0.125282
BRIP,1000943697,0.410429
HSIP,548500,0.000225
NFP,56879000,0.023323
NHPP,434157733,0.178023
STP,173096935,0.070977
STU,466916778,0.191456
SXF,695072,0.000285


In [18]:
# Restrict to rows funded by 'BRIP', then total the cost per bridge owner and
# express each owner's amount as a fraction of the BRIP total.
is_brip = rawprojects['Fund'] == 'BRIP'
bripprojects = rawprojects.loc[is_brip].groupby(['Owner'])[['Total']].sum()
brip_total = bripprojects['Total'].sum()
bripprojects['pct'] = bripprojects['Total'] / brip_total
bripprojects

Unnamed: 0_level_0,Total,pct
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
BOROUGH-CITY-TOWNSHIP,44954000,0.045968
COUNTY HIGHWAY AGENCY,56292000,0.057562
RAILROAD,1600000,0.001636
STATE HIGHWAY AGENCY,875097697,0.894834
