# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest PennDOT bridge data

In [2]:
# Read the PennDOT bridge condition report into a DataFrame.
BRIDGES_CSV = 'input/BridgeCondRpt.csv'
bridges = pd.read_csv(BRIDGES_CSV)

In [3]:
# Print the column names, non-null counts and dtypes of the bridge table
# as a quick sanity check of the load.
bridges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6649 entries, 0 to 6648
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   State/Local               6649 non-null   object 
 1   Condition                 6649 non-null   object 
 2   BR Key                    6649 non-null   int64  
 3   Bridge Id                 6649 non-null   int64  
 4   County                    6649 non-null   object 
 5   Municipality              6621 non-null   object 
 6   Location/Structure Name   6649 non-null   object 
 7   Feature Carried           6649 non-null   object 
 8   Feature Intersected       6646 non-null   object 
 9   Owner                     6649 non-null   object 
 10  Length (ft)               6649 non-null   int64  
 11  Deck Area (sq ft)         6649 non-null   float64
 12  Number of Spans           6649 non-null   int64  
 13  Material                  6649 non-null   object 
 14  Structur

### Import construction projects

In [4]:
# Load the construction project list. The cost column's header is padded with
# spaces in the CSV (' Total '), so rename it to a clean 'Total' on load.
projects = (
    pd.read_csv('input/projects.csv')
    .rename(columns={' Total ': 'Total'})
)

In [5]:
# Print the column names, non-null counts and dtypes of the project table
# as a quick sanity check of the load.
projects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1013 entries, 0 to 1012
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1013 non-null   int64  
 1   Planning Partner  1013 non-null   object 
 2   District          1013 non-null   int64  
 3   County            1013 non-null   object 
 4   Project           1013 non-null   int64  
 5   Title             1013 non-null   object 
 6   SR                975 non-null    float64
 7   Sec               1001 non-null   object 
 8   Project Class     1013 non-null   object 
 9   BRIDGE KEY        1013 non-null   int64  
 10  UDF Narrative     11 non-null     object 
 11  Phase             1013 non-null   object 
 12  Fund              1013 non-null   object 
 13   2023 Tot         1013 non-null   object 
 14   2024 Tot         1013 non-null   object 
 15   2025 Tot         1013 non-null   object 
 16   2026 Tot         1013 non-null   object 


In [6]:
# Convert the currency-formatted 'Total' strings (e.g. "$1,000,000") to
# whole-dollar integers.
#
# - `.astype(str)` makes the cell safe to re-run: once 'Total' is already
#   numeric, the `.str` accessor would otherwise raise AttributeError.
# - '$' and ',' are stripped in one pass with an explicit regex character
#   class, so the result does not depend on the pandas-version-specific
#   default of `regex`.
# - Going through float first accepts values with a decimal part; the final
#   int cast truncates any cents (same as before).
projects['Total'] = (
    projects['Total']
    .astype(str)
    .str.replace(r'[$,]', '', regex=True)
    .str.strip()
    .astype(float)
    .astype(int)
)

In [7]:
# Collapse the project table to one row per (Project, BRIDGE KEY, Title),
# summing 'Total' over all rows that share those keys.
group_keys = ['Project', 'BRIDGE KEY', 'Title']
projects = projects.groupby(group_keys)['Total'].sum().reset_index()

### Merge projects and PennDOT bridge data

In [8]:
# Left-join the bridge attributes onto the projects so every project row is
# kept; the `_merge` indicator column records whether a bridge was found
# ('both') or not ('left_only').
# NOTE(review): assumes 'BR Key' is unique in `bridges`; duplicates would
# repeat project rows and inflate cost sums downstream -- confirm.
pj = projects.merge(
    bridges,
    how='left',
    left_on='BRIDGE KEY',
    right_on='BR Key',
    indicator=True,
)

In [9]:
# Show the projects whose BRIDGE KEY has no match in the bridge table,
# ordered by key.
pj.loc[pj['_merge'] == 'left_only'].sort_values(by='BRIDGE KEY')

Unnamed: 0,Project,BRIDGE KEY,Title,Total,State/Local,Condition,BR Key,Bridge Id,County,Municipality,...,Posting Status,Weight Limit Single,Weight Limit Combination,Deck Condition,Superstructure Condition,Substructure Condition,Culvert Condition,Planning Partner,Average Daily Traffic,_merge
31,26971,487,Butler Street Bridge,39169,,,,,,,...,,,,,,,,,,left_only
573,117472,744,Bridge over Route 51 Near Woodruff Street,4500000,,,,,,,...,,,,,,,,,,left_only
209,100711,1942,Union Avenue over Spruce Run,1000000,,,,,,,...,,,,,,,,,,left_only
49,27806,2401,Corliss Tunnel,13600000,,,,,,,...,,,,,,,,,,left_only
384,110319,2410,Forbes Avenue ov Fern Hollow,548500,,,,,,,...,,,,,,,,,,left_only
182,93922,2502,AR01 - Armstrong Tunnel,1600000,,,,,,,...,,,,,,,,,,left_only
45,27549,2651,Overland Avenue Bridge,1360000,,,,,,,...,,,,,,,,,,left_only
14,24715,7772,South of Cooperstown Br #1,55983,,,,,,,...,,,,,,,,,,left_only
25,25696,19147,US 422 Cunningham Culverts,70800,,,,,,,...,,,,,,,,,,left_only
26,25696,19148,US 422 Cunningham Culverts,70800,,,,,,,...,,,,,,,,,,left_only


### Look at bridges by owner, condition

In [10]:
# Count bridges for each Owner (rows) by Condition (columns), then append a
# per-owner row total across the condition columns.
bridgepiv = bridges[['Owner', 'Condition']].pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)
bridgepiv['Total'] = bridgepiv.sum(axis=1)

In [11]:
# Percentage columns (all scaled to 0-100):
#   P%       - share of each owner's bridges that are Poor
#   P%_total - each owner's share of all Poor bridges
#   %_total  - each owner's share of all bridges
poor_count = bridgepiv['Poor']
owner_total = bridgepiv['Total']
bridgepiv['P%'] = (poor_count / owner_total) * 100
bridgepiv['P%_total'] = (poor_count / poor_count.sum()) * 100
bridgepiv['%_total'] = (owner_total / owner_total.sum()) * 100

In [12]:
# Display the owner-by-condition counts together with the Total and
# percentage columns computed in the previous cells.
bridgepiv

Condition,Fair,Good,Poor,Total,P%,P%_total,%_total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",118,39,68,225,30.222222,7.423581,3.383968
COUNTY HIGHWAY AGENCY,349,200,127,676,18.786982,13.864629,10.166942
OTHER LOCAL AGENCIES,12,6,5,23,21.73913,0.545852,0.345917
RAILROAD,10,4,15,29,51.724138,1.637555,0.436156
STATE HIGHWAY AGENCY,2975,1721,562,5258,10.688475,61.353712,79.079561
TOWN OR TOWNSHIP HIGHWAY AGENCY,198,101,139,438,31.73516,15.174672,6.587457


### Look at projects by bridge owner, total cost

In [13]:
# Sum project cost by bridge owner and compute each owner's share of it.
# Note: '%_Total' is a fraction (0-1), not scaled by 100 like the percentage
# columns in `bridgepiv`. Projects without a matching bridge have no Owner,
# so the groupby leaves them out of both the sums and the denominator.
projcost = pj.groupby('Owner')[['Total']].sum()
projcost['%_Total'] = projcost['Total'] / projcost['Total'].sum()
projcost

Unnamed: 0_level_0,Total,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",158965384,0.068384
COUNTY HIGHWAY AGENCY,178926224,0.076971
RAILROAD,2680000,0.001153
STATE HIGHWAY AGENCY,1962202847,0.844105
TOWN OR TOWNSHIP HIGHWAY AGENCY,21821580,0.009387


### Look at projects by bridge owner, condition

In [14]:
# Count projects for each bridge Owner (rows) by bridge Condition (columns);
# owner/condition combinations with no projects show as NaN.
pj[['Owner', 'Condition']].pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)

Condition,Fair,Good,Poor
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",6.0,,22.0
COUNTY HIGHWAY AGENCY,23.0,1.0,25.0
RAILROAD,1.0,,1.0
STATE HIGHWAY AGENCY,271.0,76.0,131.0
TOWN OR TOWNSHIP HIGHWAY AGENCY,1.0,,12.0
