# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest PennDOT bridge data

In [2]:
# Read the bridge condition report CSV into the `bridges` frame.
bridges = pd.read_csv(filepath_or_buffer='input/BridgeCondRpt.csv')

In [3]:
# Print the column names, non-null counts and dtypes of the bridge table.
bridges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6649 entries, 0 to 6648
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   State/Local               6649 non-null   object 
 1   Condition                 6649 non-null   object 
 2   BR Key                    6649 non-null   int64  
 3   Bridge Id                 6649 non-null   int64  
 4   County                    6649 non-null   object 
 5   Municipality              6621 non-null   object 
 6   Location/Structure Name   6649 non-null   object 
 7   Feature Carried           6649 non-null   object 
 8   Feature Intersected       6646 non-null   object 
 9   Owner                     6649 non-null   object 
 10  Length (ft)               6649 non-null   int64  
 11  Deck Area (sq ft)         6649 non-null   float64
 12  Number of Spans           6649 non-null   int64  
 13  Material                  6649 non-null   object 
 14  Structur

##### Manually remap some bridge keys and add structures missing from the bridge data

In [4]:
def _placeholder_bridge(state_local, condition, br_key, owner):
    """Build a positional 25-value row for a structure missing from the bridge data.

    The values must line up with the column order of ``bridges``: index 0 is
    State/Local, 1 is Condition, 2 is BR Key and 9 is Owner. Index 23 is
    always 'SPC'. Every other field is a blank string or zero placeholder.
    """
    return [
        state_local, condition, br_key, 0, '', '',
        '', '', '',
        owner, 0, 0, 0, '', '', 0,
        '', 0, 0, '', '',
        '', '', 'SPC', 0
    ]


def _append_once(frame, br_key, row):
    """Append ``row`` to ``frame`` unless ``br_key`` already appears in 'BR Key'.

    The guard keeps this cell re-runnable: without it, every extra run would add
    another copy of each manual row, and duplicate keys multiply project rows in
    a later left merge on 'BR Key'.
    """
    if (frame['BR Key'] == br_key).any():
        return
    frame.loc[len(frame)] = row


_CITY_OWNER = 'CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH'
_COUNTY_OWNER = 'COUNTY HIGHWAY AGENCY'
_STATE_OWNER = 'STATE HIGHWAY AGENCY'

# (key currently in the data, replacement key, condition to record)
_KEY_REMAPS = [
    (68423, 1942, 'Poor'),
    (69253, 7772, 'Poor'),
    (68617, 19148, 'Fair'),
    (67464, 36697, 'Poor'),
]

for current_key, replacement_key, condition in _KEY_REMAPS:
    bridges.loc[bridges['BR Key'] == current_key, 'BR Key'] = replacement_key
    # Applies to every row that now carries the replacement key.
    bridges.loc[bridges['BR Key'] == replacement_key, 'Condition'] = condition

# (State/Local, Condition, BR Key, Owner) for structures added by hand
_MISSING_STRUCTURES = [
    ### Add North Tower Road Bridge -- old & new bridge keys don't exist in database
    ('S', 'Poor', 22413, _STATE_OWNER),
    ### Add Fern Hollow Bridge -- old & new bridge keys don't exist in database
    ('L', 'Poor', 2410, _CITY_OWNER),
    ### Add US 422 Cunningham Culverts -- old & new bridge keys don't exist in database
    # NOTE(review): 'L' paired with a state owner differs from North Tower Road ('S');
    # possibly a copy-paste slip. Value kept as in the original -- confirm.
    ('L', 'Poor', 19147, _STATE_OWNER),
    ### Add Overland Avenue Bridge -- demolished without replacement
    ('L', 'Poor', 2651, _CITY_OWNER),
    ### Add Rt 51 Bridge -- doesn't exist in database
    ('L', 'Poor', 744, _CITY_OWNER),
    ### Add Armstrong Tunnel -- tunnels don't exist in state database
    ('L', '', 2502, _COUNTY_OWNER),
    ### Add Corliss Tunnel -- tunnels don't exist in state database
    ('L', '', 2401, _CITY_OWNER),
]

for state_local, condition, br_key, owner in _MISSING_STRUCTURES:
    _append_once(
        bridges, br_key,
        _placeholder_bridge(state_local, condition, br_key, owner),
    )

### Duplicate the Butler Street Bridge -- work performed on old & new bridge keys
bridges.loc[bridges['BR Key'] == 48912, 'Condition'] = 'Poor'

# Copy the (already 'Poor') row and give the copy the second key.
butlerst = bridges[bridges['BR Key'] == 48912].values[0]
butlerst[2] = 487  # position 2 is the 'BR Key' column
_append_once(bridges, 487, butlerst)

### Import construction projects

In [5]:
# Load the construction projects; the ' Total ' header is padded with spaces,
# so give it a clean name.
projects = (
    pd.read_csv('input/projects.csv')
    .rename(columns={' Total ': 'Total'})
)

In [6]:
# Print the column names, non-null counts and dtypes of the projects table.
projects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1013 entries, 0 to 1012
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1013 non-null   int64  
 1   Planning Partner  1013 non-null   object 
 2   District          1013 non-null   int64  
 3   County            1013 non-null   object 
 4   Project           1013 non-null   int64  
 5   Title             1013 non-null   object 
 6   SR                975 non-null    float64
 7   Sec               1001 non-null   object 
 8   Project Class     1013 non-null   object 
 9   BRIDGE KEY        1013 non-null   int64  
 10  UDF Narrative     11 non-null     object 
 11  Phase             1013 non-null   object 
 12  Fund              1013 non-null   object 
 13   2023 Tot         1013 non-null   object 
 14   2024 Tot         1013 non-null   object 
 15   2025 Tot         1013 non-null   object 
 16   2026 Tot         1013 non-null   object 


In [7]:
# Turn currency text such as '$1,234' into whole-dollar integers.
# astype(str) makes the cell safe to re-run: once 'Total' is already numeric
# the .str accessor would otherwise raise. Both replacements are literal
# (regex=False), so '$' is never treated as a regex anchor.
projects['Total'] = (
    projects['Total']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
    .astype(int)
)

In [8]:
# Collapse to one row per (Project, BRIDGE KEY, Title) with the summed Total.
projects = (
    projects
    .groupby(['Project', 'BRIDGE KEY', 'Title'])['Total']
    .sum()
    .reset_index()
)

### Merge projects and PennDOT bridge data

In [9]:
# Left join keeps every project; `_merge` records whether a bridge row matched.
projects = projects.merge(
    bridges,
    how='left',
    left_on='BRIDGE KEY',
    right_on='BR Key',
    indicator=True,
)

In [10]:
# Projects whose BRIDGE KEY matched no row in `bridges`, ordered by key.
projects.loc[projects['_merge'] == 'left_only'].sort_values('BRIDGE KEY')

Unnamed: 0,Project,BRIDGE KEY,Title,Total,State/Local,Condition,BR Key,Bridge Id,County,Municipality,...,Posting Status,Weight Limit Single,Weight Limit Combination,Deck Condition,Superstructure Condition,Substructure Condition,Culvert Condition,Planning Partner,Average Daily Traffic,_merge


### Look at bridges by owner, condition

In [11]:
# Count bridges per owner (rows) and condition (columns).
bridgepiv = bridges[['Owner', 'Condition']].pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)
# Row total across all condition columns.
bridgepiv['Total'] = bridgepiv.sum(axis='columns')

In [12]:
# Poor bridges as a percentage of each owner's bridges.
bridgepiv['P%'] = bridgepiv['Poor'].div(bridgepiv['Total']).mul(100)
# Each owner's percentage of all poor bridges.
bridgepiv['P%_total'] = bridgepiv['Poor'].div(bridgepiv['Poor'].sum()).mul(100)
# Each owner's percentage of all bridges.
bridgepiv['%_total'] = bridgepiv['Total'].div(bridgepiv['Total'].sum()).mul(100)

In [13]:
# Display the owner-by-condition counts with the total and percentage columns.
bridgepiv

Condition,Unnamed: 1_level_0,Fair,Good,Poor,Total,P%,P%_total,%_total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",1.0,118.0,39.0,71.0,229.0,31.004367,7.667387,3.439988
COUNTY HIGHWAY AGENCY,1.0,349.0,200.0,127.0,677.0,18.759232,13.714903,10.169746
OTHER LOCAL AGENCIES,,12.0,6.0,5.0,23.0,21.73913,0.539957,0.345501
RAILROAD,,10.0,4.0,15.0,29.0,51.724138,1.61987,0.435632
STATE HIGHWAY AGENCY,,2976.0,1716.0,569.0,5261.0,10.815434,61.447084,79.029593
TOWN OR TOWNSHIP HIGHWAY AGENCY,,198.0,101.0,139.0,438.0,31.73516,15.010799,6.57954


### Sum project costs by bridge owner

In [14]:
# Total project cost per bridge owner.
projcost = projects.groupby('Owner')[['Total']].sum()
# Despite the name, this is a 0-1 fraction of the overall total (not multiplied by 100).
projcost['%_Total'] = projcost['Total'].div(projcost['Total'].sum())
projcost

Unnamed: 0_level_0,Total,%_Total
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",178973884,0.076226
COUNTY HIGHWAY AGENCY,180526224,0.076888
RAILROAD,2680000,0.001141
STATE HIGHWAY AGENCY,1963921199,0.83645
TOWN OR TOWNSHIP HIGHWAY AGENCY,21821580,0.009294


### Look at projects by bridge owner, condition

In [15]:
# Count projects per bridge owner (rows) and bridge condition (columns).
projects[['Owner', 'Condition']].pivot_table(
    index=['Owner'],
    columns='Condition',
    values='Condition',
    aggfunc='size',
)

Condition,Unnamed: 1_level_0,Fair,Good,Poor
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"CITY, MUNICIPAL, HIGHWAY AGENCY, OR BOROUGH",1.0,6.0,,25.0
COUNTY HIGHWAY AGENCY,1.0,23.0,1.0,25.0
RAILROAD,,1.0,,1.0
STATE HIGHWAY AGENCY,,272.0,75.0,138.0
TOWN OR TOWNSHIP HIGHWAY AGENCY,,1.0,,12.0
