# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest NBI data

In [2]:
nbi = pd.read_csv('output/nbi2022-formatted.csv')

In [3]:
nbi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4788 entries, 0 to 4787
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   STRUCTURE_NUMBER_008  4788 non-null   object
 1   BridgeCondition       4788 non-null   object
 2   SimpleOwner           4788 non-null   object
dtypes: object(3)
memory usage: 112.3+ KB


### Import construction projects

In [4]:
projects = pd.read_csv('input/projects.csv')
projects = projects.rename(columns={' Total ': 'Total'})
projects['BRIDGE KEY'] = projects['BRIDGE KEY'].astype(str).str.zfill(15)

In [5]:
projects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1013 entries, 0 to 1012
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1013 non-null   int64  
 1   Planning Partner  1013 non-null   object 
 2   District          1013 non-null   int64  
 3   County            1013 non-null   object 
 4   Project           1013 non-null   int64  
 5   Title             1013 non-null   object 
 6   SR                975 non-null    float64
 7   Sec               1001 non-null   object 
 8   Project Class     1013 non-null   object 
 9   BRIDGE KEY        1013 non-null   object 
 10  UDF Narrative     11 non-null     object 
 11  Phase             1013 non-null   object 
 12  Fund              1013 non-null   object 
 13   2023 Tot         1013 non-null   object 
 14   2024 Tot         1013 non-null   object 
 15   2025 Tot         1013 non-null   object 
 16   2026 Tot         1013 non-null   object 


In [6]:
projects['Total'] = projects.Total.str.replace('$', '', regex=False)
projects['Total'] = projects.Total.str.replace(',', '')
projects['Total'] = projects.Total.astype(float).astype(int)

In [7]:
projects = projects.groupby(['Project', 'BRIDGE KEY', 'Title']).Total.aggregate('sum').to_frame().reset_index()

### Merge projects and NBI data

In [8]:
projects = pd.merge(projects, nbi, left_on='BRIDGE KEY', right_on='STRUCTURE_NUMBER_008')

In [9]:
projects

Unnamed: 0,Project,BRIDGE KEY,Title,Total,STRUCTURE_NUMBER_008,BridgeCondition,SimpleOwner
0,23833,000000000003378,T-466 Saint Charles Br.,4306000,000000000003378,P,County
1,23978,000000000003085,Graff Bridge Preservation,10620400,000000000003085,F,State
2,24014,000000000003182,Elgin Cemetery Bridge,1500000,000000000003182,F,State
3,24135,000000000003178,Pyra Road Bridge,2738500,000000000003178,P,State
4,24136,000000000003177,Brick Church Bridge #2,2151500,000000000003177,P,State
...,...,...,...,...,...,...,...
534,117652,000000000008037,Lardintown Bridge #4,1738184,000000000008037,P,State
535,117669,000000000003411,T-860 Beagle Club Bridge,1372900,000000000003411,P,Municipal
536,117682,000000000019232,Cypress Bridge,2842370,000000000019232,P,State
537,117685,000000000019081,First Sergeant Alexander Kelly Memorial Bridge,11516826,000000000019081,P,State


### Look at bridges by owner, condition

In [10]:
nbipiv = pd.pivot_table(nbi[['SimpleOwner', 'BridgeCondition']], index=['SimpleOwner'], columns='BridgeCondition', values='BridgeCondition', aggfunc='size')
nbipiv['Total'] = nbipiv.sum(axis=1)

In [11]:
nbipiv['P%'] = 100 * (nbipiv.P / nbipiv.Total)
nbipiv['P%_total'] = 100 * (nbipiv.P / nbipiv.P.sum())

In [12]:
nbipiv

BridgeCondition,F,G,P,Total,P%,P%_total
SimpleOwner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
County,341,199,133,673,19.762259,21.077655
Federal,1,1,1,3,33.333333,0.158479
Municipal,328,139,201,668,30.08982,31.8542
Other,33,6,21,60,35.0,3.328051
State,1793,1316,275,3384,8.126478,43.581616


### Look at projects by bridge owner, total cost

In [13]:
projcost = projects.groupby('SimpleOwner').Total.sum().to_frame()
projcost['%_Total'] = projcost.Total / projcost.Total.sum()
projcost

Unnamed: 0_level_0,Total,%_Total
SimpleOwner,Unnamed: 1_level_1,Unnamed: 2_level_1
County,178926224,0.085674
Municipal,181335464,0.086827
Other,2680000,0.001283
State,1725518641,0.826216


### Look at projects by bridge owner, condition

In [14]:
pd.pivot_table(projects[['SimpleOwner', 'BridgeCondition']], index=['SimpleOwner'], columns='BridgeCondition', values='BridgeCondition', aggfunc='size')

BridgeCondition,F,G,P
SimpleOwner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
County,22.0,1.0,26.0
Municipal,7.0,,35.0
Other,1.0,,1.0
State,269.0,79.0,98.0
