# Analyze SPC construction projects

In [1]:
import pandas as pd

### Import latest NBI data

In [2]:
# Load the formatted 2022 NBI extract.
# STRUCTURE_NUMBER_008 is a zero-padded identifier that is later used as a
# string join key, so force it to be read as text. Without this, pandas would
# infer an integer dtype (and drop the leading zeros) if every value in the
# file happened to be numeric.
nbi = pd.read_csv(
    'output/nbi2022-formatted.csv',
    dtype={'STRUCTURE_NUMBER_008': str},
)

In [3]:
# Print the row count, column names, non-null counts and dtypes of the NBI frame.
nbi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4788 entries, 0 to 4787
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   STRUCTURE_NUMBER_008  4788 non-null   object
 1   BridgeCondition       4788 non-null   object
 2   SimpleOwner           4788 non-null   object
dtypes: object(3)
memory usage: 112.3+ KB


### Import construction projects

In [4]:
# Load the construction project listing from the input folder.
projects = pd.read_csv('input/projects.csv')

In [5]:
# Print the row count, column names, non-null counts and dtypes of the projects frame.
projects.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Planning Partner  155 non-null    object 
 1   Dist              155 non-null    int64  
 2   County            155 non-null    object 
 3   Project           155 non-null    int64  
 4   Title             155 non-null    object 
 5   SR                144 non-null    float64
 6   Sec               148 non-null    object 
 7   Project Class     155 non-null    object 
 8   UDF Narrative     0 non-null      float64
 9   Phase             155 non-null    object 
 10  Fund              155 non-null    object 
 11  2023 Tot          155 non-null    object 
 12  2024 Tot          155 non-null    object 
 13  2025 Tot          155 non-null    object 
 14  2026 Tot          155 non-null    object 
 15  2027 Tot          155 non-null    object 
 16  2028 Tot          155 non-null    object 
 1

In [6]:
# Strip the commas out of the Total strings and convert the column to integers.
# Casting to str first keeps this cell re-runnable: on a second run the column
# is already numeric and the .str accessor would otherwise raise.
projects['Total'] = (
    projects['Total']
    .astype(str)
    .str.replace(',', '', regex=False)  # literal comma, not a regex pattern
    .astype(int)
)

In [7]:
# Collapse to one row per (Project, Title) pair by summing Total across
# all of the rows that share that pair.
projects = (
    projects
    .groupby(['Project', 'Title'])['Total']
    .sum()
    .reset_index()
)

### Import NBI bridge keys

In [8]:
# Load the project -> NBI bridge key lookup.
# 'Bridge Key' is read as text so the string operations applied to it in the
# following cells (.str.contains, .split, .str.zfill) do not depend on pandas'
# dtype inference; a column of purely numeric keys with blanks would otherwise
# be parsed as float.
keys = pd.read_csv('input/projects-keys.csv', dtype={'Bridge Key': str})

# Keep only the rows that actually have a bridge key.
keys = keys.dropna(subset=['Bridge Key'])

In [9]:
# Expand every row whose 'Bridge Key' holds several comma-separated keys into
# one record per key. Each record has five fields laid out as
# [Project, Project Title, Bridge Key, '', ''], because the next cell builds a
# DataFrame from this list using keys.columns.
multikeyprojects = []

has_multiple_keys = keys['Bridge Key'].str.contains(',')

for _, row in keys[has_multiple_keys].iterrows():
    # Split on the bare comma and strip each piece, so "a,b", "a, b" and
    # "a,  b" are all handled the same way (rows are selected on ',' above,
    # so splitting on ', ' would leave "a,b" unsplit).
    for raw_key in row['Bridge Key'].split(','):
        key = raw_key.strip()
        if not key:
            # A trailing or doubled comma yields an empty piece; skip it
            # rather than emit a blank bridge key.
            continue
        multikeyprojects.append([
            row['Project'],
            row['Project Title'],
            key,
            '',
            ''
        ])

In [10]:
# Rebuild the lookup: rows that already hold a single key, followed by the
# per-key records expanded from the multi-key rows.
single_key_rows = keys.loc[~keys['Bridge Key'].str.contains(',')]
expanded_rows = pd.DataFrame(multikeyprojects, columns=keys.columns)

keys = pd.concat([single_key_rows, expanded_rows])

In [11]:
# Left-pad every bridge key with zeros to a fixed width so it can be compared
# with the 15-character STRUCTURE_NUMBER_008 values in the NBI data.
BRIDGE_KEY_WIDTH = 15

bridge_keys_as_text = keys['Bridge Key'].astype(str)
keys['Bridge Key'] = bridge_keys_as_text.str.zfill(BRIDGE_KEY_WIDTH)

### Merge projects, bridge keys and NBI data

In [12]:
# Attach bridge keys to the aggregated projects. This is an inner join, so
# projects with no entry in the key lookup are dropped.
projects = projects.merge(
    keys[['Project', 'Bridge Key']],
    on='Project',
    how='inner',
)

In [13]:
# Attach the NBI attributes by matching the padded bridge key to the NBI
# structure number. This is an inner join, so keys with no NBI match are dropped.
projects = projects.merge(
    nbi,
    left_on='Bridge Key',
    right_on='STRUCTURE_NUMBER_008',
    how='inner',
)

In [14]:
# Display the merged projects / bridge keys / NBI table.
projects

Unnamed: 0,Project,Title,Total,Bridge Key,STRUCTURE_NUMBER_008,BridgeCondition,SimpleOwner
0,27138,S. Millvale Avenue Bridge,10400000,000000000002452,000000000002452,F,Municipal
1,27144,28th Street Bridge,10015600,000000000002461,000000000002461,P,Municipal
2,27303,Ansonia Place Bridge,440000,000000000002395,000000000002395,P,Municipal
3,27306,Lowrie Street Bridge,680000,000000000002421,000000000002421,F,Municipal
4,27316,AL Local BPRS Group 2,2864000,000000000002631,000000000002631,P,County
...,...,...,...,...,...,...,...
84,117368,Corley St Bridge,601600,000000000002414,000000000002414,P,Municipal
85,117369,Calera St Bridge Replacement,792000,000000000002399,000000000002399,P,Municipal
86,117473,Steen Road Bridge,3840000,000000000002113,000000000002113,P,Municipal
87,117800,Taylor Ave Over Dirty Camp Run,680000,000000000002660,000000000002660,P,Municipal


### Look at projects by owner, condition