In [1]:
import pandas as pd
import geopandas as gpd

### Import all bridge projects in Allegheny County

In [2]:
# Read the three input archives and declare each layer's CRS as EPSG:3857.
# The same read/set_crs pair is applied to every archive, in the original order.
future, indev, construct = (
    gpd.read_file(archive).set_crs('EPSG:3857')
    for archive in (
        'input/Future_Development___All.zip',
        'input/In_Development___All.zip',
        'input/constr.zip',
    )
)

In [3]:
# Stack the three layers into one frame, then reproject to EPSG:4326.
# The per-layer row labels are kept here, so the index contains repeats.
df = pd.concat([future, indev, construct]).to_crs('EPSG:4326')

In [4]:
# Sanity check: print the row count, column count, dtypes and memory use
# of the combined frame.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1041 entries, 0 to 467
Columns: 125 entries, GISDATA_PR to geometry
dtypes: float64(11), geometry(1), int64(13), object(100)
memory usage: 1.0+ MB


### Restrict to Pittsburgh-owned bridges and include each project only once

In [5]:
# Keep only rows whose PROJECT_SP value is exactly 'City of Pittsburgh'.
# .copy() makes the result an independent frame, so the column assignments
# in later cells modify it directly instead of triggering pandas'
# SettingWithCopyWarning on a filtered selection.
is_city_sponsored = df['PROJECT_SP'] == 'City of Pittsburgh'
df = df.loc[is_city_sponsored].copy()

In [6]:
# Title-case the PROJECT__6 values.
df['PROJECT__6'] = df['PROJECT__6'].str.title()

# Strip the sponsor annotations from the narrative text. Each tag is matched
# literally (regex=False) and applied in this order; the first variant also
# consumes its trailing space.
# NOTE(review): 'Sponor' looks like a typo but presumably matches the
# spelling in the source data - confirm before correcting it.
sponsor_tags = (
    '(Sponor = City of Pittsburgh) ',
    '(Sponsor = City of Pittsburgh)',
    '(Project sponsor = City of Pittsburgh)',
)
narrative = df['PUBLIC_NAR']
for tag in sponsor_tags:
    narrative = narrative.str.replace(tag, '', regex=False)
df['PUBLIC_NAR'] = narrative

In [7]:
# Keep one row per PROJECT_ID (the first occurrence) and renumber the rows
# from 0.
# NOTE(review): because the first occurrence wins, which source layer a
# duplicated project is taken from depends on the row order of the combined
# frame - confirm that is the intended choice.
df = (
    df
    .drop_duplicates(subset='PROJECT_ID')
    .reset_index(drop=True)
)

In [8]:
# Parse the YYYYMMDD completion values once; missing entries become NaT.
completion_dt = pd.to_datetime(df['COMPLETI_1'], format='%Y%m%d')
df['COMPLETI_1 DT'] = completion_dt

# Rewrite the original column as MM/DD/YYYY text, using 'Unknown' wherever
# no date is available.
df['COMPLETI_1'] = completion_dt.dt.strftime('%m/%d/%Y').fillna('Unknown')

In [9]:
# Convert NTP_DATE from YYYYMMDD to MM/DD/YYYY text in one step.
# Unlike COMPLETI_1, missing values are left as-is (no 'Unknown' placeholder).
df['NTP_DATE'] = (
    pd.to_datetime(df['NTP_DATE'], format='%Y%m%d')
    .dt.strftime('%m/%d/%Y')
)

In [10]:
# Take the first coordinate tuple of every geometry and split it into
# longitude (x) and latitude (y) columns.
first_vertices = [geom.coords[0] for geom in df['geometry']]
df['lng'] = [vertex[0] for vertex in first_vertices]
df['lat'] = [vertex[1] for vertex in first_vertices]

In [11]:
# Sanity check after filtering and cleaning: print the row count, column
# count, dtypes and memory use of the frame.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 24 entries, 0 to 23
Columns: 128 entries, GISDATA_PR to lat
dtypes: datetime64[ns](1), float64(13), geometry(1), int64(13), object(100)
memory usage: 24.1+ KB


### Select the formatted columns for output

In [12]:
# Preview only the columns that are written to the output CSV.
preview_columns = [
    'lng', 'lat',
    'PUBLIC_NAR', 'PROJECT__6',
    'EST_CONSTR', 'NTP_DATE', 'COMPLETI_1',
]
df[preview_columns]

Unnamed: 0,lng,lat,PUBLIC_NAR,PROJECT__6,EST_CONSTR,NTP_DATE,COMPLETI_1
0,-79.9976,40.3945,Superstructure replacement on Ansonia Place ov...,Future Development,550000.0,07/14/2031,Unknown
1,-79.9475,40.457,Bridge preservation on South Millvale Avenue b...,Future Development,15380000.0,08/14/2034,Unknown
2,-79.9812,40.4657,Bridge rehabilitation on Lowrie Street over Ri...,Future Development,750000.0,06/21/2032,Unknown
3,-79.9768,40.4236,Bridge preservation on Mission Street between ...,Future Development,8984000.0,06/14/2032,Unknown
4,-79.951818,40.454023,"Bridge preservation on SR 400, Baum Boulevard ...",Future Development,1500000.0,06/14/2032,10/15/2034
5,-80.0433,40.4517,Tunnel reconstruction and preservation work o...,Future Development,17510000.0,09/11/2034,12/01/2035
6,-79.9335,40.4563,Bridge rehabilitation on structure carrying S...,In Development,5005000.0,09/13/2023,01/21/2025
7,-79.9505,40.4267,"Bridge rehabilitation on Swinburne Bridge, wh...",In Development,12300000.0,03/09/2026,11/04/2025
8,-79.9076,40.4677,Bridge restoration/replacement on Larimer Ave...,In Development,21000000.0,08/26/2025,06/30/2027
9,-80.0049,40.4726,Swindell Bridge Rehabilitation located between...,In Development,30000000.0,12/07/2026,01/07/2028


In [13]:
# Write the selected columns, in this order, to the output CSV without the
# row index.
df.to_csv(
    'output/pgh-bridges.csv',
    columns=[
        'lng', 'lat',
        'PUBLIC_NAR', 'PROJECT__6',
        'EST_CONSTR', 'NTP_DATE', 'COMPLETI_1',
    ],
    index=False,
)