In [246]:
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd
import requests
from bs4 import BeautifulSoup

In [2]:
DB = create_engine('postgresql://jon@localhost:5432/propertydb')

In [221]:
landlordnames = pd.DataFrame([
    ['10 SWEET WATER LN', 'Harold Waldman'],
    ['1128 MAIN ST', 'Stough Group'],
    ['121 EDGEWOOD AVE', 'Steiner Realty'],
    ['121 WOODLAND RD', 'Lisa Cibik'],
    ['1260 BEECHWOOD BLVD', 'Cominos'],
    ['13 PRIDE ST', 'Shepherds Heart'],
    ['1344 5TH AVE', 'Robert Lewis'],
    ['1800 SEYMOUR ST', 'DuqLight'],
    ['203 EDELWEISS DR', 'Michael King'],
    ['232 BLVD OF THE ALLIES', 'Parking Aty'],
    ['264 VEE LYNN DR', 'L/R Eckenrode'],
    ['312 GIST ST', 'Fleenor'],
    ['391 GLADE RUN LN', 'Lando'],
    ['401 BINGHAM ST', 'Berger'],
    ['410 SALEM DR', 'Merante'],
    ['414 GRANT ST RM 200', 'City'],
    ['542 FORBES AVE RM 347', 'Alleco'],
    ['5719 SOLWAY ST', 'Kaynar'],
    ['5919 MAIN BLVD', 'Keith Barbara'],
    ['600 FORBES AVE', 'Duq Uni'],
    ['600 GRANT ST', 'UPMC'],
    ['6135 BROWNSVILLE RD', 'Benkovski'],
    ['7101 PINEHURST CIR', 'SCW Trust'],
    ['740 TRUMBULL DR', 'Lamar Ads'],
    ['905 WATSON ST', 'Bethlehem Haven'],
    ['PO BOX 13444', 'L/R Eckenrode'],
    ['PO BOX 19741', 'Svn Trca']
], columns=['address', 'name'])

In [171]:
### Import Bluff/Uptown parcels

In [195]:
parcels = gpd.read_postgis("""SELECT assessments.parcelid, assessments.changenoticeaddress1 AS address, assessments.lotarea::int, ST_MakePoint(lon::float, lat::float) AS geom
FROM assessments
JOIN parcelcentroids
ON parcelcentroids.parcelid = assessments.parcelid
WHERE neighborhood = 'Bluff';""", DB, crs='EPSG:4269')
parcels = parcels.to_crs('EPSG:4326')

In [170]:
### Import zoning

In [189]:
zoning = gpd.read_file('input/zoning.geojson')
zoning = zoning[zoning.zon_new.isin(['UPR-A', 'UPR-B'])]

In [196]:
parcels = gpd.sjoin(parcels, zoning, predicate='within', how='inner')

In [292]:
parcels['address'] = parcels.address.str.strip()
parcels['address'] = parcels.address.replace(r'\s+', ' ', regex=True)
parcels['address'] = parcels.address.str.replace('1128 MAIN ST.*', '1128 MAIN ST', regex=True)
parcels['address'] = parcels.address.str.replace('PO BOX 19741 ST', 'PO BOX 19741')
parcels['address'] = parcels.address.str.replace('P O BOX', 'PO BOX')
parcels['address'] = parcels.address.str.replace('736 W INGOMAR RD.*', '736 W INGOMAR RD', regex=True)
parcels['address'] = parcels.address.str.replace('600 GRANT ST.*', '600 GRANT ST', regex=True)
parcels['address'] = parcels.address.str.replace('600 FORBES AVE.*', '600 FORBES AVE', regex=True)
parcels['address'] = parcels.address.str.replace('1344 FIFTH AVE', '1344 5TH AVE')
parcels['address'] = parcels.address.str.replace('423 DELEWARE AVE', '423 DELAWARE AVE')
parcels['address'] = parcels.address.str.replace('PO BOX 10277', '5810 HAMPTON ST')
parcels['address'] = parcels.address.str.replace('2405 SOUTH PARK RD STE 6', 'PO BOX 831')
parcels['address'] = parcels.address.str.replace('1022 5TH AVE', '1020 5TH AVE')
parcels['address'] = parcels.address.str.replace('809 FLEMING ST FRNT', '809 FLEMING ST FL')
parcels['address'] = parcels.address.str.replace('1204 5TH AVE', '600 FORBES AVE')
parcels['address'] = parcels.address.str.replace('503 ADMINISTRATION BLDG', '600 FORBES AVE')
parcels['address'] = parcels.address.str.replace('5149 STRAUBS LN', '2149 STRAUBS LN')
parcels['address'] = parcels.address.str.replace('1811 BLVD OF ALLIES STE 200', '1811 BOULEVARD OF THE ALLIES')
parcels['address'] = parcels.address.str.replace('1811 BOULEVARD OF THE ALLLIES', '1811 BOULEVARD OF THE ALLIES')

In [293]:
landlords = parcels.groupby('address').agg({ 'parcelid': 'count', 'lotarea': 'sum' }).reset_index()

In [294]:
landlordsWITHNAMES = pd.merge(landlords, landlordnames, on='address', how='outer')
landlordsWITHNAMES2 = landlordsWITHNAMES.groupby('name').agg({ 'parcelid': 'sum', 'lotarea': 'sum' }).reset_index()
# landlordsWITHNAMES.sort_values(by='parcelid', ascending=False).head(20)

landlordsWITHNAMES2['pct'] = landlordsWITHNAMES2.lotarea / landlords.lotarea.sum()
landlordsWITHNAMES2.sort_values(by='pct', ascending=False)

Unnamed: 0,name,parcelid,lotarea,pct
6,Duq Uni,13.0,83666.0,0.04807
12,L/R Eckenrode,56.0,78226.0,0.044945
7,DuqLight,1.0,70959.0,0.040769
2,Berger,12.0,65628.0,0.037706
20,SCW Trust,19.0,55709.0,0.032007
22,Steiner Realty,3.0,54682.0,0.031417
23,Stough Group,15.0,45984.0,0.02642
14,Lando,5.0,43480.0,0.024981
19,Robert Lewis,13.0,38932.0,0.022368
25,UPMC,13.0,36460.0,0.020948


In [295]:
landlordsWITHNAMES[landlordsWITHNAMES.name.isna()].parcelid.sum()

372.0

In [288]:
landlordsWITHNAMES[landlordsWITHNAMES.name.str.startswith('ZZ_')]

Unnamed: 0,address,parcelid,lotarea,name
0,,4.0,2974.0,ZZ_
1,1 1/2 BOUNDARY ST,1.0,2420.0,ZZ_
2,1 SENECA ST,1.0,1567.0,ZZ_
3,10 DEVON LN,3.0,4749.0,ZZ_
4,10 STONYBROOK RD,2.0,6530.0,ZZ_
...,...,...,...,...
260,PO BOX 53009,1.0,5280.0,ZZ_
261,PO BOX 53028,2.0,2423.0,ZZ_
262,PO BOX 781,1.0,1600.0,ZZ_
263,PO BOX 81005,1.0,2760.0,ZZ_


In [289]:
for lindex, landlord in landlordsWITHNAMES[landlordsWITHNAMES.name == 'ZZ_'].iterrows():
    landlordsWITHNAMES.loc[lindex, 'name'] = 'ZZ_'

    for pindex, parcel in parcels[parcels.address == landlord['address']].iterrows():
        resp = requests.get('https://www2.alleghenycounty.us/RealEstate/Sales.aspx?ParcelID=%s' % parcel['parcelid'])
        owner = BeautifulSoup(resp.text, 'html.parser').find('span', id='lblPrevOwner1').text

        landlordsWITHNAMES.loc[lindex, 'name'] = landlord['name'] + owner

In [291]:
landlordsWITHNAMES.to_csv('landlordsWITHNAMES.csv')

In [296]:
# citywide zoning -- import

In [351]:
allzoning = gpd.read_file('input/zoning.geojson')
allzoning = allzoning[allzoning.legendtype != 'Mount Oliver Borough']

In [352]:
allzoning['legendtype'] = allzoning.legendtype.str.strip()
allzoning['legendtype'] = allzoning.legendtype.replace('Single-Unit Attached Residential', 'Single-Unit A/D Residential')
allzoning['legendtype'] = allzoning.legendtype.replace('Single-Unit Detached Residential', 'Single-Unit A/D Residential')

In [353]:
# citywide zoning -- remove rivers from districts

In [354]:
rivers = gpd.read_file('input/rivers.geojson')

In [355]:
allzoning = allzoning.overlay(rivers, how='symmetric_difference')

  return geopandas.overlay(


In [359]:
# add up districts

In [358]:
azagg = allzoning.to_crs({'proj':'cea'})
azagg['area'] = azagg.area * 10.7639 # sq meters to sq feet
azagg['area'] = azagg.area / 43560 # sq feet to acres

azagg = azagg[~azagg.legendtype.isin(['Parks', 'Hillside'])].groupby('legendtype').agg({ 'area': 'sum' }).reset_index()
azagg['pct'] = azagg.area / azagg.area.sum()

azagg.sort_values(by='pct', ascending=False)

Unnamed: 0,legendtype,area,pct
13,Single-Unit A/D Residential,1035.483662,0.446936
16,Two-Unit Residential,263.540607,0.113749
12,Riverfront,183.147037,0.07905
7,Multi-Unit Residential,176.764333,0.076295
20,Urban Industrial,135.809033,0.058618
11,Planned Unit Development,111.808882,0.048259
5,Local Neighborhood Commercial,90.898004,0.039233
0,Educational/Medical Institution,81.846908,0.035327
14,Specially Planned,50.288149,0.021705
1,General Industrial,37.090667,0.016009
