In [90]:
import pandas as pd
import numpy as np
import io
import requests
from datetime import datetime


In [91]:
# CARTO SQL API request (phl.carto.com) that returns the whole
# `opa_properties_public` table as CSV. The query adds `lat`/`lng` columns
# computed from `the_geom` with ST_Y/ST_X, and `skipfields` leaves the
# geometry and cartodb id columns out of the download.
url = (
    'https://phl.carto.com/api/v2/sql'
    '?q=SELECT+*,+ST_Y(the_geom)+AS+lat,+ST_X(the_geom)+AS+lng+FROM+opa_properties_public'
    '&filename=opa_properties_public'
    '&format=csv'
    '&skipfields=cartodb_id,the_geom,the_geom_webmercator'
)
df = pd.read_csv(url)


Unnamed: 0,number_of_rooms,assessment_date,beginning_point,book_and_page,building_code,building_code_description,category_code,category_code_description,census_tract,central_air,...,unit,utility,view_type,year_built,year_built_estimate,zip_code,zoning,objectid,lat,lng
0,6.0,,"153'7 1/2""N ALLEGHENY",1.77592e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225976,39.998934,-75.127498
1,6.0,,"16'4 1/2""S WILLARD",1.74793e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225977,39.999272,-75.127344
2,6.0,,SWC WILLARD,3.50011e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225978,39.999317,-75.127335
3,6.0,,NWC WILLA RD,0,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342227,RM1,604225979,39.999468,-75.127297
4,6.0,,SEC WILLARD,3.15587e+06,O30,ROW 2 STY MASONRY,1,Single Family,520.0,,...,,,I,1935,Y,191342204,RM1,604225980,39.999256,-75.126923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581451,6.0,,"50'8 1/4"" E A ST",3268072,H30,SEMI/DET 2 STY MASONRY,1,Single Family,70.0,N,...,,,I,1940,Y,191203523,RSA3,604804204,40.029893,-75.120256
581452,0.0,,"1007'10 1/2"" N OF",2426363,K10,S/D W/B GAR 1 STY MASONRY,1,Single Family,920.0,,...,,,I,1955,,191113012,RSA3,604804205,40.066336,-75.085918
581453,6.0,,"64'6"" N 75 AVE",0062358,R30,ROW B/GAR 2 STY MASONRY,1,Single Family,680.0,N,...,,,I,1925,Y,191382211,RSA5,604804206,40.069069,-75.151527
581454,6.0,,139.167' NE OF CEDAR,3209997,O50,ROW 3 STY MASONRY,1,Single Family,40.0,Y,...,,,I,2015,,19125,RSA5,604804207,39.977300,-75.125414


In [93]:
# Columns that are not present in the raw download. pre_clean_df appends them
# (unfilled) after the renamed source columns, in the order given here.
added_columns = [
    '# UNITS', 'REIS Submarket', 'CITY', 'STATE', 'RESI',
    'CONDO', 'UNIT', 'COMM', 'TOT ASSD $', 'RE TAXES',
]

# Hand-editable plan for the columns of the raw download, one
# (source column, output name) pair per column, in processing order.
# An output name of None marks the column for deletion.
_column_plan = [
    ('number_of_rooms', '# ROOMS'),
    ('assessment_date', 'assessment_date'),
    ('beginning_point', None),
    ('book_and_page', None),
    ('building_code', None),
    ('building_code_description', 'BLDG CODE'),
    ('category_code', None),
    ('category_code_description', 'BLDG CAT'),
    ('census_tract', None),
    ('central_air', 'CENTRAL AIR'),
    ('cross_reference', None),
    ('date_exterior_condition', 'EXT CONDITION DATE'),
    ('depth', 'LOT DEPTH'),
    ('exempt_building', 'BLDG EXEMPT'),
    ('exempt_land', 'LAND EXEMPT'),
    ('exterior_condition', 'EXT CONDITION'),
    ('fireplaces', '# FIREPLACE'),
    ('frontage', 'LOT FRONTAGE'),
    ('fuel', None),
    ('garage_spaces', 'GARAGE'),
    ('garage_type', 'GARAGE TYPE'),
    ('general_construction', None),
    ('geographic_ward', None),
    ('homestead_exemption', 'homestead_exemption'),
    ('house_extension', None),
    ('house_number', None),
    ('interior_condition', 'INT CONDITION'),
    ('location', 'ADDRESS'),
    ('mailing_address_1', None),
    ('mailing_address_2', None),
    ('mailing_care_of', None),
    ('mailing_city_state', 'OWNER CITY'),
    ('mailing_street', 'OWNER ADDRESS'),
    ('mailing_zip', 'OWNER ZIP'),
    ('market_value', 'MARKET VALUE'),
    ('market_value_date', None),
    ('number_of_bathrooms', '# BATH'),
    ('number_of_bedrooms', '# BED'),
    ('basements', 'BASEMENT'),
    ('number_stories', '# FLOORS'),
    ('off_street_open', 'off_street_open'),
    ('other_building', 'BUILDING'),
    ('owner_1', 'OWNER'),
    ('owner_2', None),
    ('parcel_number', 'PARCEL ID'),
    ('parcel_shape', 'PARCEL SHAPE'),
    ('quality_grade', None),
    ('recording_date', 'RECORDING DATE'),
    ('registry_number', None),
    ('sale_date', 'SALE DATE'),
    ('sale_price', 'SALE PRICE'),
    ('separate_utilities', None),
    ('sewer', None),
    ('site_type', None),
    ('state_code', None),
    ('street_code', None),
    ('street_designation', None),
    ('street_direction', None),
    ('street_name', None),
    ('suffix', None),
    ('taxable_building', 'BLDG ASSD $'),
    ('taxable_land', 'LAND ASSD $'),
    ('topography', 'TOPOGRAPHY'),
    ('total_area', 'LAND SF'),
    ('total_livable_area', 'GSF'),
    ('type_heater', None),
    ('unfinished', None),
    ('unit', 'UNIT #'),
    ('utility', None),
    ('view_type', 'VIEW'),
    ('year_built', 'YEAR BUILT'),
    ('year_built_estimate', None),
    ('zip_code', 'ZIP'),
    ('zoning', 'ZONING'),
    ('objectid', None),
    ('lat', 'LATITUDE'),
    ('lng', 'LONGITUDE'),
]

# Expand the plan into the nested form pre_clean_df reads:
# {'delete': 1} for dropped columns, {'delete': 0, 'new_name': ...} otherwise.
rename_dict = {
    source: {'delete': 1} if target is None else {'delete': 0, 'new_name': target}
    for source, target in _column_plan
}

# Single bundle handed to pre_clean_df.
instructions = {
    'added_columns': added_columns,
    'rename_dict': rename_dict,
}

In [198]:
def pre_clean_df(df, instructions):
    """Select, rename and extend the raw columns, then order rows by sale date.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. It must contain every column that `rename_dict` keeps;
        columns marked for deletion may be absent. `df` is not modified.
    instructions : dict
        ``'rename_dict'`` maps each source column to ``{'delete': 1}`` (drop
        it) or ``{'delete': 0, 'new_name': <name>}`` (keep it under a new
        name). ``'added_columns'`` lists extra, unfilled columns to append
        after the kept ones.

    Returns
    -------
    pandas.DataFrame
        Kept columns (in `rename_dict` order) followed by the added columns,
        with 'SALE DATE' parsed to datetime, rows without a parseable sale
        date removed, sorted newest sale first, and a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If a kept column is missing from `df`, or if no column ends up being
        named 'SALE DATE'.
    """
    added_columns = list(instructions['added_columns'])
    rename_dict = instructions['rename_dict']

    # Build source -> output name for every column that survives. A 'delete'
    # flag that is neither 0 nor 1 keeps the column under its original name.
    kept = {}
    for column, spec in rename_dict.items():
        if spec['delete'] == 1:
            continue
        kept[column] = spec['new_name'] if spec['delete'] == 0 else column

    # One selection + one rename instead of a drop/rename per column.
    df_new = df[list(kept)].rename(columns=kept)
    df_new = df_new.reindex(columns=df_new.columns.tolist() + added_columns)

    # Go through str first so every value is parsed as date text; anything
    # unparseable (including missing values) becomes NaT.
    sale_dates = pd.to_datetime(df_new['SALE DATE'].astype(str), errors='coerce')
    df_new = df_new.assign(**{'SALE DATE': sale_dates})

    # `== pd.NaT` is always False, so missing dates must be removed with dropna.
    return (df_new.dropna(subset=['SALE DATE'])
                  .sort_values(by=['SALE DATE'], ascending=False)
                  .reset_index(drop=True))

def subset_df_date(df_new, deltadays):
    """Keep the rows whose sale date falls in the most recent time window.

    Parameters
    ----------
    df_new : pandas.DataFrame
        Table with a datetime 'SALE DATE' column. Any index is accepted and
        the rows need not be pre-sorted. `df_new` is not modified.
    deltadays : str or timedelta-like
        Window length, anything `pd.Timedelta` accepts (e.g. ``'40 days'``).

    Returns
    -------
    pandas.DataFrame
        Rows with ``latest - window <= SALE DATE <= latest`` (both ends
        inclusive), newest first, with a fresh 0..n-1 index. Empty when the
        input is empty or has no valid sale date.
    """
    delta = pd.Timedelta(deltadays)
    ordered = df_new.sort_values(by=['SALE DATE'], ascending=False)
    if ordered.empty:
        return ordered.reset_index(drop=True)

    # Descending sort puts NaT last, so the first row holds the latest date.
    latest_date = ordered['SALE DATE'].iloc[0]
    earliest_date = latest_date - delta

    # Filter with a boolean mask: feeding index labels to .iloc only works
    # when labels happen to equal positions in the sorted frame.
    in_window = ordered['SALE DATE'].between(earliest_date, latest_date)
    return ordered.loc[in_window.to_numpy()].reset_index(drop=True)

def update_PLUTO(pluto, df_sub, verbose=True):
    """Merge recent sales into the PLUTO table, matching rows on 'ADDRESS'.

    For every address of `df_sub` that already exists in `pluto`, the
    'GSF', 'SALE PRICE' and 'SALE DATE' values of all matching `pluto` rows
    are overwritten. When `df_sub` lists an address more than once, the first
    occurrence in `df_sub` order is used. Rows of `df_sub` whose address is
    not in `pluto` are appended once each. Rows of `df_sub` with a missing
    address cannot be matched and are ignored.

    Parameters
    ----------
    pluto : pandas.DataFrame
        Existing table with at least 'ADDRESS', 'GSF', 'SALE PRICE' and
        'SALE DATE' columns. Not modified.
    df_sub : pandas.DataFrame
        Recent sales with the same four columns.
    verbose : bool, default True
        Print one trace line per `df_sub` row: ``1: <address>`` for an
        update, ``2: <address>`` for an appended row.

    Returns
    -------
    pandas.DataFrame
        Updated copy of `pluto`. Its index is renumbered 0..n-1 only when
        rows were appended.
    """
    update_cols = ['GSF', 'SALE PRICE', 'SALE DATE']
    pluto_update = pluto.copy()

    sub = df_sub[df_sub['ADDRESS'].notna()]
    # Set membership instead of scanning a list once per address.
    is_known = sub['ADDRESS'].isin(set(pluto['ADDRESS']))

    if verbose:
        for address, known in zip(sub['ADDRESS'], is_known):
            print(f'1: {address}' if known else f'2: {address}')

    # One row of fresh values per known address, keyed by address.
    fresh = (sub.loc[is_known.to_numpy()]
                .drop_duplicates(subset='ADDRESS', keep='first')
                .set_index('ADDRESS'))
    if not fresh.empty:
        hit = pluto_update['ADDRESS'].isin(fresh.index).to_numpy()
        matched_addresses = pluto_update.loc[hit, 'ADDRESS']
        for col in update_cols:
            # Assign plain arrays: the two frames' index labels are unrelated,
            # so label alignment would write NaN instead of the new values.
            pluto_update.loc[hit, col] = matched_addresses.map(fresh[col]).to_numpy()

    # Append all unseen rows in one concat (DataFrame.append no longer exists
    # in pandas 2.x, and appending inside a loop duplicated repeated addresses).
    new_rows = sub.loc[~is_known.to_numpy()]
    if not new_rows.empty:
        pluto_update = pd.concat([pluto_update, new_rows], ignore_index=True)

    return pluto_update

In [199]:
# Clean the raw download, then keep only the sales from the 40 days leading
# up to the most recent sale date.
df_new = pre_clean_df(df=df, instructions=instructions)
df_sub = subset_df_date(df_new=df_new, deltadays='40 days')

In [200]:
# Existing PLUTO table, read from a path relative to the notebook's folder.
pluto_csv_path = '../data/project/PHLPL-001 All_Properties [byaddress;location] PLUTO.csv'
pluto = pd.read_csv(pluto_csv_path)

In [201]:
# Parse the sale dates; values that cannot be parsed become NaT.
pluto_sale_dates = pd.to_datetime(pluto['SALE DATE'], errors='coerce')
pluto['SALE DATE'] = pluto_sale_dates

In [202]:
# Newest sales first (rows without a sale date end up at the bottom).
pluto = pluto.sort_values('SALE DATE', ascending=False)

In [None]:
# Fold the recent sales into a copy of the PLUTO table; `pluto` itself is
# left untouched because update_PLUTO works on `pluto.copy()`.
pluto_update = update_PLUTO(pluto=pluto, df_sub=df_sub)

1: 2323 W CUMBERLAND ST
1: 2643 SEPVIVA ST
2: 3300 CONRAD ST
1: 2849 HEDLEY ST
1: 3135 CROATAN PL
1: 1916 N 61ST ST
1: 2614 S MARSHALL ST
1: 1110 BUTTONWOOD ST
1: 1310 NARRAGANSETT ST
1: 3319 ENGLEWOOD ST
1: 2833 S ISEMINGER ST
1: 7245 OAKLAND ST
1: 2143 E CLEARFIELD ST
2: 5940 MARKET ST
2: 2012 WHARTON ST
1: 2557 N JESSUP ST
1: 3618 N MARVINE ST
1: 1227 E VENANGO ST
1: 611 W LURAY ST
1: 2923 S CARLISLE ST
1: 12521 RICHTON RD
1: 636 E TABOR RD
1: 219 W DUVAL ST
1: 602 W LURAY ST
1: 1824 MEDARY AVE
1: 6308 EASTWOOD ST
1: 414 E WYOMING AVE
1: 2203 DISSTON ST
1: 478 HART LN
1: 2643 N 31ST ST
2: 2324-26 E SUSQUEHANNA AVE
1: 1510 LOMBARD ST
1: 333 DALY ST
1: 3158 MILLER ST
1: 5659 HADFIELD ST
1: 2113 E WILLIAM ST
1: 2842 UNRUH AVE
1: 1818 HARTEL AVE
2: 2602 W DAUPHIN ST
1: 3335 DISSTON ST
1: 12046 GLENFIELD ST
1: 1217 S 49TH ST
1: 530 MONTROSE ST
1: 5605 SPRUCE ST
1: 3003 S 17TH ST
1: 6439 EASTWOOD ST
1: 3214 H ST
1: 816 GRIFFITH ST
1: 3051 HARTVILLE ST
1: 2327 WATKINS ST
2: 4403 CHESTNUT S

In [193]:
# Display the PLUTO table (the notebook renders a truncated preview).
pluto

Unnamed: 0,ADDRESS,REIS Submarket,CITY,STATE,ZIP,PARCEL ID,UNIT #,LATITUDE,LONGITUDE,YEAR BUILT,...,BLDG ASSD $,LAND ASSD $,TOT ASSD $,RE TAXES,TOPOGRAPHY,VIEW,ZONING,RECORDING DATE,homestead_exemption,off_street_open
88474,1903 N PHILIP ST,North Frankford,Philadelphia,PA,19122,183286100,,39.979368,-75.137918,1920.0,...,36120.0,32280.0,68400,948,F,I,RM1,6/10/2019,39200.0,0.0
207338,1839-47 BLAIR ST,North Frankford,Philadelphia,PA,19125,888181568,7,39.978068,-75.131378,2017.0,...,80767.0,42700.0,123467,1711,F,I,CMX2,6/7/2019,0.0,0.0
404551,1252 PALETHORP ST,North Frankford,Philadelphia,PA,19122,888181894,1,39.970801,-75.138852,2018.0,...,355300.0,62700.0,418000,5793,F,I,RM1,6/10/2019,,0.0
128769,620 N 03RD ST,Center City,Philadelphia,PA,19123,888501304,000002C,39.962035,-75.143448,2017.0,...,488400.0,79500.0,567900,7870,F,I,,6/3/2019,0.0,0.0
237861,4916 KNORR ST,North Frankford,Philadelphia,PA,19135,412016300,,40.022896,-75.042486,1920.0,...,80862.0,19338.0,100200,1389,F,I,RSA5,6/4/2019,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
406990,5450 WHITBY AVE,West Philadelphia,Philadelphia,PA,19143,513156600,,39.944372,-75.230499,1920.0,...,59496.0,10404.0,69900,969,F,I,RM1,4/18/2019,0.0,0.0
413511,901 N PENN ST,Center City,Philadelphia,PA,19123,888061776,000P702,39.961707,-75.134510,2006.0,...,403920.0,55080.0,459000,6361,,B,CMX3,6/7/2018,0.0,0.0
428786,4306 TYSON AVE,North Frankford,Philadelphia,PA,19135,552192900,,40.030833,-75.044735,1950.0,...,89980.0,37520.0,127500,1767,F,I,RSA3,6/16/2016,40000.0,0.0
446760,1939 CHESTNUT ST,Center City,Philadelphia,PA,19103,888038624,000004A,39.952323,-75.173286,1900.0,...,226000.0,33700.0,259700,3599,,I,CMX4,8/8/1991,0.0,0.0


In [194]:
# Display the recent-sales subset (the notebook renders a truncated preview).
df_sub

Unnamed: 0,# ROOMS,assessment_date,BLDG CODE,BLDG CAT,CENTRAL AIR,EXT CONDITION DATE,LOT DEPTH,BLDG EXEMPT,LAND EXEMPT,EXT CONDITION,...,# UNITS,REIS Submarket,CITY,STATE,RESI,CONDO,UNIT,COMM,TOT ASSD $,RE TAXES
0,6.0,,ROW 2 STY MASONRY,Single Family,,,87.00,43010.0,1990.0,4.0,...,,,,,,,,,,
1,0.0,,ROW 3 STY MASONRY,Single Family,,,55.00,83942.0,0.0,3.0,...,,,,,,,,,,
2,0.0,,VACANT LAND RESIDE < ACRE,Vacant Land,,,55.72,177375.0,0.0,0.0,...,,,,,,,,,,
3,5.0,,ROW 2 STY MASONRY,Single Family,N,2012-07-10,78.88,45000.0,0.0,4.0,...,,,,,,,,,,
4,6.0,,ROW B/GAR 2 STY MASONRY,Single Family,Y,2017-03-24,97.41,45000.0,0.0,4.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1188,5.0,,ROW 3 STY MASONRY,Single Family,Y,,50.00,45000.0,0.0,3.0,...,,,,,,,,,,
1189,6.0,,ROW 2 STY MASONRY,Single Family,N,2012-01-20,51.00,0.0,0.0,4.0,...,,,,,,,,,,
1190,6.0,,ROW 2 STY MASONRY,Single Family,,2012-05-04,54.58,0.0,0.0,4.0,...,,,,,,,,,,
1191,6.0,,ROW 2 STY MASONRY,Single Family,,2012-03-01,100.00,0.0,0.0,4.0,...,,,,,,,,,,
