In [90]:
import pandas as pd
import numpy as np
import io
import requests
from datetime import datetime


In [91]:
# Carto SQL API request for the `opa_properties_public` table, returned as CSV.
# The adjacent string literals are concatenated by Python into a single URL.
url = (
    'https://phl.carto.com/api/v2/sql'
    '?q=SELECT+*,+ST_Y(the_geom)+AS+lat,+ST_X(the_geom)+AS+lng+FROM+opa_properties_public'
    '&filename=opa_properties_public'
    '&format=csv'
    '&skipfields=cartodb_id,the_geom,the_geom_webmercator'
)
# pandas downloads and parses the CSV straight from the URL.
df = pd.read_csv(filepath_or_buffer=url)


Unnamed: 0,number_of_rooms,assessment_date,beginning_point,book_and_page,building_code,building_code_description,category_code,category_code_description,census_tract,central_air,...,unit,utility,view_type,year_built,year_built_estimate,zip_code,zoning,objectid,lat,lng
0,6.0,,"153'7 1/2""N ALLEGHENY",1.77592e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225976,39.998934,-75.127498
1,6.0,,"16'4 1/2""S WILLARD",1.74793e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225977,39.999272,-75.127344
2,6.0,,SWC WILLARD,3.50011e+06,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342205,RM1,604225978,39.999317,-75.127335
3,6.0,,NWC WILLA RD,0,O30,ROW 2 STY MASONRY,1,Single Family,521.0,,...,,,I,1935,Y,191342227,RM1,604225979,39.999468,-75.127297
4,6.0,,SEC WILLARD,3.15587e+06,O30,ROW 2 STY MASONRY,1,Single Family,520.0,,...,,,I,1935,Y,191342204,RM1,604225980,39.999256,-75.126923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581451,6.0,,"50'8 1/4"" E A ST",3268072,H30,SEMI/DET 2 STY MASONRY,1,Single Family,70.0,N,...,,,I,1940,Y,191203523,RSA3,604804204,40.029893,-75.120256
581452,0.0,,"1007'10 1/2"" N OF",2426363,K10,S/D W/B GAR 1 STY MASONRY,1,Single Family,920.0,,...,,,I,1955,,191113012,RSA3,604804205,40.066336,-75.085918
581453,6.0,,"64'6"" N 75 AVE",0062358,R30,ROW B/GAR 2 STY MASONRY,1,Single Family,680.0,N,...,,,I,1925,Y,191382211,RSA5,604804206,40.069069,-75.151527
581454,6.0,,139.167' NE OF CEDAR,3209997,O50,ROW 3 STY MASONRY,1,Single Family,40.0,Y,...,,,I,2015,,19125,RSA5,604804207,39.977300,-75.125414


In [93]:
### Names of the new (empty) columns that pre_clean_df appends to the frame
added_columns = [
    '# UNITS', 'REIS Submarket', 'CITY', 'STATE', 'RESI',
    'CONDO', 'UNIT', 'COMM', 'TOT ASSD $', 'RE TAXES',
]

### OVERWRITE instructions for the original data file.
### Edit this plan by hand to change what is kept: each pair is
### (original column name, new column name); None as the new name means the
### column is deleted. The order of the pairs is the order of the output columns.
_column_plan = [
    ('number_of_rooms', '# ROOMS'),
    ('assessment_date', 'assessment_date'),
    ('beginning_point', None),
    ('book_and_page', None),
    ('building_code', None),
    ('building_code_description', 'BLDG CODE'),
    ('category_code', None),
    ('category_code_description', 'BLDG CAT'),
    ('census_tract', None),
    ('central_air', 'CENTRAL AIR'),
    ('cross_reference', None),
    ('date_exterior_condition', 'EXT CONDITION DATE'),
    ('depth', 'LOT DEPTH'),
    ('exempt_building', 'BLDG EXEMPT'),
    ('exempt_land', 'LAND EXEMPT'),
    ('exterior_condition', 'EXT CONDITION'),
    ('fireplaces', '# FIREPLACE'),
    ('frontage', 'LOT FRONTAGE'),
    ('fuel', None),
    ('garage_spaces', 'GARAGE'),
    ('garage_type', 'GARAGE TYPE'),
    ('general_construction', None),
    ('geographic_ward', None),
    ('homestead_exemption', 'homestead_exemption'),
    ('house_extension', None),
    ('house_number', None),
    ('interior_condition', 'INT CONDITION'),
    ('location', 'ADDRESS'),
    ('mailing_address_1', None),
    ('mailing_address_2', None),
    ('mailing_care_of', None),
    ('mailing_city_state', 'OWNER CITY'),
    ('mailing_street', 'OWNER ADDRESS'),
    ('mailing_zip', 'OWNER ZIP'),
    ('market_value', 'MARKET VALUE'),
    ('market_value_date', None),
    ('number_of_bathrooms', '# BATH'),
    ('number_of_bedrooms', '# BED'),
    ('basements', 'BASEMENT'),
    ('number_stories', '# FLOORS'),
    ('off_street_open', 'off_street_open'),
    ('other_building', 'BUILDING'),
    ('owner_1', 'OWNER'),
    ('owner_2', None),
    ('parcel_number', 'PARCEL ID'),
    ('parcel_shape', 'PARCEL SHAPE'),
    ('quality_grade', None),
    ('recording_date', 'RECORDING DATE'),
    ('registry_number', None),
    ('sale_date', 'SALE DATE'),
    ('sale_price', 'SALE PRICE'),
    ('separate_utilities', None),
    ('sewer', None),
    ('site_type', None),
    ('state_code', None),
    ('street_code', None),
    ('street_designation', None),
    ('street_direction', None),
    ('street_name', None),
    ('suffix', None),
    ('taxable_building', 'BLDG ASSD $'),
    ('taxable_land', 'LAND ASSD $'),
    ('topography', 'TOPOGRAPHY'),
    ('total_area', 'LAND SF'),
    ('total_livable_area', 'GSF'),
    ('type_heater', None),
    ('unfinished', None),
    ('unit', 'UNIT #'),
    ('utility', None),
    ('view_type', 'VIEW'),
    ('year_built', 'YEAR BUILT'),
    ('year_built_estimate', None),
    ('zip_code', 'ZIP'),
    ('zoning', 'ZONING'),
    ('objectid', None),
    ('lat', 'LATITUDE'),
    ('lng', 'LONGITUDE'),
]

# Expand the plan into the per-column instruction dicts read by pre_clean_df:
# {'delete': 1} for a discarded column, {'delete': 0, 'new_name': ...} for a kept one.
rename_dict = {
    source_name: (
        {'delete': 1}
        if target_name is None
        else {'delete': 0, 'new_name': target_name}
    )
    for source_name, target_name in _column_plan
}

# Bundle both settings into the single argument expected by pre_clean_df.
instructions = dict(
    added_columns=added_columns,
    rename_dict=rename_dict,
)

In [165]:
def pre_clean_df(df, instructions):
    """Select, drop and rename the raw columns, then keep only dated sales.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame. It must contain every column named in
        ``instructions['rename_dict']``; a missing one raises ``KeyError``.
        The input frame is not modified.
    instructions : dict
        ``'rename_dict'`` maps each raw column name to ``{'delete': 1}``
        (discard the column) or ``{'delete': 0, 'new_name': ...}`` (keep it
        under ``new_name``). ``'added_columns'`` is a list of column names
        appended, empty, to the right of the kept columns. After renaming, a
        column called ``'SALE DATE'`` must exist.

    Returns
    -------
    pandas.DataFrame
        New frame with the kept columns (in ``rename_dict`` order) followed by
        the added columns, ``'SALE DATE'`` parsed to datetime, rows whose sale
        date is missing or unparseable removed, sorted most recent sale first,
        with a fresh 0..n-1 index.
    """
    added_columns = instructions['added_columns']
    rename_dict = instructions['rename_dict']
    orig_columns = list(rename_dict)

    # Split the instructions once instead of dropping/renaming column by column.
    dropped = [col for col in orig_columns if rename_dict[col]['delete'] == 1]
    renamed = {
        col: rename_dict[col]['new_name']
        for col in orig_columns
        if rename_dict[col]['delete'] == 0
    }

    # Selecting with a list already yields a new frame, so the (large) input
    # does not need to be copied in full first.
    df_new = (
        df[orig_columns]
        .drop(columns=dropped)
        .rename(columns=renamed)
    )
    # Reindexing on the extended column list appends the new columns filled with NaN.
    df_new = df_new.reindex(columns=df_new.columns.tolist() + added_columns)

    # Anything that cannot be parsed as a date becomes NaT.
    df_new['SALE DATE'] = pd.to_datetime(
        df_new['SALE DATE'].astype(str), errors='coerce'
    )

    # NaT never compares equal to anything (including itself), so an `== pd.NaT`
    # filter selects nothing; dropna is the reliable way to remove undated rows.
    df_new = (
        df_new
        .dropna(subset=['SALE DATE'])
        .sort_values(by=['SALE DATE'], ascending=False)
        .reset_index(drop=True)
    )

    return df_new

def subset_df_date(df_new, deltadays):
    """Return the rows whose sale date lies within a window ending at the latest sale.

    Parameters
    ----------
    df_new : pandas.DataFrame
        Frame with a datetime ``'SALE DATE'`` column. It may be unsorted and
        carry any index; it is not modified.
    deltadays : str or timedelta-like
        Window length, passed unchanged to ``pd.Timedelta`` (e.g. ``'40 days'``).
        Note that ``pd.Timedelta`` reads a bare integer as nanoseconds, not days.

    Returns
    -------
    pandas.DataFrame
        Rows with ``latest - window <= 'SALE DATE' <= latest``, sorted most
        recent first, with a fresh 0..n-1 index. Rows without a sale date are
        excluded. An empty input yields an empty result.
    """
    delta = pd.Timedelta(deltadays)
    df_sorted = df_new.sort_values(by=['SALE DATE'], ascending=False)
    if df_sorted.empty:
        # Nothing to anchor the window on; avoid indexing into an empty frame.
        return df_sorted.reset_index(drop=True)

    # max() skips NaT, so the window is anchored on the most recent real date.
    latest_date = df_sorted['SALE DATE'].max()
    earliest_date = latest_date - delta

    # Comparisons against NaT are False, so undated rows drop out here. The
    # upper bound needs no check because latest_date is the maximum.
    in_window = df_sorted['SALE DATE'] >= earliest_date

    # Select with the boolean mask via .loc. Index labels must not be passed to
    # .iloc, which is positional and would pick unrelated rows after sorting.
    return df_sorted.loc[in_window].reset_index(drop=True)

In [166]:
# Clean the raw download, then keep the sales from the 40 days up to the latest sale date.
df_new = pre_clean_df(df=df, instructions=instructions)
df_sub = subset_df_date(df_new=df_new, deltadays='40 days')

In [168]:
# Show only the sale-date column of the subset, as a one-column frame.
df_sub.loc[:, ['SALE DATE']]

Unnamed: 0,SALE DATE
0,2019-12-22
1,2019-12-21
2,2019-12-09
3,2019-12-06
4,2019-12-05
...,...
1188,2019-11-12
1189,2019-11-12
1190,2019-11-12
1191,2019-11-12


In [68]:
# Load the local CSV export; the path is relative to the notebook's working directory.
# NOTE(review): the saved output of this cell is a truncated warning. If it is a
# pandas DtypeWarning (mixed column types), consider passing explicit dtype= here. TODO confirm.
pluto = pd.read_csv(
    filepath_or_buffer='../data/project/PHLPL-001 All_Properties [byaddress;location] PLUTO.csv',
)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [138]:
# Parse the sale dates in place; values that cannot be parsed become NaT.
pluto['SALE DATE'] = pluto['SALE DATE'].pipe(pd.to_datetime, errors='coerce')

In [139]:
# Most recent sale first; rows without a date (NaT) end up at the bottom.
pluto = pluto.sort_values('SALE DATE', ascending=False)

In [141]:
# Show only the sale-date column, as a one-column frame.
pluto.loc[:, ['SALE DATE']]

Unnamed: 0,SALE DATE
88474,2019-06-10
207338,2019-05-29
404551,2019-05-29
128769,2019-05-24
237861,2019-05-24
...,...
406990,NaT
413511,NaT
428786,NaT
446760,NaT


In [145]:
# Scratch check: a 30-day Timedelta, built from the keyword form instead of a string.
pd.Timedelta(days=30)

Timedelta('30 days 00:00:00')

In [148]:
# Scratch check: a pandas Timedelta can be subtracted from a plain datetime.
datetime(year=2019, month=1, day=10) - pd.Timedelta(days=30)

datetime.datetime(2018, 12, 11, 0, 0)