# Combine the sales-valuation data with macroeconomic and demographic data

In [1]:
# all imports
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

# notebook settings
# `display` lets a cell render DataFrames with the rich notebook repr explicitly
from IPython.display import display
# None removes the cap on displayed columns, so wide frames are shown in full
pd.options.display.max_columns = None

In [2]:
# make sure your dir structure looks like this:
# dsga1001/ (git repo) --> dsga1001/ --> ipython/ --> .ipynb files
# dsga1001-data/ --> .pkl and .csv data files (sibling of the git repo)

# path for exporting and importing data files; relative to the ipython/ folder,
# so the notebook must be run from there for the three '../' hops to resolve
path = '../../../dsga1001-data/'

## Read in the sales-valuation data and prepare for merge
* read in the sales-valuation pickle file
* derive SALE YEAR and SALE MONTH columns from SALE DATE column so that we can join on them

In [3]:
# read in the inner-joined sales-valuation dataframe
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# that were produced by this project's own notebooks
df = pd.read_pickle(path + 'merged-sales-val.pkl')
display(df)

Unnamed: 0,s_borough,s_neighborhood,s_building_class_category,s_block,s_lot,s_address,s_apt_number,s_zipcode,s_residential_units,s_commercial_units,s_total_units,s_land_sqft,s_gross_sqft,s_year_built,s_tax_class_at_sale,s_building_class_at_sale,s_sale_price,s_sale_date,s_bbl,s_year,v_bldgcl,v_ltfront,v_ltdepth,v_stories,v_fullval,v_bldfront,v_latitude,v_longitude,v_nta
0,2,BATHGATE,01,03039,0064,467 EAST 185 STREET,,10458.0,1.0,0.0,1.0,1667.0,1296.0,1910.0,1,A1,329000,2011-01-19,2030390064,2010/11,A1,16,100,2.0,365000,44,40.856635,-73.892647,Claremont-Bathgate
1,2,BATHGATE,01,03046,0034,2085 BATHGATE AV,,10457.0,1.0,0.0,1.0,2060.0,1629.0,1899.0,1,A1,288500,2011-06-29,2030460034,2010/11,A1,22,93,2.7,322000,37,40.850613,-73.895064,Claremont-Bathgate
2,2,BATHGATE,01,03048,0028,540 EAST 182ND STREET,,10457.0,1.0,0.0,1.0,1209.0,1048.0,1901.0,1,A1,122500,2011-03-21,2030480028,2010/11,A1,15,80,2.0,299000,30,40.853004,-73.892627,East Tremont
3,2,BATHGATE,01,03053,0103,2327 BASSFORD AVENUE,,10458.0,1.0,0.0,1.0,913.0,1248.0,1901.0,1,A1,10,2011-08-25,2030530103,2011/12,A1,20,45,2.0,376000,30,40.855847,-73.891755,Claremont-Bathgate
4,2,BATHGATE,02,02929,0128,559 CLAREMONT PARKWAY,,10457.0,2.0,0.0,2.0,2361.0,2394.0,1995.0,1,B1,365000,2011-07-13,2029290128,2011/12,B1,0,42,3.0,403000,42,40.837745,-73.900144,Claremont-Bathgate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425729,5,WOODROW,02,07316,0084,57 DARNELL LANE,,10309.0,2.0,0.0,2.0,2311.0,1500.0,1995.0,1,B2,620000,2019-06-20,5073160084,2018/19,B2,26,88,2.0,518000,44,40.536003,-74.223213,Charleston-Richmond Valley-Tottenville
425730,5,WOODROW,02,07317,0119,73 ROBIN COURT,,10309.0,2.0,0.0,2.0,4125.0,2160.0,1994.0,1,B2,625000,2019-05-21,5073170119,2018/19,B2,25,165,3.0,527000,40,40.536140,-74.222012,Charleston-Richmond Valley-Tottenville
425731,5,WOODROW,02,07349,0004,51 PHEASANT LANE,,10309.0,2.0,0.0,2.0,2123.0,1850.0,1998.0,1,B9,670000,2019-02-22,5073490004,2018/19,B9,23,91,2.0,581000,52,40.532829,-74.222422,Charleston-Richmond Valley-Tottenville
425732,5,WOODROW,02,07349,0035,33 QUAIL LANE,,10309.0,2.0,0.0,2.0,2255.0,2377.0,1998.0,1,B9,640000,2019-03-08,5073490035,2018/19,B9,27,83,3.0,517000,40,40.532390,-74.221234,Charleston-Richmond Valley-Tottenville


In [4]:
# based on the sale date column (s_sale_date), create additional columns for
# s_sale_month and s_sale_year; these are the keys for the macroeconomic join.
# The date column is parsed once and the vectorized .dt accessor is used instead
# of a per-row Python lambda.
sale_dates = pd.to_datetime(df['s_sale_date'])
# cast explicitly to int64 so the key dtype does not depend on the pandas version
# (newer releases return int32 from the .dt accessor)
df['s_sale_month'] = sale_dates.dt.month.astype('int64')
df['s_sale_year'] = sale_dates.dt.year.astype('int64')

# verify
display(df[['s_sale_date', 's_sale_month', 's_sale_year']])

Unnamed: 0,s_sale_date,s_sale_month,s_sale_year
0,2011-01-19,1,2011
1,2011-06-29,6,2011
2,2011-03-21,3,2011
3,2011-08-25,8,2011
4,2011-07-13,7,2011
...,...,...,...
425729,2019-06-20,6,2019
425730,2019-05-21,5,2019
425731,2019-02-22,2,2019
425732,2019-03-08,3,2019


In [5]:
# confirm the derived month / year key columns are integer typed (month first, then year)
print(df['s_sale_month'].dtype, df['s_sale_year'].dtype, sep='\n')

int64
int64


In [21]:
# keep every sale whose sale year is not 2019
df = df.loc[df['s_sale_year'].ne(2019)]

In [23]:
# verify: 2019 should be absent from the distinct sale years that remain
df['s_sale_year'].unique()

array([2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018])

## Read in the macroeconomic data and prepare for merge

In [8]:
# import the combined macroeconomic data from CSV file (located in the shared data directory `path`)
macro = pd.read_csv(path + 'macroeconomic_data.csv')

In [9]:
# normalise the macro column names in a single pass: lower-case them, swap spaces
# for underscores, then prepend the 'm_' prefix that marks macroeconomic columns
macro.columns = macro.columns.map(lambda name: 'm_' + name.lower().replace(" ", "_"))

In [10]:
# confirm the macro join keys are integer typed (year first, then month)
print(macro['m_year'].dtype, macro['m_month'].dtype, sep='\n')

int64
int64


In [12]:
# list the renamed macro columns; every name should now carry the 'm_' prefix
macro.columns

Index(['m_year', 'm_month', 'm_10yr_treasury_constant_maturity_rate',
       'm_30yr_fixed_rate_mortgage_avg_in_us',
       'm_case_shiller_ny_home_price_index',
       'm_case_shiller_us_national_home_price_index', 'm_consumer_price_index',
       'm_core_cpi', 'm_effective_fed_funds_rate', 'm_inflation_rate',
       'm_population_level', 'm_real_gross_domestic_product', 'm_sp500_index',
       'm_unemployment_rate', 'm_us_real_median_household_income',
       'm_10yr_treasury_constant_maturity_rate_%chg',
       'm_30yr_fixed_rate_mortgage_avg_in_us_%chg',
       'm_case_shiller_ny_home_price_index_%chg',
       'm_case_shiller_us_national_home_price_index_%chg',
       'm_consumer_price_index_%chg', 'm_core_cpi_%chg',
       'm_effective_fed_funds_rate_%chg', 'm_inflation_rate_%chg',
       'm_population_level_%chg', 'm_real_gross_domestic_product_%chg',
       'm_sp500_index_%chg', 'm_unemployment_rate_%chg',
       'm_us_real_median_household_income_%chg'],
      dtype='object')

## Merge the sales-valuation data with macroeconomic data
Perform an inner join (intersection) of sales-valuation and macroeconomic datasets on key: (SALE YEAR, SALE MONTH)

Reference:
* https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.merge.html
* https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging

In [24]:
# join the merged sales-valuation dataframe with the macroeconomic data on (year, month) of the SALE DATE.
# validate='many_to_one' makes pandas raise a MergeError if macro holds more than one
# row for a (m_year, m_month) key, which would otherwise silently duplicate sales rows.
merged = pd.merge(
    df,
    macro,
    how='inner',
    left_on=['s_sale_year', 's_sale_month'],
    right_on=['m_year', 'm_month'],
    validate='many_to_one',
)

In [25]:
# inspect the joined sales-valuation-macro frame
display(merged)

Unnamed: 0,s_borough,s_neighborhood,s_building_class_category,s_block,s_lot,s_address,s_apt_number,s_zipcode,s_residential_units,s_commercial_units,s_total_units,s_land_sqft,s_gross_sqft,s_year_built,s_tax_class_at_sale,s_building_class_at_sale,s_sale_price,s_sale_date,s_bbl,s_year,v_bldgcl,v_ltfront,v_ltdepth,v_stories,v_fullval,v_bldfront,v_latitude,v_longitude,v_nta,s_sale_month,s_sale_year,m_year,m_month,m_10yr_treasury_constant_maturity_rate,m_30yr_fixed_rate_mortgage_avg_in_us,m_case_shiller_ny_home_price_index,m_case_shiller_us_national_home_price_index,m_consumer_price_index,m_core_cpi,m_effective_fed_funds_rate,m_inflation_rate,m_population_level,m_real_gross_domestic_product,m_sp500_index,m_unemployment_rate,m_us_real_median_household_income,m_10yr_treasury_constant_maturity_rate_%chg,m_30yr_fixed_rate_mortgage_avg_in_us_%chg,m_case_shiller_ny_home_price_index_%chg,m_case_shiller_us_national_home_price_index_%chg,m_consumer_price_index_%chg,m_core_cpi_%chg,m_effective_fed_funds_rate_%chg,m_inflation_rate_%chg,m_population_level_%chg,m_real_gross_domestic_product_%chg,m_sp500_index_%chg,m_unemployment_rate_%chg,m_us_real_median_household_income_%chg
0,2,BATHGATE,01,03039,0064,467 EAST 185 STREET,,10458.0,1.0,0.0,1.0,1667.0,1296.0,1910.0,1,A1,329000,2011-01-19,2030390064,2010/11,A1,16,100,2.0,365000,44,40.856635,-73.892647,Claremont-Bathgate,1,2011,2011,1,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289
1,2,BATHGATE,02,03037,0006,4392 PARK AVENUE,,10457.0,2.0,0.0,2.0,2275.0,3240.0,1899.0,1,B2,115000,2011-01-29,2030370006,2010/11,B2,25,91,2.0,433000,50,40.852259,-73.896728,Claremont-Bathgate,1,2011,2011,1,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289
2,2,BATHGATE,02,03053,0022,511 EAST 183 STREET,,10458.0,2.0,1.0,3.0,2011.0,4280.0,1901.0,1,S2,167500,2011-01-28,2030530022,2010/11,S2,23,87,3.0,137000,60,40.854837,-73.892216,Claremont-Bathgate,1,2011,2011,1,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289
3,2,BATHGATE,03,03044,0056,1976 BATHGATE AVENUE,,10457.0,3.0,0.0,3.0,1493.0,2430.0,1931.0,1,C0,292500,2011-01-24,2030440056,2010/11,C0,18,82,2.0,398000,45,40.848517,-73.896224,Claremont-Bathgate,1,2011,2011,1,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289
4,2,BAYCHESTER,01,04735,0061,3360 WILSON AVENUE,,10469.0,1.0,0.0,1.0,1900.0,1681.0,1930.0,1,A5,255000,2011-01-20,2047350061,2010/11,A5,19,100,2.0,333000,33,40.874588,-73.850192,Eastchester-Edenwald-Baychester,1,2011,2011,1,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401785,5,WOODROW,02,06913,0048,554 DARLINGTON AVENUE,,10309.0,2.0,0.0,2.0,4000.0,1634.0,2005.0,1,B2,890000,2018-10-04,5069130048,2018/19,B2,40,100,2.0,793000,43,40.533587,-74.204538,Rossville-Woodrow,10,2018,2018,10,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427
401786,5,WOODROW,02,06924,0062,922 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,4300.0,2564.0,2009.0,1,B2,1030000,2018-10-11,5069240062,2018/19,B2,43,100,2.0,959000,39,40.536844,-74.206480,Rossville-Woodrow,10,2018,2018,10,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427
401787,5,WOODROW,02,06967,0005,987 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,2500.0,1440.0,1987.0,1,B9,615000,2018-10-24,5069670005,2018/19,B9,25,100,2.0,504000,45,40.536283,-74.208889,Rossville-Woodrow,10,2018,2018,10,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427
401788,5,WOODROW,02,07020,0434,59 LYNBROOK AVENUE,,10309.0,2.0,0.0,2.0,5260.0,2600.0,2001.0,1,B2,780000,2018-10-22,5070200434,2018/19,B2,40,131,2.0,839000,54,40.539312,-74.216695,Rossville-Woodrow,10,2018,2018,10,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427


In [26]:
# (rows, columns) of the sales-valuation-macro join
merged.shape

(401790, 59)

In [27]:
# sanity check the merge -- the sale-side and macro-side (year, month) keys
# should agree on every row
display(merged.loc[:, ['s_sale_price', 's_sale_date', 's_sale_month', 's_sale_year', 'm_year', 'm_month']])

Unnamed: 0,s_sale_price,s_sale_date,s_sale_month,s_sale_year,m_year,m_month
0,329000,2011-01-19,1,2011,2011,1
1,115000,2011-01-29,1,2011,2011,1
2,167500,2011-01-28,1,2011,2011,1
3,292500,2011-01-24,1,2011,2011,1
4,255000,2011-01-20,1,2011,2011,1
...,...,...,...,...,...,...
401785,890000,2018-10-04,10,2018,2018,10
401786,1030000,2018-10-11,10,2018,2018,10
401787,615000,2018-10-24,10,2018,2018,10
401788,780000,2018-10-22,10,2018,2018,10


In [28]:
# drop the redundant YEAR and MONTH cols in the merged df (they duplicate
# s_sale_year / s_sale_month after the join).
# `columns=` is used because passing axis positionally was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
merged = merged.drop(columns=['m_year', 'm_month'])

In [29]:
# validate: m_year and m_month should no longer be listed
merged.columns

Index(['s_borough', 's_neighborhood', 's_building_class_category', 's_block',
       's_lot', 's_address', 's_apt_number', 's_zipcode',
       's_residential_units', 's_commercial_units', 's_total_units',
       's_land_sqft', 's_gross_sqft', 's_year_built', 's_tax_class_at_sale',
       's_building_class_at_sale', 's_sale_price', 's_sale_date', 's_bbl',
       's_year', 'v_bldgcl', 'v_ltfront', 'v_ltdepth', 'v_stories',
       'v_fullval', 'v_bldfront', 'v_latitude', 'v_longitude', 'v_nta',
       's_sale_month', 's_sale_year', 'm_10yr_treasury_constant_maturity_rate',
       'm_30yr_fixed_rate_mortgage_avg_in_us',
       'm_case_shiller_ny_home_price_index',
       'm_case_shiller_us_national_home_price_index', 'm_consumer_price_index',
       'm_core_cpi', 'm_effective_fed_funds_rate', 'm_inflation_rate',
       'm_population_level', 'm_real_gross_domestic_product', 'm_sp500_index',
       'm_unemployment_rate', 'm_us_real_median_household_income',
       'm_10yr_treasury_constant_m

In [30]:
# number of sales per (year, month); count() tallies the non-null s_sale_date
# values in each group
sales_per_month = df.groupby(['s_sale_year', 's_sale_month'])['s_sale_date'].count()

# expand all rows for this print only; option_context restores the previous
# display.max_rows value on exit, even if printing raises
with pd.option_context('display.max_rows', None):
    print(sales_per_month)

s_sale_year  s_sale_month
2011         1               3008
             2               2941
             3               3599
             4               3213
             5               3422
             6               3954
             7               3675
             8               3851
             9               3388
             10              3112
             11              3031
             12              3323
2012         1               2967
             2               3230
             3               3460
             4               3355
             5               3865
             6               4118
             7               4138
             8               4438
             9               3454
             10              3576
             11              3434
             12              5032
2013         1               3499
             2               3286
             3               3580
             4               3935
             5        

## Write sales-valuation-macroeconomic merge result to pickle file

In [31]:
# make an independent copy of the merging progress so far; a plain assignment
# would only alias `merged`, so later in-place edits to `merged` (e.g. adding
# columns) would also show up in this snapshot
sales_val_macro = merged.copy()

# write as pickle file
sales_val_macro.to_pickle(path + 'merged-sales-val-macro.pkl')

In [32]:
# also write as CSV
# NOTE: index=False is not passed, so the row index is written as an unnamed first column
sales_val_macro.to_csv(path + 'merged-sales-val-macro.csv')

## Read in the NYC demographic data

In [46]:
# import NYC demographic data (located in the shared data directory `path`)
dem = pd.read_csv(path + 'demographic_data.csv')

In [47]:
# normalise the demographic column names in a single pass: lower-case them, swap
# spaces for underscores, then prepend the 'd_' prefix that marks demographic columns
dem.columns = dem.columns.map(lambda name: 'd_' + name.lower().replace(" ", "_"))

In [48]:
# inspect the demographic data; it appears to hold one row per (d_year, d_region) -- confirm before relying on it
display(dem)

Unnamed: 0,d_year,d_region,d_pop_25_years_and_over,d_less_than_9th_grade,d_less_than_9th_grade_pct,d_9th_to_12th_grade_no_diploma,d_9th_to_12th_grade_no_diploma_pct,d_high_school_graduate,d_high_school_graduate_pct,d_some_college_no_degree,d_some_college_no_degree_pct,d_associates_degree,d_associates_degree_pct,d_bachelors_degree,d_bachelors_degree_pct,d_graduate_or_professional_degree,d_graduate_or_professional_degree_pct,d_total_population,d_white_population,d_white_population_pct,d_black_or_african_american_population,d_black_or_african_american_population_pct,d_american_indian_and_alaska_native_population,d_american_indian_and_alaska_native_population_pct,d_asian_population,d_asian_population_pct,d_native_hawaiian_and_other_pacific_islander_population,d_native_hawaiian_and_other_pacific_islander_population_pct,d_some_other_race_population,d_some_other_race_population_pct,d_total_housing_units,d_no_bedroom_units,d_no_bedroom_units_pct,d_1_bedroom_units,d_1_bedroom_units_pct,d_2_bedroom_units,d_2_bedroom_units_pct,d_3_bedroom_units,d_3_bedroom_units_pct,d_4_bedroom_units,d_4_bedroom_units_pct,d_5_bedroom_units,d_5_bedroom_units_pct,d_owner_occupied_units,d_units_less_than_50000,d_units_less_than_50000_pct,d_units_from_50000_to_99999,d_units_from_50000_to_99999_pct,d_units_from_100000_to_149999,d_units_from_100000_to_149999_pct,d_units_from_150000_to_199999,d_units_from_150000_to_199999_pct,d_units_from_200000_to_299999,d_units_from_200000_to_299999_pct,d_units_from_300000_to_499999,d_units_from_300000_to_499999_pct,d_units_from_500000_to_999999,d_units_from_500000_to_999999_pct,d_units_1000000_or_more,d_units_1000000_or_more_pct,d_median_housing_price,d_total_households,d_households_less_than_10000,d_households_less_than_10000_pct,d_households_from_10000_to_14999,d_households_from_10000_to_14999_pct,d_households_from_15000_to_24999,d_households_from_15000_to_24999_pct,d_households_from_25000_to_34999,d_households_from_25000_to_34999_pct,d_households_from_35000_to_
49999,d_households_from_35000_to_49999_pct,d_households_from_50000_to_74999,d_households_from_50000_to_74999_pct,d_households_from_75000_to_99999,d_households_from_75000_to_99999_pct,d_households_from_100000_to_149999,d_households_from_100000_to_149999_pct,d_households_from_150000_to_199999,d_households_from_150000_to_199999_pct,d_households_200000_or_more,d_households_200000_or_more_pct,d_median_household_income_dollars,d_mean_household_income_dollars
0,2009,BK,1694150,181274.05,0.107,193133.1,0.114,472667.85,0.279,240569.3,0.142,108425.6,0.064,303252.85,0.179,194827.25,0.115,,,,,,,,,,,,,,977590,64520.94,0.066,274702.79,0.281,335313.37,0.343,210181.85,0.215,56700.22,0.058,38126.01,0.039,275036,5225.684,0.019,5500.72,0.02,4950.648,0.018,8251.08,0.03,17602.304,0.064,67658.856,0.246,138618.144,0.504,27228.564,0.099,570300,900297,103534.155,0.115,63020.79,0.07,104434.452,0.116,90929.997,0.101,110736.531,0.123,142246.926,0.158,96331.779,0.107,108035.64,0.12,41413.662,0.046,40513.365,0.045,47047,68521
1,2009,BX,858344,141626.76,0.165,127034.912,0.148,224027.784,0.261,146776.824,0.171,55792.36,0.065,107293.0,0.125,56650.704,0.066,,,,,,,,,,,,,,513457,23619.022,0.046,168413.896,0.328,176629.208,0.344,115527.825,0.225,19511.366,0.038,9242.226,0.018,98731,6812.439,0.069,5430.205,0.055,4936.55,0.05,4640.357,0.047,9971.831,0.101,38998.745,0.395,26558.639,0.269,1283.503,0.013,393800,477855,71678.25,0.15,42529.095,0.089,66899.7,0.14,54953.325,0.115,64032.57,0.134,74067.525,0.155,46351.935,0.097,38228.4,0.08,11946.375,0.025,7167.825,0.015,35667,50590
2,2009,MN,1223894,95463.732,0.078,75881.428,0.062,148091.174,0.121,129732.764,0.106,47731.866,0.039,372063.776,0.304,354929.26,0.29,,,,,,,,,,,,,,858284,133034.02,0.155,351896.44,0.41,254052.064,0.296,90978.104,0.106,21457.1,0.025,8582.84,0.01,188095,4702.375,0.025,2257.14,0.012,1880.95,0.01,2257.14,0.012,6395.23,0.034,28402.345,0.151,65457.06,0.348,76554.665,0.407,849000,749383,64446.938,0.086,41216.065,0.055,57702.491,0.077,55454.342,0.074,61449.406,0.082,93672.875,0.125,75687.683,0.101,98918.556,0.132,59201.257,0.079,140884.004,0.188,75012,140712
3,2009,QN,1620558,171779.148,0.106,162055.8,0.1,445653.45,0.275,249565.932,0.154,124782.966,0.077,301423.788,0.186,166917.474,0.103,,,,,,,,,,,,,,846183,41462.967,0.049,214930.482,0.254,264009.096,0.312,236931.24,0.28,60078.993,0.071,29616.405,0.035,358845,6818.055,0.019,7535.745,0.021,8971.125,0.025,18301.095,0.051,40549.485,0.113,113395.02,0.316,153226.815,0.427,9688.815,0.027,475600,792664,52315.824,0.066,34084.552,0.043,70547.096,0.089,72132.424,0.091,103838.984,0.131,142679.52,0.18,110180.296,0.139,120484.928,0.152,46767.176,0.059,38840.536,0.049,59580,77059
4,2009,SI,334373,17053.023,0.051,27418.586,0.082,111680.582,0.334,60855.886,0.182,25746.721,0.077,54502.799,0.163,36446.657,0.109,,,,,,,,,,,,,,180115,3242.07,0.018,24135.41,0.134,36923.575,0.205,80871.635,0.449,27737.71,0.154,7384.715,0.041,115847,2085.246,0.018,810.929,0.007,1042.623,0.009,1969.399,0.017,9499.454,0.082,58502.735,0.505,38808.745,0.335,3012.022,0.026,449400,167297,10539.711,0.063,6022.692,0.036,12547.275,0.075,14052.948,0.084,18068.076,0.108,24927.253,0.149,22585.095,0.135,31117.242,0.186,15725.918,0.094,11878.087,0.071,72091,90107
5,2010,BK,1649387,192978.279,0.117,163289.313,0.099,478322.23,0.29,249057.437,0.151,94015.059,0.057,295240.273,0.179,176484.409,0.107,2508340.0,1161361.42,0.463,877919.0,0.35,17558.38,0.007,283442.42,0.113,2508.34,0.001,218225.58,0.087,1000000,54000.0,0.054,286000.0,0.286,349000.0,0.349,220000.0,0.22,56000.0,0.056,34000.0,0.034,273500,4102.5,0.015,5743.5,0.021,3829.0,0.014,7384.5,0.027,18324.5,0.067,70289.5,0.257,138391.0,0.506,25709.0,0.094,566700,905317,105016.772,0.116,60656.239,0.067,105016.772,0.116,97774.236,0.108,123123.112,0.136,136702.867,0.151,93247.651,0.103,103206.138,0.114,46171.167,0.051,35307.363,0.039,45103,66964
6,2010,BX,857048,127700.152,0.149,136270.632,0.159,248543.92,0.29,143984.064,0.168,55708.12,0.065,91704.136,0.107,53136.976,0.062,1386657.0,346664.25,0.25,518609.718,0.374,15253.227,0.011,58239.594,0.042,2773.314,0.002,506129.805,0.365,512024,25601.2,0.05,174600.184,0.341,174088.16,0.34,109573.136,0.214,17920.84,0.035,9728.456,0.019,88493,5486.566,0.062,4336.157,0.049,4601.636,0.052,4159.171,0.047,8052.863,0.091,41945.682,0.474,18406.544,0.208,1415.888,0.016,388900,471912,71730.624,0.152,43887.816,0.093,68427.24,0.145,52854.144,0.112,63236.208,0.134,75505.92,0.16,42000.168,0.089,35865.312,0.076,11797.8,0.025,6134.856,0.013,34723,48745
7,2010,MN,1171294,93703.52,0.08,73791.522,0.063,153439.514,0.131,127671.046,0.109,40995.29,0.035,356073.376,0.304,324448.438,0.277,1586698.0,967885.78,0.61,285605.64,0.18,12693.584,0.008,199923.948,0.126,1586.698,0.001,193577.156,0.122,846962,124503.414,0.147,343019.61,0.405,260864.296,0.308,88931.01,0.105,22021.012,0.026,6775.696,0.008,161647,4849.41,0.03,2586.352,0.016,1454.823,0.009,1616.47,0.01,5011.057,0.031,24893.638,0.154,57061.391,0.353,64335.506,0.398,825900,726090,67526.37,0.093,35578.41,0.049,66074.19,0.091,52278.48,0.072,67526.37,0.093,95843.88,0.132,72609.0,0.1,93665.61,0.129,51552.39,0.071,122709.21,0.169,67986,127351
8,2010,QN,1554325,166312.775,0.107,138334.925,0.089,427439.375,0.275,245583.35,0.158,118128.7,0.076,299984.725,0.193,156986.825,0.101,2233841.0,982890.04,0.44,442300.518,0.198,22338.41,0.01,551758.727,0.247,4467.682,0.002,308270.058,0.138,834965,37573.425,0.045,222100.69,0.266,262179.01,0.314,226275.515,0.271,58447.55,0.07,28388.81,0.034,338609,6772.18,0.02,6433.571,0.019,8465.225,0.025,16591.841,0.049,37585.599,0.111,116142.887,0.343,137813.863,0.407,8803.834,0.026,464800,772332,51746.244,0.067,34754.94,0.045,78777.864,0.102,75688.536,0.098,101947.824,0.132,140564.424,0.182,101175.492,0.131,109671.144,0.142,47112.252,0.061,30120.948,0.039,56723,73293
9,2010,SI,316070,16435.64,0.052,21492.76,0.068,100826.33,0.319,57524.74,0.182,22757.04,0.072,62265.79,0.197,34767.7,0.11,469363.0,361409.51,0.77,53507.382,0.114,2346.815,0.005,39895.855,0.085,0.0,0.0,21121.335,0.045,176696,3357.224,0.019,22263.696,0.126,39049.816,0.221,81280.16,0.46,23323.872,0.132,7244.536,0.041,114045,1026.405,0.009,570.225,0.005,456.18,0.004,2394.945,0.021,7983.15,0.07,59987.67,0.526,37634.85,0.33,3763.485,0.033,452300,163816,10811.856,0.066,6388.824,0.039,13105.28,0.08,13105.28,0.08,15071.072,0.092,22770.424,0.139,20640.816,0.126,33090.832,0.202,16709.232,0.102,12122.384,0.074,75451,91994


## Prepare Sales data: map borough IDs to borough codes to prepare for merge with NYC demographic data

In [44]:
# map boro codes in the sales data into two-letter codes
# (numeric borough ID, two-letter code) pairs, checked in this order
_BORO_ID_CODE_PAIRS = (
    (1, "MN"),  # Manhattan
    (2, "BX"),  # Bronx
    (3, "BK"),  # Brooklyn
    (4, "QN"),  # Queens
    (5, "SI"),  # Staten Island
)


def map_boro_id_to_code(boro_id):
    """Translate a numeric borough ID into its two-letter borough code.

    1 = Manhattan (MN), 2 = Bronx (BX), 3 = Brooklyn (BK),
    4 = Queens (QN), 5 = Staten Island (SI).

    Parameters
    ----------
    boro_id
        Borough identifier; compared with ``==`` against the integers 1-5.

    Returns
    -------
    str or None
        The matching two-letter code, or None when ``boro_id`` equals none
        of the known IDs.
    """
    for known_id, boro_code in _BORO_ID_CODE_PAIRS:
        if boro_id == known_id:
            return boro_code
    return None

# derive the two-letter borough code for every sale by passing each s_borough
# value through map_boro_id_to_code
merged['s_borough_code'] = merged['s_borough'].map(map_boro_id_to_code)

# validate mapping
display(merged.loc[:, ['s_borough', 's_borough_code']])

Unnamed: 0,s_borough,s_borough_code
0,2,BX
1,2,BX
2,2,BX
3,2,BX
4,2,BX
...,...,...
401785,5,SI
401786,5,SI
401787,5,SI
401788,5,SI


## Join sales-valuation-macroeconomic data with NYC demographic data on (YEAR, BORO)

In [49]:
# join sales-valuation-macro df with nyc demographic data on (year, boro).
# validate='many_to_one' makes pandas raise a MergeError if dem holds more than one
# row for a (d_year, d_region) key, which would otherwise silently duplicate sales rows.
merged = pd.merge(left=merged,
                  right=dem,
                  how='inner',
                  left_on=['s_sale_year', 's_borough_code'],
                  right_on=['d_year', 'd_region'],
                  validate='many_to_one')

In [50]:
# validate join -- the sale-side and demographic-side (year, borough) keys
# should agree on every row
display(merged.loc[:, ['s_sale_year', 's_borough_code', 'd_year', 'd_region']])

Unnamed: 0,s_sale_year,s_borough_code,d_year,d_region
0,2011,BX,2011,BX
1,2011,BX,2011,BX
2,2011,BX,2011,BX
3,2011,BX,2011,BX
4,2011,BX,2011,BX
...,...,...,...,...
401785,2018,SI,2018,SI
401786,2018,SI,2018,SI
401787,2018,SI,2018,SI
401788,2018,SI,2018,SI


In [51]:
# drop the redundant d_year and d_region cols in the merged df (they duplicate
# s_sale_year / s_borough_code after the join).
# `columns=` is used because passing axis positionally was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
merged = merged.drop(columns=['d_year', 'd_region'])

## Write sales-valuation-macro-demographic data into pickle file

In [52]:
# check the merged df so far (sales, valuation, macroeconomic and demographic columns)
display(merged)

Unnamed: 0,s_borough,s_neighborhood,s_building_class_category,s_block,s_lot,s_address,s_apt_number,s_zipcode,s_residential_units,s_commercial_units,s_total_units,s_land_sqft,s_gross_sqft,s_year_built,s_tax_class_at_sale,s_building_class_at_sale,s_sale_price,s_sale_date,s_bbl,s_year,v_bldgcl,v_ltfront,v_ltdepth,v_stories,v_fullval,v_bldfront,v_latitude,v_longitude,v_nta,s_sale_month,s_sale_year,m_10yr_treasury_constant_maturity_rate,m_30yr_fixed_rate_mortgage_avg_in_us,m_case_shiller_ny_home_price_index,m_case_shiller_us_national_home_price_index,m_consumer_price_index,m_core_cpi,m_effective_fed_funds_rate,m_inflation_rate,m_population_level,m_real_gross_domestic_product,m_sp500_index,m_unemployment_rate,m_us_real_median_household_income,m_10yr_treasury_constant_maturity_rate_%chg,m_30yr_fixed_rate_mortgage_avg_in_us_%chg,m_case_shiller_ny_home_price_index_%chg,m_case_shiller_us_national_home_price_index_%chg,m_consumer_price_index_%chg,m_core_cpi_%chg,m_effective_fed_funds_rate_%chg,m_inflation_rate_%chg,m_population_level_%chg,m_real_gross_domestic_product_%chg,m_sp500_index_%chg,m_unemployment_rate_%chg,m_us_real_median_household_income_%chg,s_borough_code,d_pop_25_years_and_over,d_less_than_9th_grade,d_less_than_9th_grade_pct,d_9th_to_12th_grade_no_diploma,d_9th_to_12th_grade_no_diploma_pct,d_high_school_graduate,d_high_school_graduate_pct,d_some_college_no_degree,d_some_college_no_degree_pct,d_associates_degree,d_associates_degree_pct,d_bachelors_degree,d_bachelors_degree_pct,d_graduate_or_professional_degree,d_graduate_or_professional_degree_pct,d_total_population,d_white_population,d_white_population_pct,d_black_or_african_american_population,d_black_or_african_american_population_pct,d_american_indian_and_alaska_native_population,d_american_indian_and_alaska_native_population_pct,d_asian_population,d_asian_population_pct,d_native_hawaiian_and_other_pacific_islander_population,d_native_hawaiian_and_other_pacific_islander_population_pct,d_some_other_race_popula
tion,d_some_other_race_population_pct,d_total_housing_units,d_no_bedroom_units,d_no_bedroom_units_pct,d_1_bedroom_units,d_1_bedroom_units_pct,d_2_bedroom_units,d_2_bedroom_units_pct,d_3_bedroom_units,d_3_bedroom_units_pct,d_4_bedroom_units,d_4_bedroom_units_pct,d_5_bedroom_units,d_5_bedroom_units_pct,d_owner_occupied_units,d_units_less_than_50000,d_units_less_than_50000_pct,d_units_from_50000_to_99999,d_units_from_50000_to_99999_pct,d_units_from_100000_to_149999,d_units_from_100000_to_149999_pct,d_units_from_150000_to_199999,d_units_from_150000_to_199999_pct,d_units_from_200000_to_299999,d_units_from_200000_to_299999_pct,d_units_from_300000_to_499999,d_units_from_300000_to_499999_pct,d_units_from_500000_to_999999,d_units_from_500000_to_999999_pct,d_units_1000000_or_more,d_units_1000000_or_more_pct,d_median_housing_price,d_total_households,d_households_less_than_10000,d_households_less_than_10000_pct,d_households_from_10000_to_14999,d_households_from_10000_to_14999_pct,d_households_from_15000_to_24999,d_households_from_15000_to_24999_pct,d_households_from_25000_to_34999,d_households_from_25000_to_34999_pct,d_households_from_35000_to_49999,d_households_from_35000_to_49999_pct,d_households_from_50000_to_74999,d_households_from_50000_to_74999_pct,d_households_from_75000_to_99999,d_households_from_75000_to_99999_pct,d_households_from_100000_to_149999,d_households_from_100000_to_149999_pct,d_households_from_150000_to_199999,d_households_from_150000_to_199999_pct,d_households_200000_or_more,d_households_200000_or_more_pct,d_median_household_income_dollars,d_mean_household_income_dollars
0,2,BATHGATE,01,03039,0064,467 EAST 185 STREET,,10458.0,1.0,0.0,1.0,1667.0,1296.0,1910.0,1,A1,329000,2011-01-19,2030390064,2010/11,A1,16,100,2.0,365000,44,40.856635,-73.892647,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
1,2,BATHGATE,02,03037,0006,4392 PARK AVENUE,,10457.0,2.0,0.0,2.0,2275.0,3240.0,1899.0,1,B2,115000,2011-01-29,2030370006,2010/11,B2,25,91,2.0,433000,50,40.852259,-73.896728,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
2,2,BATHGATE,02,03053,0022,511 EAST 183 STREET,,10458.0,2.0,1.0,3.0,2011.0,4280.0,1901.0,1,S2,167500,2011-01-28,2030530022,2010/11,S2,23,87,3.0,137000,60,40.854837,-73.892216,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
3,2,BATHGATE,03,03044,0056,1976 BATHGATE AVENUE,,10457.0,3.0,0.0,3.0,1493.0,2430.0,1931.0,1,C0,292500,2011-01-24,2030440056,2010/11,C0,18,82,2.0,398000,45,40.848517,-73.896224,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
4,2,BAYCHESTER,01,04735,0061,3360 WILSON AVENUE,,10469.0,1.0,0.0,1.0,1900.0,1681.0,1930.0,1,A5,255000,2011-01-20,2047350061,2010/11,A5,19,100,2.0,333000,33,40.874588,-73.850192,Eastchester-Edenwald-Baychester,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401785,5,WOODROW,02,06913,0048,554 DARLINGTON AVENUE,,10309.0,2.0,0.0,2.0,4000.0,1634.0,2005.0,1,B2,890000,2018-10-04,5069130048,2018/19,B2,40,100,2.0,793000,43,40.533587,-74.204538,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401786,5,WOODROW,02,06924,0062,922 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,4300.0,2564.0,2009.0,1,B2,1030000,2018-10-11,5069240062,2018/19,B2,43,100,2.0,959000,39,40.536844,-74.206480,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401787,5,WOODROW,02,06967,0005,987 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,2500.0,1440.0,1987.0,1,B9,615000,2018-10-24,5069670005,2018/19,B9,25,100,2.0,504000,45,40.536283,-74.208889,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401788,5,WOODROW,02,07020,0434,59 LYNBROOK AVENUE,,10309.0,2.0,0.0,2.0,5260.0,2600.0,2001.0,1,B2,780000,2018-10-22,5070200434,2018/19,B2,40,131,2.0,839000,54,40.539312,-74.216695,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953


In [53]:
# snapshot the merging progress so far; .copy() yields an independent frame
# (plain assignment would only bind a second name to the same object, so any
# later in-place change to `merged` would also show up in the snapshot)
sales_val_macro_dem = merged.copy()

# write as pickle file
sales_val_macro_dem.to_pickle(path + 'merged-sales-val-macro-dem.pkl')

In [54]:
# export the same snapshot as CSV next to the pickle
# (default to_csv settings: the row index is written as the first column)
sales_val_macro_dem.to_csv(f'{path}merged-sales-val-macro-dem.csv')

## Read in the NYC crime data and prepare for merge

In [95]:
# load the arrests and complaints tables from their CSV files
arrests = pd.read_csv(f'{path}arrests.csv')
complaints = pd.read_csv(f'{path}complaints.csv')

In [98]:
def _standardize_columns(frame, prefix):
    """Return a new frame whose column names are normalized and source-prefixed.

    Each column name is lower-cased, has spaces replaced with underscores,
    is renamed to 'count' if it is '0', and finally gets ``prefix`` prepended.

    Names that already start with ``prefix`` are returned unchanged, so running
    this cell a second time does not stack prefixes (e.g. 'a_a_count').

    Parameters
    ----------
    frame : pd.DataFrame
        Frame whose columns should be renamed; it is not modified.
    prefix : str
        Tag identifying the source dataset, e.g. 'a_' or 'c_'.

    Returns
    -------
    pd.DataFrame
        Frame with the same data and the renamed columns.
    """
    def _rename(column):
        # already processed on a previous run of this cell
        if column.startswith(prefix):
            return column
        column = column.lower().replace(" ", "_")
        # rename "0" col as count
        if column == '0':
            column = 'count'
        return prefix + column

    return frame.rename(columns=_rename)


# normalize the column names and tag them with their source dataset
arrests = _standardize_columns(arrests, 'a_')
complaints = _standardize_columns(complaints, 'c_')

In [99]:
# inspect the arrests column names after the renaming step
arrests.columns

Index(['a_year_month', 'a_borough', 'a_count'], dtype='object')

In [100]:
# inspect the complaints column names after the renaming step
complaints.columns

Index(['c_year_month', 'c_borough', 'c_count'], dtype='object')

In [112]:
# break each 'a_year_month' string at its first '-' into two text columns
a_year_month = arrests['a_year_month'].str.split(pat='-', n=1, expand=True)

# piece 0 is the year, piece 1 is the month; keep both as integers
arrests['a_year'] = a_year_month[0].astype(int)
arrests['a_month'] = a_year_month[1].astype(int)

# show the original string next to the derived columns as a sanity check
display(arrests.loc[:, ['a_year_month', 'a_year', 'a_month']])

Unnamed: 0,a_year_month,a_year,a_month
0,2006-01,2006,1
1,2006-01,2006,1
2,2006-01,2006,1
3,2006-01,2006,1
4,2006-01,2006,1
...,...,...,...
835,2019-12,2019,12
836,2019-12,2019,12
837,2019-12,2019,12
838,2019-12,2019,12


In [114]:
# break each 'c_year_month' string at its first '-' into two text columns
c_year_month = complaints['c_year_month'].str.split(pat='-', n=1, expand=True)

# piece 0 is the year, piece 1 is the month; keep both as integers
complaints['c_year'] = c_year_month[0].astype(int)
complaints['c_month'] = c_year_month[1].astype(int)

# show the original string next to the derived columns as a sanity check
display(complaints.loc[:, ['c_year_month', 'c_year', 'c_month']])

Unnamed: 0,c_year_month,c_year,c_month
0,1900-03,1900,3
1,1900-05,1900,5
2,1900-08,1900,8
3,1900-08,1900,8
4,1900-11,1900,11
...,...,...,...
1967,2019-12,2019,12
1968,2019-12,2019,12
1969,2019-12,2019,12
1970,2019-12,2019,12


## Join sales-valuation-macroeconomic-demographic data with NYC crime data (arrests, complaints) on (YEAR, MONTH, BORO)

In [110]:
# preview the merged frame as it stands before the crime data is joined in
display(merged)

Unnamed: 0,s_borough,s_neighborhood,s_building_class_category,s_block,s_lot,s_address,s_apt_number,s_zipcode,s_residential_units,s_commercial_units,s_total_units,s_land_sqft,s_gross_sqft,s_year_built,s_tax_class_at_sale,s_building_class_at_sale,s_sale_price,s_sale_date,s_bbl,s_year,v_bldgcl,v_ltfront,v_ltdepth,v_stories,v_fullval,v_bldfront,v_latitude,v_longitude,v_nta,s_sale_month,s_sale_year,m_10yr_treasury_constant_maturity_rate,m_30yr_fixed_rate_mortgage_avg_in_us,m_case_shiller_ny_home_price_index,m_case_shiller_us_national_home_price_index,m_consumer_price_index,m_core_cpi,m_effective_fed_funds_rate,m_inflation_rate,m_population_level,m_real_gross_domestic_product,m_sp500_index,m_unemployment_rate,m_us_real_median_household_income,m_10yr_treasury_constant_maturity_rate_%chg,m_30yr_fixed_rate_mortgage_avg_in_us_%chg,m_case_shiller_ny_home_price_index_%chg,m_case_shiller_us_national_home_price_index_%chg,m_consumer_price_index_%chg,m_core_cpi_%chg,m_effective_fed_funds_rate_%chg,m_inflation_rate_%chg,m_population_level_%chg,m_real_gross_domestic_product_%chg,m_sp500_index_%chg,m_unemployment_rate_%chg,m_us_real_median_household_income_%chg,s_borough_code,d_pop_25_years_and_over,d_less_than_9th_grade,d_less_than_9th_grade_pct,d_9th_to_12th_grade_no_diploma,d_9th_to_12th_grade_no_diploma_pct,d_high_school_graduate,d_high_school_graduate_pct,d_some_college_no_degree,d_some_college_no_degree_pct,d_associates_degree,d_associates_degree_pct,d_bachelors_degree,d_bachelors_degree_pct,d_graduate_or_professional_degree,d_graduate_or_professional_degree_pct,d_total_population,d_white_population,d_white_population_pct,d_black_or_african_american_population,d_black_or_african_american_population_pct,d_american_indian_and_alaska_native_population,d_american_indian_and_alaska_native_population_pct,d_asian_population,d_asian_population_pct,d_native_hawaiian_and_other_pacific_islander_population,d_native_hawaiian_and_other_pacific_islander_population_pct,d_some_other_race_popula
tion,d_some_other_race_population_pct,d_total_housing_units,d_no_bedroom_units,d_no_bedroom_units_pct,d_1_bedroom_units,d_1_bedroom_units_pct,d_2_bedroom_units,d_2_bedroom_units_pct,d_3_bedroom_units,d_3_bedroom_units_pct,d_4_bedroom_units,d_4_bedroom_units_pct,d_5_bedroom_units,d_5_bedroom_units_pct,d_owner_occupied_units,d_units_less_than_50000,d_units_less_than_50000_pct,d_units_from_50000_to_99999,d_units_from_50000_to_99999_pct,d_units_from_100000_to_149999,d_units_from_100000_to_149999_pct,d_units_from_150000_to_199999,d_units_from_150000_to_199999_pct,d_units_from_200000_to_299999,d_units_from_200000_to_299999_pct,d_units_from_300000_to_499999,d_units_from_300000_to_499999_pct,d_units_from_500000_to_999999,d_units_from_500000_to_999999_pct,d_units_1000000_or_more,d_units_1000000_or_more_pct,d_median_housing_price,d_total_households,d_households_less_than_10000,d_households_less_than_10000_pct,d_households_from_10000_to_14999,d_households_from_10000_to_14999_pct,d_households_from_15000_to_24999,d_households_from_15000_to_24999_pct,d_households_from_25000_to_34999,d_households_from_25000_to_34999_pct,d_households_from_35000_to_49999,d_households_from_35000_to_49999_pct,d_households_from_50000_to_74999,d_households_from_50000_to_74999_pct,d_households_from_75000_to_99999,d_households_from_75000_to_99999_pct,d_households_from_100000_to_149999,d_households_from_100000_to_149999_pct,d_households_from_150000_to_199999,d_households_from_150000_to_199999_pct,d_households_200000_or_more,d_households_200000_or_more_pct,d_median_household_income_dollars,d_mean_household_income_dollars
0,2,BATHGATE,01,03039,0064,467 EAST 185 STREET,,10458.0,1.0,0.0,1.0,1667.0,1296.0,1910.0,1,A1,329000,2011-01-19,2030390064,2010/11,A1,16,100,2.0,365000,44,40.856635,-73.892647,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
1,2,BATHGATE,02,03037,0006,4392 PARK AVENUE,,10457.0,2.0,0.0,2.0,2275.0,3240.0,1899.0,1,B2,115000,2011-01-29,2030370006,2010/11,B2,25,91,2.0,433000,50,40.852259,-73.896728,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
2,2,BATHGATE,02,03053,0022,511 EAST 183 STREET,,10458.0,2.0,1.0,3.0,2011.0,4280.0,1901.0,1,S2,167500,2011-01-28,2030530022,2010/11,S2,23,87,3.0,137000,60,40.854837,-73.892216,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
3,2,BATHGATE,03,03044,0056,1976 BATHGATE AVENUE,,10457.0,3.0,0.0,3.0,1493.0,2430.0,1931.0,1,C0,292500,2011-01-24,2030440056,2010/11,C0,18,82,2.0,398000,45,40.848517,-73.896224,Claremont-Bathgate,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
4,2,BAYCHESTER,01,04735,0061,3360 WILSON AVENUE,,10469.0,1.0,0.0,1.0,1900.0,1681.0,1930.0,1,A5,255000,2011-01-20,2047350061,2010/11,A5,19,100,2.0,333000,33,40.874588,-73.850192,Eastchester-Edenwald-Baychester,1,2011,3.391905,4.755,167.189630,141.525,221.187,222.803,0.168387,3.156842,311023.0,15712.754,1283.124286,9.1,57021.00,0.029070,0.008697,-0.006628,-0.003766,0.003243,0.002069,-0.080986,0.041710,0.000531,-0.000803,0.032955,-0.021505,-0.001289,BX,861146,137783.360,0.160,126588.462,0.147,237676.296,0.276,145533.674,0.169,63724.804,0.074,95587.206,0.111,54252.198,0.063,1392002.0,342432.492,0.246,503904.724,0.362,13920.020,0.010,55680.08,0.04,2784.004,0.002,524784.754,0.377,512320,30739.200,0.060,169577.920,0.331,173164.16,0.338,112710.400,0.220,17418.88,0.034,9221.760,0.018,88816,8259.888,0.093,3197.376,0.036,6750.016,0.076,4618.432,0.052,8082.256,0.091,38013.248,0.428,17940.832,0.202,1865.136,0.021,372100,467138,74274.942,0.159,44845.248,0.096,70537.838,0.151,52319.456,0.112,60727.940,0.130,68669.286,0.147,40641.006,0.087,37838.178,0.081,10744.174,0.023,6072.794,0.013,33101,48178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401785,5,WOODROW,02,06913,0048,554 DARLINGTON AVENUE,,10309.0,2.0,0.0,2.0,4000.0,1634.0,2005.0,1,B2,890000,2018-10-04,5069130048,2018/19,B2,40,100,2.0,793000,43,40.533587,-74.204538,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401786,5,WOODROW,02,06924,0062,922 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,4300.0,2564.0,2009.0,1,B2,1030000,2018-10-11,5069240062,2018/19,B2,43,100,2.0,959000,39,40.536844,-74.206480,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401787,5,WOODROW,02,06967,0005,987 RATHBUN AVENUE,,10309.0,2.0,0.0,2.0,2500.0,1440.0,1987.0,1,B9,615000,2018-10-24,5069670005,2018/19,B9,25,100,2.0,504000,45,40.536283,-74.208889,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953
401788,5,WOODROW,02,07020,0434,59 LYNBROOK AVENUE,,10309.0,2.0,0.0,2.0,5260.0,2600.0,2001.0,1,B2,780000,2018-10-22,5070200434,2018/19,B2,40,131,2.0,839000,54,40.539312,-74.216695,Rossville-Woodrow,10,2018,3.155652,4.830,200.124854,205.158,252.777,258.993,2.187742,1.969803,327477.0,18813.923,2785.464783,3.8,67608.25,0.052937,0.043760,0.004411,0.003758,0.002046,0.001496,0.119240,-0.025975,0.000455,0.001092,-0.039992,0.027027,0.005427,SI,332651,19293.758,0.058,21289.664,0.064,102789.159,0.309,53889.462,0.162,21289.664,0.064,69856.710,0.210,44242.583,0.133,476179.0,357610.429,0.751,54760.585,0.115,1428.537,0.003,52379.69,0.11,476.179,0.001,22856.592,0.048,181186,4348.464,0.024,27540.272,0.152,38954.99,0.215,82077.258,0.453,21742.32,0.120,6703.882,0.037,115872,1390.464,0.012,1158.720,0.010,463.488,0.004,1042.848,0.009,5677.728,0.049,37310.784,0.322,63497.856,0.548,5330.112,0.046,556000,167441,11553.429,0.069,6865.081,0.041,10883.665,0.065,9544.137,0.057,14065.044,0.084,22604.535,0.135,20930.125,0.125,32316.113,0.193,20595.243,0.123,18083.628,0.108,82166,101953


In [115]:
# preview the arrests frame that will be joined onto the merged frame
display(arrests)

Unnamed: 0,a_year_month,a_borough,a_count,a_year,a_month
0,2006-01,1.0,8323,2006,1
1,2006-01,2.0,7338,2006,1
2,2006-01,3.0,8483,2006,1
3,2006-01,4.0,6028,2006,1
4,2006-01,5.0,989,2006,1
...,...,...,...,...,...
835,2019-12,1.0,3606,2019,12
836,2019-12,2.0,3024,2019,12
837,2019-12,3.0,3591,2019,12
838,2019-12,4.0,2840,2019,12


In [116]:
# join merged df with arrests data on (year, month, borough).
# validate='many_to_one' makes pandas raise a MergeError if arrests holds more
# than one row for a key, instead of silently duplicating sales rows.
merged = pd.merge(left=merged,
                  right=arrests,
                  how='inner',
                  left_on=['s_sale_year', 's_sale_month', 's_borough'],
                  right_on=['a_year', 'a_month', 'a_borough'],
                  validate='many_to_one')

In [117]:
# join merged df with complaints data on (year, month, borough).
# validate='many_to_one' makes pandas raise a MergeError if complaints holds
# more than one row for a key, instead of silently duplicating sales rows.
merged = pd.merge(left=merged,
                  right=complaints,
                  how='inner',
                  left_on=['s_sale_year', 's_sale_month', 's_borough'],
                  right_on=['c_year', 'c_month', 'c_borough'],
                  validate='many_to_one')

In [119]:
# line up the join keys from all three sources (sales, arrests, complaints)
# so matching years, months and boroughs can be checked by eye
display(merged.loc[:, [
    's_sale_year', 'a_year', 'c_year',
    's_sale_month', 'a_month', 'c_month',
    's_borough', 'a_borough', 'c_borough',
]])

Unnamed: 0,s_sale_year,a_year,c_year,s_sale_month,a_month,c_month,s_borough,a_borough,c_borough
0,2011,2011,2011,1,1,1,2,2.0,2.0
1,2011,2011,2011,1,1,1,2,2.0,2.0
2,2011,2011,2011,1,1,1,2,2.0,2.0
3,2011,2011,2011,1,1,1,2,2.0,2.0
4,2011,2011,2011,1,1,1,2,2.0,2.0
...,...,...,...,...,...,...,...,...,...
401785,2018,2018,2018,10,10,10,5,5.0,5.0
401786,2018,2018,2018,10,10,10,5,5.0,5.0
401787,2018,2018,2018,10,10,10,5,5.0,5.0
401788,2018,2018,2018,10,10,10,5,5.0,5.0


In [120]:
# drop redundant cols: after the inner joins these arrests/complaints keys
# repeat the s_sale_year / s_sale_month / s_borough values.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# is deprecated in pandas 1.3 and rejected by pandas 2.0.
merged = merged.drop(columns=['a_year', 'a_month', 'a_borough', 'c_year', 'c_month', 'c_borough'])

In [121]:
# persist the final merged frame in both formats: pickle and CSV
merged.to_pickle(f'{path}merged-sales-val-macro-dem-crime.pkl')
merged.to_csv(f'{path}merged-sales-val-macro-dem-crime.csv')