In [126]:
import pandas as pd
from datetime import datetime
from datetime import date
from os import listdir
from os.path import isfile, join
from functools import reduce
import numpy as np

In [127]:
df_sales = pd.read_csv('../data/project/NYC Residential Sales Master.csv', index_col=0)

In [128]:
def category_date(d):
    if date(2009,1,1) <= d <= date(2010,12,31):
        return 'p1'
    if date(2011,1,1) <= d <= date(2012,12,31):
        return 'p2'
    if date(2013,1,1) <= d <= date(2014,12,31):
        return 'p3'
    if date(2015,1,1) <= d <= date(2016,12,31):
        return 'p4'
    if date(2017,1,1) <= d <= date(2018,12,31):
        return 'p5'
    if d >= date(2019,1,1):
        return 'p6'
    return None

dict_p = {
    'p1': '2009-2010',
    'p2': '2011-2012',
    'p3': '2013-2014',
    'p4': '2015-2016',
    'p5': '2017-2018',
    'p6': '2019'
}

In [129]:
def group_sales(period, df):
    df_sales_p = df[df['period']==period]
    df_sales_p = df_sales_p.drop_duplicates(keep='first')\
                           .reset_index(drop=True)\
                           .groupby(['ZIP'])\
                           .agg({'SALE PRICE': ['mean', 
                                                'median', 
                                                'count']})

    df_sales_p.columns = [f"average sales price {dict_p[period]}", 
                          f"median sales price {dict_p[period]}", 
                          f"number of transactions {dict_p[period]}"]
    
    df_sales_p = df_sales_p.reset_index()\
                           .astype(dtype={'ZIP': str})\
                           .reset_index(drop=True)
    
    return df_sales_p

In [130]:
def gen_sales_summary(df_sales, export_path):
    df_sales['SALE DATE'] = df_sales['SALE DATE'].apply(lambda x: datetime.strptime(x, '%m/%d/%Y'))
    df_sales['period'] = df_sales['SALE DATE'].apply(category_date)
    
    groups = [p for p in df_sales['period'].unique() if p is not None]
    
    for period in groups:
        df_sales_p = group_sales(period, df_sales)
        df_sales_p.to_csv(f'{export_path}/sales_summary_{period}.csv', index=False)

In [131]:
gen_sales_summary(df_sales, '../data/project/sales_summary')

In [155]:
def merge_dfs(export_path, on, key):
    filenames = [f for f in listdir(export_path) if isfile(join(export_path, f)) and f != '.DS_Store']
    dfs = []
    
    for fn in filenames:
        df = pd.read_csv(f'{export_path}/{fn}')
        dfs.append(df)
    
    df_merged = reduce(lambda left,right: pd.merge(left,right,on=[on],
                                            how='outer'), dfs)
    
    columns = df_merged.columns
    accepted = [col for col in columns if key in col]
    
    return df_merged[[on]+accepted]

In [156]:
sales_summary = merge_dfs('../data/project/sales_summary', 'ZIP', 'average')
sales_summary

Unnamed: 0,ZIP,average sales price 2011-2012,average sales price 2013-2014,average sales price 2009-2010,average sales price 2015-2016,average sales price 2017-2018,average sales price 2019
0,10001,3.030000e+06,6.471821e+06,3.716200e+06,1.234745e+07,1.146666e+07,8.637500e+06
1,10002,3.581600e+06,5.982807e+06,2.982263e+06,8.209445e+06,8.088947e+06,1.166302e+07
2,10003,5.921644e+06,6.491348e+06,5.065943e+06,1.145188e+07,9.364492e+06,1.138807e+07
3,10009,3.990324e+06,6.558075e+06,2.654923e+06,7.877050e+06,7.163569e+06,4.331993e+06
4,10010,4.380000e+06,4.063758e+07,4.098167e+06,8.892167e+06,8.298229e+06,
...,...,...,...,...,...,...,...
172,10007,,1.933333e+07,9.451570e+06,9.400000e+06,1.137500e+07,1.295000e+07
173,10018,,6.506040e+06,3.275000e+06,6.178333e+06,1.523500e+07,5.025000e+06
174,10803,,1.028750e+06,1.617500e+06,1.630630e+06,,
175,10004,,,,5.117333e+06,,


In [157]:
donations_summary = merge_dfs('../data/project/Political_contributions_summary', 'zip code', 'total').rename(columns={'zip code': 'ZIP'})
donations_summary

Unnamed: 0,ZIP,total donations 2011-2012,total donations 2017-2018,total donations 2019,total donations 2013-2014,total donations 2009-2010,total donations 2015-2016
0,1,3900.0,17441.0,4614.0,2210.0,9031.0,5989.0
1,2,600.0,12080.0,2425.0,1000.0,19700.0,1561.0
2,3,300.0,6690.0,425.0,,3000.0,1350.0
3,4,1300.0,4163.0,717.0,1115.0,13125.0,140.0
4,5,600.0,4618.0,250.0,,2200.0,
...,...,...,...,...,...,...,...
47726,99778,,,,,,315.0
47727,99825,,,,,,372.0
47728,99830,,,,,,50.0
47729,99832,,,,,,35.0


In [158]:
sales_zipcodes = sales_summary['ZIP'].unique()
donations_zipcodes = donations_summary['ZIP'].unique()
zipcodes = list(set(sales_zipcodes).intersection(set(donations_zipcodes)))

In [159]:
sales_data = sales_summary.loc[sales_summary.ZIP.isin(zipcodes)][sales_summary.columns].reset_index(drop=True)
donations_data = donations_summary.loc[donations_summary.ZIP.isin(zipcodes)][donations_summary.columns].reset_index(drop=True)
full_data = pd.merge(sales_data, donations_data, on='ZIP', how='outer').astype(dtype={'ZIP': str})


In [160]:
full_data

Unnamed: 0,ZIP,average sales price 2011-2012,average sales price 2013-2014,average sales price 2009-2010,average sales price 2015-2016,average sales price 2017-2018,average sales price 2019,total donations 2011-2012,total donations 2017-2018,total donations 2019,total donations 2013-2014,total donations 2009-2010,total donations 2015-2016
0,10001,3.030000e+06,6.471821e+06,3.716200e+06,1.234745e+07,1.146666e+07,8.637500e+06,679053.0,6900889.0,1010357.0,1466314.0,287000.0,4887784.0
1,10002,3.581600e+06,5.982807e+06,2.982263e+06,8.209445e+06,8.088947e+06,1.166302e+07,68310.0,602952.0,270415.0,99263.0,45405.0,935572.0
2,10003,5.921644e+06,6.491348e+06,5.065943e+06,1.145188e+07,9.364492e+06,1.138807e+07,1335278.0,7097077.0,2549678.0,1692731.0,905278.0,5857988.0
3,10009,3.990324e+06,6.558075e+06,2.654923e+06,7.877050e+06,7.163569e+06,4.331993e+06,113605.0,1654671.0,581690.0,138460.0,245399.0,952470.0
4,10010,4.380000e+06,4.063758e+07,4.098167e+06,8.892167e+06,8.298229e+06,,2437036.0,24303059.0,2600137.0,1977931.0,794876.0,19932395.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,10007,,1.933333e+07,9.451570e+06,9.400000e+06,1.137500e+07,1.295000e+07,379961.0,1660626.0,478061.0,355207.0,194284.0,1492999.0
173,10018,,6.506040e+06,3.275000e+06,6.178333e+06,1.523500e+07,5.025000e+06,1304544.0,2165805.0,789681.0,320047.0,228315.0,1454937.0
174,10803,,1.028750e+06,1.617500e+06,1.630630e+06,,,147121.0,449268.0,155708.0,118435.0,103546.0,467784.0
175,10004,,,,5.117333e+06,,,315941.0,1349763.0,409908.0,435572.0,721756.0,1394689.0


In [161]:
sub_markets = pd.read_csv('../data/project/All_Properties with Political_contributions PLUTO_New_Data.csv')
dict_submarkets = dict(zip(sub_markets['ZIP'].astype(str), sub_markets['REIS SUBMARKET']))

In [162]:
def assign_submarket(zipcode, dict_submarkets):
    try:
        return dict_submarkets[zipcode]
    except:
        return None

In [163]:
full_data['submarket'] = full_data['ZIP'].apply(lambda x: assign_submarket(x, dict_submarkets))

In [164]:
full_data

Unnamed: 0,ZIP,average sales price 2011-2012,average sales price 2013-2014,average sales price 2009-2010,average sales price 2015-2016,average sales price 2017-2018,average sales price 2019,total donations 2011-2012,total donations 2017-2018,total donations 2019,total donations 2013-2014,total donations 2009-2010,total donations 2015-2016,submarket
0,10001,3.030000e+06,6.471821e+06,3.716200e+06,1.234745e+07,1.146666e+07,8.637500e+06,679053.0,6900889.0,1010357.0,1466314.0,287000.0,4887784.0,Midtown West
1,10002,3.581600e+06,5.982807e+06,2.982263e+06,8.209445e+06,8.088947e+06,1.166302e+07,68310.0,602952.0,270415.0,99263.0,45405.0,935572.0,West Village/Downtown
2,10003,5.921644e+06,6.491348e+06,5.065943e+06,1.145188e+07,9.364492e+06,1.138807e+07,1335278.0,7097077.0,2549678.0,1692731.0,905278.0,5857988.0,West Village/Downtown
3,10009,3.990324e+06,6.558075e+06,2.654923e+06,7.877050e+06,7.163569e+06,4.331993e+06,113605.0,1654671.0,581690.0,138460.0,245399.0,952470.0,West Village/Downtown
4,10010,4.380000e+06,4.063758e+07,4.098167e+06,8.892167e+06,8.298229e+06,,2437036.0,24303059.0,2600137.0,1977931.0,794876.0,19932395.0,Kings County
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,10007,,1.933333e+07,9.451570e+06,9.400000e+06,1.137500e+07,1.295000e+07,379961.0,1660626.0,478061.0,355207.0,194284.0,1492999.0,Kings County
173,10018,,6.506040e+06,3.275000e+06,6.178333e+06,1.523500e+07,5.025000e+06,1304544.0,2165805.0,789681.0,320047.0,228315.0,1454937.0,Midtown West
174,10803,,1.028750e+06,1.617500e+06,1.630630e+06,,,147121.0,449268.0,155708.0,118435.0,103546.0,467784.0,
175,10004,,,,5.117333e+06,,,315941.0,1349763.0,409908.0,435572.0,721756.0,1394689.0,West Village/Downtown


In [165]:
submarkets = [sub for sub in full_data.submarket.unique() if sub is not None]
submarkets

['Midtown West',
 'West Village/Downtown',
 'Kings County',
 'West village/Downtown',
 'Upper East Side',
 'Upper West Side',
 'Morningside/Washington',
 'Staten Island',
 'Bronx County',
 'Queens County']

In [177]:
data = full_data[full_data.submarket==submarkets[9]]
data.corr()

Unnamed: 0,average sales price 2011-2012,average sales price 2013-2014,average sales price 2009-2010,average sales price 2015-2016,average sales price 2017-2018,average sales price 2019,total donations 2011-2012,total donations 2017-2018,total donations 2019,total donations 2013-2014,total donations 2009-2010,total donations 2015-2016
average sales price 2011-2012,1.0,0.892749,0.945563,0.917602,0.914293,0.134965,0.425641,0.581025,0.653293,0.55424,0.435418,0.593458
average sales price 2013-2014,0.892749,1.0,0.852359,0.857371,0.869802,0.13043,0.430744,0.566286,0.620805,0.528229,0.397572,0.578995
average sales price 2009-2010,0.945563,0.852359,1.0,0.84919,0.860883,0.08735,0.410909,0.550684,0.619673,0.51432,0.408473,0.548711
average sales price 2015-2016,0.917602,0.857371,0.84919,1.0,0.966626,0.122746,0.419698,0.628516,0.642816,0.578219,0.434736,0.581319
average sales price 2017-2018,0.914293,0.869802,0.860883,0.966626,1.0,0.11465,0.349731,0.527374,0.582091,0.455654,0.364621,0.530293
average sales price 2019,0.134965,0.13043,0.08735,0.122746,0.11465,1.0,0.06998,0.114115,0.136768,0.11084,0.091115,0.142196
total donations 2011-2012,0.425641,0.430744,0.410909,0.419698,0.349731,0.06998,1.0,0.797068,0.815755,0.757275,0.834805,0.72344
total donations 2017-2018,0.581025,0.566286,0.550684,0.628516,0.527374,0.114115,0.797068,1.0,0.915058,0.959103,0.656996,0.869889
total donations 2019,0.653293,0.620805,0.619673,0.642816,0.582091,0.136768,0.815755,0.915058,1.0,0.854808,0.71288,0.810448
total donations 2013-2014,0.55424,0.528229,0.51432,0.578219,0.455654,0.11084,0.757275,0.959103,0.854808,1.0,0.636072,0.819759
