### Import Packages

In [1]:
import pandas as pd 
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from pprint import pprint
from datetime import datetime

from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Append the entire repo parent director so files therein can be accessed in notebook
import os
import sys
import pathlib
sys.path.append(str(pathlib.Path().absolute().parent))

from src import helper
%load_ext autoreload

### Load Data

In [2]:
CA_time_interpolated = pd.read_pickle('../data/processed/interpolated_fillnaTime_df.pickle')
interp_preds = pd.read_csv('../data/predictions/rent_buy_zip_interp.csv')

In [3]:
interp_preds.rename(columns={'Unnamed: 0': 'zip_code'}, inplace=True)

In [None]:
CA_time_w_nulls = pd.read_pickle('../data/processed/interpolated_fillna0_df.pickle')

### ZipCode MetaData for Appreciation Rate Calculation Setup

In [5]:
cali_zips = CA_time_interpolated['ZipCode'].unique().tolist()
len(cali_zips)

1311

In [6]:
zip_metadata = dict()

for zipcode in cali_zips:
    # filter on the zipcode
    sub_df = CA_time_interpolated[CA_time_interpolated['ZipCode'] == zipcode].sort_values('ds', ascending=False)
    sub_df.reset_index(drop=True, inplace=True)
    
    # convert the 'ds' column to datetime
    n_recent = sub_df.loc[0, 'ds'].to_pydatetime()
    zhvi_0 = sub_df.loc[0, 'ZHVI_SingleFamilyResidence']
    zri_0 = sub_df.loc[0, 'Zri_MultiFamilyResidenceRental']
    
    zip_metadata[zipcode] = {
        'n_recent' : n_recent,
        'zhvi_0' : zhvi_0,
        'zri_0' : zri_0,
        'n' : helper.months_til_today(sub_df)
    }
    
len(zip_metadata)    

1311

In [7]:
zip_meta_df = pd.DataFrame(zip_metadata).T.reset_index()
zip_meta_df.rename(columns={'index':'zip_code'}, inplace=True)
zip_meta_df.head()

Unnamed: 0,zip_code,n_recent,zhvi_0,zri_0,n
0,90001,2017-12-31,367200.0,2193,27
1,90002,2017-12-31,355800.0,2201,27
2,90003,2017-12-31,381400.0,2206,27
3,90004,2017-12-31,1591700.0,2826,27
4,90005,2017-12-31,1470900.0,2751,27


In [8]:
interp_preds.rename(columns={
    'buy':'zhvi_n',
    'rent':'zri_n'
}, inplace=True)

In [9]:
merged_for_appr = interp_preds.merge(zip_meta_df, on='zip_code', how='left')
merged_for_appr.reset_index(drop=True, inplace=True)

In [10]:
merged_for_appr['y_zhvi'] = [np.log(zhvi_n/zhvi_0) for zhvi_n, zhvi_0 in zip(merged_for_appr['zhvi_n'], merged_for_appr['zhvi_0'])]
merged_for_appr['y_zri'] = [np.log(zri_n/zri_0) for zri_n, zri_0 in zip(merged_for_appr['zri_n'], merged_for_appr['zri_0'])]

### Note that predicted `zhvi_n` and `zri_n` are all les than `zhvi_0`, and `zri_0`, respectively

In [11]:
merged_for_appr.head()

Unnamed: 0,zip_code,zhvi_n,zri_n,n_recent,zhvi_0,zri_0,n,y_zhvi,y_zri
0,90001,290106.7,2107.267212,2017-12-31,367200.0,2193,27,-0.235658,-0.039879
1,90002,273803.7,2073.61767,2017-12-31,355800.0,2201,27,-0.261957,-0.059617
2,90003,292014.1,2111.681483,2017-12-31,381400.0,2206,27,-0.267047,-0.043696
3,90004,1668105.0,3066.411287,2017-12-31,1591700.0,2826,27,0.046886,0.081646
4,90005,1510442.0,2871.4835,2017-12-31,1470900.0,2751,27,0.026528,0.042864


In [13]:
(merged_for_appr['zhvi_n'] > merged_for_appr['zhvi_0']).sum()

456

In [14]:
(merged_for_appr['zri_n'] > merged_for_appr['zri_0']).sum()

891

In [34]:
test_check = merged_for_appr[merged_for_appr['zip_code'] == 90003]

print(test_check['zhvi_n'] - test_check['zhvi_0'])
print(test_check['zri_n'] - test_check['zri_0'])

2   -89385.9
dtype: object
2   -94.3185
dtype: object


In [17]:
merged_for_appr[merged_for_appr['y_zhvi'].isnull()]

Unnamed: 0,zip_code,zhvi_n,zri_n,n_recent,zhvi_0,zri_0,n,y_zhvi,y_zri
1231,93562,-66689.89,2663.971102,2017-12-31,80200,1965,27,,0.304326
1270,95113,-2685721.0,5951.109136,2017-12-31,606100,1965,27,,1.108085


In [47]:
skyes_updated_preds = {'90210': {'buy': '7139182.0948651815', 'rent': '5585.252900188447'}, 
                       '96161': {'buy': '678172.6440568111', 'rent': '2362.4637147408434'}, 
                       '90003': {'buy': '295360.6408715986', 'rent': '2456.4105652434223'}, 
                       '90044': {'buy': '367455.49184245645', 'rent': '2492.7508553227'}}

updated = pd.DataFrame(skyes_updated_preds).T
updated.reset_index(inplace=True)
updated.rename(columns={'index':'zip_code', 'buy':'upd_zhvi_n', 'rent': 'upd_zri_n'}, inplace=True)
updated['zip_code'] = updated['zip_code'].astype('int64')
updated['upd_zhvi_n'] = updated['upd_zhvi_n'].astype('float32')
updated['upd_zri_n'] = updated['upd_zri_n'].astype('float32')
updated.head()

Unnamed: 0,zip_code,upd_zhvi_n,upd_zri_n
0,90210,7139182.0,5585.25293
1,96161,678172.6,2362.463623
2,90003,295360.7,2456.410645
3,90044,367455.5,2492.750977


In [48]:
w_updated_preds = merged_for_appr.merge(updated, on='zip_code')
w_updated_preds

Unnamed: 0,zip_code,zhvi_n,zri_n,n_recent,zhvi_0,zri_0,n,y_zhvi,y_zri,upd_zhvi_n,upd_zri_n
0,90003,292014.1,2111.681483,2017-12-31,381400.0,2206,27,-0.267047,-0.043696,295360.7,2456.410645
1,90044,351040.7,2137.198959,2017-12-31,421900.0,2235,27,-0.183866,-0.044745,367455.5,2492.750977
2,90210,6303823.0,5045.896015,2017-12-31,6142100.0,4482,27,0.02599,0.118506,7139182.0,5585.25293
3,96161,601164.1,2028.848061,2017-12-31,606100.0,1965,27,-0.008177,0.031976,678172.6,2362.463623


In [52]:
print(w_updated_preds['upd_zhvi_n'] > w_updated_preds['zhvi_n'])
print(w_updated_preds['zhvi_n'] > w_updated_preds['zhvi_0'])
print(w_updated_preds['upd_zhvi_n'] > w_updated_preds['zhvi_0'])

0    True
1    True
2    True
3    True
dtype: bool
0    False
1    False
2     True
3    False
dtype: bool
0    False
1    False
2     True
3     True
dtype: bool
