In [None]:
import pandas as pd

fed_files = ["MORTGAGE30US.csv", "RRVRUSQ156N.csv", "CPIAUCSL.csv"]
dfs = [pd.read_csv(f, parse_dates=True, index_col=0) for f in fed_files]

In [None]:
fed_data = pd.concat(dfs, axis=1)
fed_data =fed_data.ffill()
fed_data.tail(40)

In [None]:
zillow_files = ['Metro_median_sale_price_uc_sfrcondo_week.csv', 'Metro_zhvi_uc_sfrcondo_tier_0.33_0.67_month.csv' ]

dfs = [pd.read_csv(f) for f in zillow_files]

In [None]:
dfs = [pd.DataFrame(df.iloc[0,5:]) for df in dfs]
for df in dfs:
    df.index = pd.to_datetime(df.index)
    df['month'] = df.index.to_period('M')

In [None]:
dfs[1]

In [None]:
price_data = dfs[0].merge(dfs[1], on='month')
price_data.index = dfs[0].index
price_data

In [None]:
del price_data['month']
price_data.columns = ['price', 'value']
price_data

In [None]:
fed_data = fed_data.dropna()
fed_data.tail(50)

In [None]:
from datetime import timedelta
fed_data.index = fed_data.index + timedelta(days=2)
fed_data

In [None]:

price_data = fed_data.merge(price_data, left_index=True, right_index=True)
price_data.columns = ['interest', 'vacancy', 'cpi', 'price', 'value']
price_data

In [None]:
price_data.plot.line(x='price', use_index=True)

In [None]:
price_data['adj_price'] = price_data['price'] / price_data['cpi']*100
price_data.plot.line(x='adj_price',  use_index=True)

In [None]:
price_data['adj_value'] = price_data['value'] / price_data['cpi']*100
price_data['next_quarter'] = price_data['adj_price'].shift(-13)
price_data

In [None]:
price_data['change'] = (price_data['next_quarter'] > price_data['adj_price']).astype(int)
price_data['change'].value_counts()

In [None]:
predictors = ['interest', 'vacancy', 'adj_price', 'adj_value']
target = 'change'

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
def predict (train, test,predictors, target):
    rf = RandomForestClassifier(min_samples_split=10, random_state=1)
    rf.fit(train[predictors], train[target])
    preds = rf.predict(test[predictors])
    return preds

def backtests (data, predictors, target,START=100, STEP=10):
    all_preds = []
    for i in range(START, data.shape[0], STEP):
        train = price_data.iloc[:i]
        test = price_data.iloc[i:(i+STEP)]
        all_preds.append(predict(train, test, predictors, target))
    preds = np.concatenate(all_preds)
    accuracy = accuracy_score(data.iloc[START:(START + len(preds))][target], preds)
    return preds, accuracy

In [105]:
preds, accuracy = backtests(price_data, predictors, target)
accuracy

0.6283464566929133

In [None]:
yearly =price_data.rolling(52,min_periods=1).mean()
yearly

In [None]:
year_ratios = [p+"_year" for p in predictors]
price_data[year_ratios] = yearly[predictors]/yearly[predictors]
price_data

In [104]:
preds, accuracy = backtests(price_data, predictors + year_ratios, target)
accuracy

0.5984251968503937