# Running the Prophet Model on the selected Zipcodes

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.pylab import rcParams
%matplotlib inline
import warnings
from fbprophet import Prophet as proph
import pickle
import random

# Apply the 'fivethirtyeight' matplotlib style to the figures drawn in this notebook.
plt.style.use('fivethirtyeight')
# Silence every Python warning from here on (this also hides pandas/Prophet warnings).
warnings.filterwarnings('ignore')

In [None]:
# Run the updated_zillow_data notebook in order to retrieve the split dataset.
# NOTE(review): presumably that notebook is what writes the pickle files read below - confirm.

%run ./updated_zillow_data.ipynb

In [None]:
# Load the two pickle files: one contains the train data and the other contains the test data.

with open('train.pickle', 'rb') as f:
    train_df = pickle.load(f)

with open('test.pickle', 'rb') as f:
    test_df = pickle.load(f)

In [None]:
# Load the last pickle file, which holds the annualised returns table
# (the zipcodes returning over 15% are selected from it further down).

with open('annualised_returns.pickle', 'rb') as f:
    annualised_returns = pickle.load(f)

In [None]:
train_df.head()

In [None]:
#Creating a list with the unique zipcodes in the train set.

unique = list((train_df['RegionName'].unique()))

In [None]:
annualised_returns.head()

In [None]:
#Creating a list with the zipcodes whose annualised return yielded over 15%.

over15pct = list(annualised_returns.loc[annualised_returns['Ann_returns'] > 0.15]['RegionName'])

The zipcodes in the annualised return list also appear in the training set, because both are derived from the same source file.

## Running a prophet model for a zipcode drawn at random from the annualised return list

In [None]:
#Selecting a random zipcode

random_zip = random.choice(over15pct)

In [None]:
#Generating a new dataframe with the value of the zipcode at the different dates

test_zip_df = train_df.loc[(train_df['RegionName'] == random_zip)][['time', 'value']]

In [None]:
# Renaming the columns [time, value] to [ds, y] as required by the prophet model

test_zip_df = test_zip_df.rename(columns={'time': 'ds', 'value': 'y'})

In [None]:
# Plot the time series of the randomly selected zipcode.
# Drawn with matplotlib (already imported as plt): seaborn's `sns` alias is never
# imported in this notebook, so the previous sns.lineplot call depended on stale state.

idx = test_zip_df.set_index('ds')
plt.plot(idx.index, idx['y'])
plt.title(f'Median value of homes in zipcode {random_zip} / month')
plt.xlabel('Date')
plt.show();

In [None]:
# Instantiate the Prophet model and fit it on the time series of the randomly selected zipcode.

Model = proph(interval_width=0.95) # Set the uncertainty interval to 95% (the Prophet default is 80%).
Model.fit(test_zip_df)

In [None]:
# Build the dataframe of dates to predict with make_future_dataframe:
# 36 periods at a monthly ('MS') frequency, i.e. 3 years. Show its last rows.

future_dates = Model.make_future_dataframe(periods=36, freq='MS')
future_dates.tail()

In [None]:
# Predict the values for those dates and show the first rows of the forecast.

forecast = Model.predict(future_dates)
forecast.head()

In [None]:
# Keep only the date, the prediction and its lower/upper bounds, and show the last rows.

forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

In [None]:
# Use Prophet's plot function to plot the predictions together with their uncertainty band.

Model.plot(forecast, uncertainty=True)
plt.show()

In [None]:
# NOTE(review): presumably plots the individual components (e.g. trend, seasonality)
# of the forecast - confirm against the Prophet documentation.
Model.plot_components(forecast)
plt.show()

## Running a Prophet model for every zipcode in the annualised return list

In [None]:
def prophet_forecast(df, intersection):
    """ Fit one Prophet model per zipcode and collect the resulting forecasts.

    For every zipcode in `intersection`, the rows of `df` whose 'RegionName' equals
    that zipcode are fitted with a Prophet model (95% uncertainty interval) and
    predicted over a future dataframe built with 36 monthly ('MS') periods.

    Returns a dictionary keyed by zipcode whose values are dataframes with the
    columns ds, yhat, yhat_lower and yhat_upper.
    """
    prophet_names = {'time': 'ds', 'value': 'y'}
    kept_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    forecasts = {}

    for zipcode in intersection:
        # History of this zipcode, renamed to the ds/y columns Prophet expects.
        in_zipcode = df['RegionName'] == zipcode
        history = df.loc[in_zipcode][['time', 'value']].rename(columns=prophet_names)

        model = proph(interval_width=0.95)
        model.fit(history)

        horizon = model.make_future_dataframe(periods=36, freq='MS')
        forecasts[zipcode] = model.predict(horizon)[kept_columns]

    return forecasts

In [None]:
# Forecast every over-15% zipcode with prophet_forecast, the function defined in this notebook;
# the cells below read the 'ds' and 'yhat' columns of the dataframes it returns.
best_past_performers = prophet_forecast(train_df, over15pct)

In [None]:
def dict_to_df(dictionary):
    """ Stack the dataframes stored in a dictionary into one merged dataframe.

    Every dataframe is tagged with a 'RegionName' column holding its dictionary key,
    then all of them are concatenated row-wise in the dictionary's iteration order.
    The original row indexes are kept, so index labels can repeat in the result.

    Args:
        dictionary: mapping of key (e.g. a zipcode) to a pandas DataFrame.

    Returns:
        The merged DataFrame, or an empty DataFrame when the dictionary is empty.
    """
    # assign() returns a new dataframe, so the caller's dataframes are left untouched.
    tagged = [df.assign(RegionName=key) for key, df in dictionary.items()]

    # pd.concat raises ValueError when given nothing to concatenate.
    if not tagged:
        return pd.DataFrame(data=None)

    # One concat at the end instead of re-copying the accumulated frame on every iteration.
    return pd.concat(tagged, axis=0)

In [None]:
merged = dict_to_df(best_past_performers)

In [None]:
from datetime import datetime

years = range(2017, 2021)
year_month_list = [datetime.strptime(f'{year}-06-01', '%Y-%m-%d').date() for year in years]
forecast_returns = merged.loc[merged['ds'].isin(year_month_list)]

In [None]:
forecast_returns['returns'] = forecast_returns['yhat'].div(forecast_returns.groupby('RegionName')['yhat'].shift(1))

In [None]:
forecast_returns = forecast_returns.dropna(subset=['returns'])

In [None]:
def predicted_annualised_returns(df):
    """ Compute the annualised return of every zipcode from its per-period growth factors.

    The 'returns' values of a zipcode are multiplied together and the n-th root of the
    product is taken, n being the number of values (a geometric mean), minus 1.

    Args:
        df: dataframe with a 'RegionName' column and a 'returns' column of growth
            factors (e.g. 1.10 for +10%), one row per zipcode and period.

    Returns:
        A dataframe with the columns ['RegionName', 'Ann_returns'], one row per
        zipcode, sorted by 'Ann_returns' in descending order.
    """

    pred_annualised_return = {}

    # Group once so each zipcode is processed a single time; sort=False keeps the
    # zipcodes in their order of first appearance in df.
    for zipcode, returns in df.groupby('RegionName', sort=False)['returns']:
        tot_return = 1
        for r in returns:
            tot_return = tot_return * r

        pred_annualised_return[zipcode] = (tot_return ** (1 / len(returns))) - 1

    pred_annualised_return = pd.DataFrame(list(pred_annualised_return.items()),
                                          columns=['RegionName', 'Ann_returns'])

    return pred_annualised_return.sort_values('Ann_returns', ascending=False)

In [None]:
# Replace the per-date table with one predicted annualised return per zipcode.
forecast_returns = predicted_annualised_returns(forecast_returns)

In [None]:
# Histogram of the predicted annualised returns.
plt.hist(forecast_returns['Ann_returns'])

In [None]:
# Keep the zipcodes whose predicted annualised return is above 15% and report how many there are.
top_forecast_returns= forecast_returns.loc[forecast_returns['Ann_returns'] > 0.15]
print(f'The number of zipcodes that have yielded an annualised return of over 15% is {len(top_forecast_returns)}.')

In [None]:
top_forecast_returns

In [None]:
# Save the selected zipcodes and their predicted returns to pred_returns.pickle.
with open('pred_returns.pickle', 'wb') as f:
    pickle.dump(top_forecast_returns, f, pickle.HIGHEST_PROTOCOL)