# Running the Prophet Model on selected zipcodes

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.pylab import rcParams
%matplotlib inline
import warnings
from fbprophet import Prophet as proph
import seaborn as sns
import pickle
import random
import functions as fn

# Apply matplotlib's 'fivethirtyeight' style sheet to every figure drawn in this notebook.
plt.style.use('fivethirtyeight')
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / SettingWithCopy warnings raised by later cells.
warnings.filterwarnings('ignore')

In [None]:
#Running the annualised_returns notebook to generate the pickle.
# NOTE(review): presumably this is what writes 'annualised_returns.pickle' (loaded in the next cell) --
# confirm against annualised_return.ipynb; nothing in this notebook shows what that notebook produces.

%run ./annualised_return.ipynb

In [None]:
# Load the two inputs of this notebook from disk:
#   train.pickle               -> train_df
#   annualised_returns.pickle  -> annualised_returns
# pickle.load executes arbitrary code from the file, so only load pickles produced by this project.

with open('train.pickle', 'rb') as train_file:
    train_df = pickle.load(train_file)

with open('annualised_returns.pickle', 'rb') as returns_file:
    annualised_returns = pickle.load(returns_file)

Subsetting the list of zipcodes that yielded an annualised return of over 15% (noted as over15pct in the annualised_return notebook).  

In [None]:
# RegionName of every row whose 'Ann_returns' is strictly greater than 0.15.
beat_15pct = annualised_returns['Ann_returns'] > 0.15
over15pct = list(annualised_returns.loc[beat_15pct, 'RegionName'])

In [None]:
# Distinct RegionName values found in the training set, as a plain list.

zip_column = train_df['RegionName']
unique = list(zip_column.unique())

The zipcodes from the annualised return list are also present in the training set as these come from the same source file.

## Running a prophet model for a zipcode drawn at random from the annualised return list

In [None]:
#Selecting a random zipcode
# The generator is seeded first so that Restart & Run All always draws the same zipcode and the
# plots and forecast in the following cells are reproducible. Change RANDOM_SEED to sample another one.

RANDOM_SEED = 42
random.seed(RANDOM_SEED)
random_zip = random.choice(over15pct)

In [None]:
# Keep only the rows of the selected zipcode, and only the 'time' and 'value' columns.

in_random_zip = train_df['RegionName'] == random_zip
test_zip_df = train_df.loc[in_random_zip, ['time', 'value']]

In [None]:
# Prophet requires the date column to be named 'ds' and the target column 'y'.

prophet_column_names = {'time': 'ds', 'value': 'y'}
test_zip_df = test_zip_df.rename(columns=prophet_column_names)

In [None]:
# Plotting the time series of the randomly selected zipcode

idx = test_zip_df.set_index('ds')
# x and y are passed by keyword: seaborn deprecated positional data arguments in v0.11 and
# raises a TypeError for them from v0.12 onwards. Keyword arguments work on both.
sns.lineplot(x=idx.index, y=idx['y'])
plt.title(f'Median value of homes in zipcode {random_zip} / month')
plt.xlabel('Date')
plt.show()

Initialising the Prophet model

In [None]:
#Calling the model and fitting the time series from the randomly selected zipcode.
# `proph` is the alias given to fbprophet's Prophet class in the import cell.
# test_zip_df is the frame whose columns were renamed to 'ds' / 'y' in an earlier cell.

Model = proph(interval_width=0.95) #Setting the uncertainty interval to 95%.
Model.fit(test_zip_df)

In [None]:
#Using the make_future_dataframe function with a monthly frequency and 36 monthly periods.
# 'MS' is the pandas offset alias for month-start, so each future date is the first day of a month.
# The tail is displayed to inspect the last dates of the frame.

future_dates = Model.make_future_dataframe(periods=36, freq='MS')
future_dates.tail()

In [None]:
#Predicting the values for future dates.
# The result is stored in `forecast`; the next cells read its 'ds', 'yhat', 'yhat_lower' and 'yhat_upper' columns.

forecast = Model.predict(future_dates)

In [None]:
# Show only the date, the point forecast and the bounds of its uncertainty interval.

forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[forecast_columns]

In [None]:
#Using prophet's plot function to plot the predictions
# uncertainty=True asks the plot to include the uncertainty interval of the forecast.

Model.plot(forecast, uncertainty=True)
plt.show()

After successfully fitting a Prophet model for one zipcode, we repeat the process for the remaining 206 zipcodes.

## Running a prophet model for all zipcodes drawn in the annualised return list

In [None]:
# Forecast every zipcode in over15pct with the project helper from functions.py.
# NOTE(review): presumably this fits one Prophet model per zipcode and returns a dict of forecasts
# (the next cell passes the result to fn.dict_to_df) -- confirm against functions.py.
best_past_performers = fn.prophet_forecast(train_df, over15pct)

In [None]:
# Convert the forecasts into one dataframe via the project helper.
# Later cells read the 'ds', 'yhat' and 'RegionName' columns of `merged`.
merged = fn.dict_to_df(best_past_performers)

In [None]:
#The years used for the year-on-year comparison: 2018 through 2021
years = range(2018, 2022)
#One anchor date per year, 1 June (the original note says predictions start from June -- TODO confirm)
year_month_list = [datetime(year, 6, 1).date() for year in years]
#Keep only the rows of merged whose 'ds' is one of the anchor dates
on_anchor_date = merged['ds'].isin(year_month_list)
forecast_returns = merged.loc[on_anchor_date]

In [None]:
#Calculating each forecasted year's return and dropping the resulting null values.
# 'returns' is a row's yhat divided by the previous row's yhat within the same RegionName, so it
# relies on the rows already being in chronological order per zipcode -- TODO confirm.
# The first row of each zipcode has no predecessor, gets NaN and is removed by dropna.
# .assign() builds a new frame instead of writing a column into the .loc slice created in the
# previous cell, which is the pattern behind pandas' SettingWithCopyWarning (hidden here by the
# global warnings filter).

previous_yhat = forecast_returns.groupby('RegionName')['yhat'].shift(1)
forecast_returns = (
    forecast_returns
    .assign(returns=forecast_returns['yhat'].div(previous_yhat))
    .dropna(subset=['returns'])
)

In [None]:
# Annualise the per-year forecast returns with the project helper.
# Later cells read the 'Ann_returns' column of the result.
# NOTE(review): the name forecast_returns is rebound here, so re-running this cell alone feeds the
# helper its own previous output -- re-run from the subsetting cell instead.
forecast_returns = fn.annualised_returns(forecast_returns)

In [None]:
# Histogram of the predicted annualised returns. Axis labels are set so the figure can be
# read on its own when the notebook is skimmed.
plt.hist(forecast_returns['Ann_returns'])
plt.title('Distribution of predicted annualised returns')
plt.xlabel('Predicted annualised return')
plt.ylabel('Number of zipcodes');

In [None]:
# Rows whose predicted annualised return is strictly above 0.15, plus a one-line count.
is_top_forecast = forecast_returns['Ann_returns'] > 0.15
top_forecast_returns = forecast_returns.loc[is_top_forecast]
print(f'The number of zipcodes that have yielded a predicted annualised return of over 15% is {len(top_forecast_returns)}.')

In [None]:
# Display the rows selected above (predicted annualised return over 0.15).
top_forecast_returns

Saving the subset of zipcodes that are predicted to generate an annualised rate of return of over 15% for the next 3 years.

In [None]:
# Persist the selected rows to pred_returns.pickle using the highest pickle protocol available.
with open('pred_returns.pickle', 'wb') as out_file:
    pickle.dump(top_forecast_returns, out_file, protocol=pickle.HIGHEST_PROTOCOL)