In [1]:
# Import essential libraries and packages
import pandas as pd
import numpy as np
import seaborn as sns
import math
import scipy as sp
import itertools
from pandas.tseries.offsets import DateOffset
import os  #provides functions for interacting with the operating system


from matplotlib import pyplot as plt
from IPython.display import Image
import altair as alt
from pandas.plotting import register_matplotlib_converters
from pandas.plotting import autocorrelation_plot

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm
from pmdarima import model_selection
from fbprophet import Prophet
import pickle
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

# PLOT 1 

In [2]:
## Loading All Datasets
# One pickled DataFrame per county, read from the current working directory
# (relative paths). Presumably these were written by an earlier forecasting
# notebook -- TODO confirm the producer.
# NOTE(review): unpickling can execute arbitrary code; only load files you
# created yourself or otherwise trust.

df_alm = pd.read_pickle('df_alm.pickle')
df_con = pd.read_pickle('df_con.pickle')
df_mar = pd.read_pickle('df_mar.pickle')
df_nap = pd.read_pickle('df_nap.pickle')
df_sf  = pd.read_pickle('df_sf.pickle')
df_smt = pd.read_pickle('df_smt.pickle')
df_scl = pd.read_pickle('df_scl.pickle')
df_sol = pd.read_pickle('df_sol.pickle')
df_son = pd.read_pickle('df_son.pickle')

In [3]:
# Peek at the first rows of the Alameda frame to see the raw column layout
df_alm.head()

Unnamed: 0,Time,Alameda,Prediction ARIMA,lower_bond,higher_bond,Time2
0,2009-01-01,324170.0,,,,Jan 01 2009
1,2009-02-01,327370.0,,,,Feb 01 2009
2,2009-03-01,316440.0,,,,Mar 01 2009
3,2009-04-01,337100.0,,,,Apr 01 2009
4,2009-05-01,374850.0,,,,May 01 2009


In [4]:
# Check dtypes and non-null counts; the forecast columns are filled for only part of the rows
df_alm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 159 entries, 0 to 50
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Time              159 non-null    datetime64[ns]
 1   Alameda           147 non-null    float64       
 2   Prediction ARIMA  51 non-null     float64       
 3   lower_bond        51 non-null     float64       
 4   higher_bond       51 non-null     float64       
 5   Time2             159 non-null    object        
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 8.7+ KB


In [5]:
def df_manipulater_func(df):
    '''
        Normalise one county DataFrame in place so every county shares the
        same column names.

        Steps:
        - Add a 'County' column holding the county name in every row.
          The name is taken from the header of the second column.
        - Rename that county-named price column to 'Actual'.
        - Rename the 'Time2' column to 'Date'.

        The function is safe to call more than once on the same frame: a
        frame that already has both 'County' and 'Actual' columns is left
        untouched. Without this guard a second call would read 'Actual' as
        the county name and overwrite the 'County' column with it.

        Parameters
        ----------
        df : pandas.DataFrame
            County frame whose second column is named after the county and
            which carries a 'Time2' column.

        Returns
        -------
        None
            ``df`` is modified in place.
    '''

    # Already normalised (e.g. the cell was re-run): nothing to do.
    if 'County' in df.columns and 'Actual' in df.columns:
        return

    # The second column's header is the county name.
    county_name = df.columns[1]
    df['County'] = county_name

    # Price column -> 'Actual', 'Time2' -> 'Date'.
    df.rename(columns={county_name: 'Actual', 'Time2': 'Date'}, inplace=True)

In [6]:
# Gather the nine county frames into one list so they can be processed together

df_list = [
    df_alm, df_con, df_mar,
    df_nap, df_sf, df_smt,
    df_scl, df_sol, df_son,
]

In [7]:
# Apply the manipulater function to every county frame.
# Each frame is modified in place, so df_alm ... df_son themselves carry the
# 'County', 'Actual' and 'Date' columns afterwards.

for df_ in df_list: 
    df_manipulater_func(df_)

In [8]:
# Spot-check one frame after the transformation: 'Actual', 'Date' and 'County' should now be present
df_sol.head()

Unnamed: 0,Time,Actual,Prediction ARIMA,lower_bond,higher_bond,Date,County
0,2009-01-01,206034.0,,,,Jan 01 2009,Solano
1,2009-02-01,204779.0,,,,Feb 01 2009,Solano
2,2009-03-01,192285.0,,,,Mar 01 2009,Solano
3,2009-04-01,185769.0,,,,Apr 01 2009,Solano
4,2009-05-01,199676.0,,,,May 01 2009,Solano


In [9]:
# Stack all county frames into one long table for plotting.
# Reuse df_list instead of spelling the nine frames out again, so the set of
# counties is defined in exactly one place. Row indices are kept as they are.

df_all_counties = pd.concat(df_list)

In [10]:
# County picker: a drop-down bound to a single-value selection on 'County',
# starting on San Francisco.
input_dropdown = alt.binding_select(
    options=[
        'Alameda', 'Contra Costa', 'Marin', 'Napa', 'San Francisco',
        'San Mateo', 'Santa Clara', 'Solano', 'Sonoma',
    ],
    name="County    ",
)
selection = alt.selection_single(
    bind=input_dropdown,
    fields=['County'],
    init={'County': "San Francisco"},
)


# Base chart shared by every layer. The calculated constant fields exist only
# to give each layer a legend entry.
base = (
    alt.Chart(df_all_counties)
    .transform_calculate(
        line1="'Actual'",
        line2="'Prediction'",
        shade="'95% Confidence Interval'",
    )
    .properties(
        title=['Single Family Home Price', 'Across Counties in Bay Area, California'],
        width=865,
        height=395,
    )
)


# One colour scale for all layers so they share a single legend.
scale = alt.Scale(
    domain=["Actual", "Prediction", "95% Confidence Interval"],
    range=['blue', 'red', 'grey'],
)


def _time_x():
    """Return a fresh x encoding: 'Date' read as temporal, axis titled 'Time'."""
    return alt.X('Date:T', title='Time')


def _price_y(shorthand):
    """Return a fresh y encoding for ``shorthand`` with the axis titled 'Price'."""
    return alt.Y(shorthand, axis=alt.Axis(title="Price"))


# Observed prices.
line1 = base.mark_line(opacity=0.8, color='blue').encode(
    x=_time_x(),
    y=_price_y('Actual:Q'),
    color=alt.Color('line1:N', scale=scale, title=' '),
)

# ARIMA forecast.
line2 = base.mark_line(opacity=0.8, color='red').encode(
    x=_time_x(),
    y=_price_y('Prediction ARIMA:Q'),
    color=alt.Color('line2:N', scale=scale, title=' '),
)

# Band between the lower and upper forecast bounds.
shade = base.mark_area(opacity=0.25, color='grey').encode(
    x=_time_x(),
    y=_price_y("lower_bond"),
    y2='higher_bond',
    color=alt.Color('shade:N', scale=scale, title=''),
)



# Tooltip Function 

def createTooltip():
    """
        Build the hover layer for the price chart.

        Returns an Altair chart over ``df_all_counties`` that draws a thin
        dashed vertical rule which is only visible for the hovered date, and
        whose tooltip lists the date, the actual price, the prediction and
        the lower/upper limits. Missing values are shown as 'NA'.
    """
    # Nearest-point selection on 'Date': active on mouseover, cleared on mouseout.
    nearest_date = alt.selection_single(
        on="mouseover",
        clear="mouseout",
        nearest=True,
        empty="none",
        fields=["Date"],
    )

    # Calculated fields that fall back to 'NA' when the source value is missing.
    na_safe_fields = dict(
        P="isValid(datum['Prediction ARIMA']) ? datum['Prediction ARIMA'] : 'NA'",
        A="isValid(datum['Actual']) ? datum['Actual'] : 'NA'",
        L="isValid(datum['lower_bond']) ? datum['lower_bond'] : 'NA'",
        U="isValid(datum['higher_bond']) ? datum['higher_bond'] : 'NA'",
    )

    tooltip_rows = [
        alt.Tooltip("Date:T", title = 'Date'),
        alt.Tooltip("A:N", title='Actual', format=",.0f"),
        alt.Tooltip("P:N", title='Prediction', format=",.0f"),
        alt.Tooltip("L:N", title='95% Lower Limit', format=",.0f"),
        alt.Tooltip("U:N", title="95% Upper Limit", format=",.0f"),
    ]

    rule = (
        alt.Chart(df_all_counties)
        .transform_calculate(**na_safe_fields)
        .mark_rule(strokeWidth=0.5, color="black", strokeDash=[4,2])
    )

    # The rule is fully transparent unless its date is the hovered one.
    return rule.encode(
        x='Time',
        opacity=alt.condition(nearest_date, alt.value(1), alt.value(0)),
        tooltip=tooltip_rows,
    ).add_selection(nearest_date)


tooltips = createTooltip()

# Stack the price layers with the hover rule, attach the county drop-down to
# the layered chart and keep only the rows of the selected county.
# Legend labels use the CSS colour name 'black': the matplotlib shorthand 'k'
# is not a colour Vega understands.
chart_a = (
    alt.layer(line1 + line2 + shade, tooltips)
    .add_selection(selection)
    .transform_filter(selection)
    .configure_title(fontSize=24, font='Helvetica Sans', color='dimgrey', dy=-15)
    .configure_legend(labelFontSize=12, labelColor='black')
    .configure_axisY(
        labelPadding=10,
        titleX=-80,
        labelFontSize=12,
        titleFontSize=18,
        titleColor='dimgray',
        titleFont='Helvetica Sans',
        labelColor='grey',
    )
    .configure_axisX(
        grid=False,
        titleY=30,
        titleFontSize=18,
        labelFontSize=12,
        titleColor='dimgray',
        titleFont='Helvetica Sans',
        labelColor='grey',
    )
    .configure_axis(gridOpacity=0.6)
    .interactive()  # enable pan / zoom
)


chart_a

In [11]:
# Write the chart to an HTML file (relative path, so it lands in the current working directory)
chart_a.save('chart_a.html')