# VAR Implementation
This notebook walks through our Vector Autoregression (VAR) implementation as contained in the `var_module.py` module.

In [1]:
import importlib
import sys

import var_module as var



## Initialize our custom parameters
Predictors must match the available column names exactly, and the city must match an available city name exactly.

In [2]:
# Input file and the city whose rows will be modelled.
data_path = "pres-data.csv"
city = "Atlanta"  # must match a city present in the data exactly

# Passed as `order` to the data-preparation and projection calls.
# Presumably the VAR lag order: with 4, the prepared frame carries
# `_0` .. `_3` columns per predictor.
order = 4

# Columns to model; each name must match a column in the data exactly.
predictors = [
    "med_housing",
    "unemployment",
    "housing_listings",
    "population",
    "income",
]

# Variable handed to the model writer as `key_var`.
key_var = "med_housing"

## Initialize & Preprocess data
Ingest the raw data and generate the output data frame with the specified predictors.

In [3]:
# Load the raw data file for the configured predictors, then preview it.
df = var.read_data(
    path=data_path,
    predictors=predictors,
)
df

Unnamed: 0,year_month,year,month,city,state,med_housing,unemployment,housing_listings,population,income
0,2016.07,2016,7,Atlanta,GA,272000.0000,5.1,14848.0,5791.874,48657.0
1,2016.08,2016,8,Atlanta,GA,269900.0000,5.1,13340.0,5791.874,48657.0
2,2016.09,2016,9,Atlanta,GA,270168.0000,5.1,13404.0,5791.874,48657.0
3,2016.10,2016,10,Atlanta,GA,269900.0000,5.1,12996.0,5791.874,48657.0
4,2016.11,2016,11,Atlanta,GA,269900.0000,5.1,10848.0,5791.874,48657.0
...,...,...,...,...,...,...,...,...,...,...
232,2019.08,2019,8,Tulsa,OK,241185.7143,3.1,1320.0,999.348,56680.0
233,2019.09,2019,9,Tulsa,OK,242685.7143,3.1,1116.0,999.348,56680.0
234,2019.10,2019,10,Tulsa,OK,248950.0000,3.1,1200.0,999.348,56680.0
235,2019.11,2019,11,Tulsa,OK,248500.0000,3.0,908.0,999.348,56680.0


In [4]:
# Build the model input frame for the selected city and lag order,
# then show its last rows as a sanity check.
input_df = var.generate_time_data(
    df=df,
    city=city,
    order=order,
    predictors=predictors,
)
input_df.tail(15)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["step"] = [i for i in range(order)]


Unnamed: 0,city_,year_month,med_housing_0,med_housing_1,med_housing_2,med_housing_3,unemployment_0,unemployment_1,unemployment_2,unemployment_3,...,population_3,income_0,income_1,income_2,income_3,med_housing,unemployment,housing_listings,population,income
23,Atlanta,2018.1,339900.0,330772.0,325000.0,324900.0,3.8,3.7,3.7,3.6,...,5951.394,53354.0,53354.0,53354.0,53354.0,319450.0,3.7,17204.0,5951.394,53354.0
24,Atlanta,2018.11,330772.0,325000.0,324900.0,319450.0,3.7,3.7,3.6,3.7,...,5951.394,53354.0,53354.0,53354.0,53354.0,319116.0,3.7,14788.0,5951.394,53354.0
25,Atlanta,2018.12,325000.0,324900.0,319450.0,319116.0,3.7,3.6,3.7,3.7,...,5951.394,53354.0,53354.0,53354.0,53354.0,315000.0,3.7,11948.0,5951.394,53354.0
26,Atlanta,2019.01,324900.0,319450.0,319116.0,315000.0,3.6,3.7,3.7,3.7,...,5951.394,53354.0,53354.0,53354.0,53354.0,313950.0,3.7,16000.0,6027.231,55668.0
27,Atlanta,2019.02,319450.0,319116.0,315000.0,313950.0,3.7,3.7,3.7,3.7,...,6027.231,53354.0,53354.0,53354.0,55668.0,317446.0,3.7,17880.0,6027.231,55668.0
28,Atlanta,2019.03,319116.0,315000.0,313950.0,317446.0,3.7,3.7,3.7,3.7,...,6027.231,53354.0,53354.0,55668.0,55668.0,324237.1429,3.6,18440.0,6027.231,55668.0
29,Atlanta,2019.04,315000.0,313950.0,317446.0,324237.1429,3.7,3.7,3.7,3.6,...,6027.231,53354.0,55668.0,55668.0,55668.0,329950.0,3.6,20836.0,6027.231,55668.0
30,Atlanta,2019.05,313950.0,317446.0,324237.1429,329950.0,3.7,3.7,3.6,3.6,...,6027.231,55668.0,55668.0,55668.0,55668.0,337510.0,3.5,21296.0,6027.231,55668.0
31,Atlanta,2019.06,317446.0,324237.1429,329950.0,337510.0,3.7,3.6,3.6,3.5,...,6027.231,55668.0,55668.0,55668.0,55668.0,334900.0,3.4,19756.0,6027.231,55668.0
32,Atlanta,2019.07,324237.1429,329950.0,337510.0,334900.0,3.6,3.6,3.5,3.4,...,6027.231,55668.0,55668.0,55668.0,55668.0,329450.0,3.3,19136.0,6027.231,55668.0


## Generate pymc3 Model
This will automatically write the code needed to generate a pymc3 model using our predictors that will be used for variational inference. See hidden cell for parameters in the model.

In [5]:
# Generate the model source code for the current predictors / key variable.
var.write_model_module(input_df=input_df, key_var=key_var)

# `model_builder` is presumably the module written by the call above, so it can
# only be imported at this point rather than in the top import cell.
# - invalidate_caches(): lets the import system notice a source file that was
#   created after the kernel started.
# - reload(): when this cell is re-run in the same kernel (e.g. after changing
#   the predictors), a plain `import` would silently return the stale cached
#   module instead of the freshly written one.
importlib.invalidate_caches()
if "model_builder" in sys.modules:
    model_builder = importlib.reload(sys.modules["model_builder"])
else:
    import model_builder

# Build the model object from the prepared input frame.
model_comb = model_builder.build_model(input_df=input_df)

['med_housing', 'unemployment', 'housing_listings', 'population', 'income']
['med_housing_0', 'med_housing_1', 'med_housing_2', 'med_housing_3', 'unemployment_0', 'unemployment_1', 'unemployment_2', 'unemployment_3', 'housing_listings_0', 'housing_listings_1', 'housing_listings_2', 'housing_listings_3', 'population_0', 'population_1', 'population_2', 'population_3', 'income_0', 'income_1', 'income_2', 'income_3']


In [6]:
# Echo the model object to confirm the build step returned one.
model_comb

<pymc3.model.Model at 0x23e12533250>

## Generate posterior predictions for our coefficients
This uses the ADVI implementation of variational inference in pymc3 to generate our posterior predictions (this can take a few minutes).

In [None]:
# Fit the model with ADVI and display the resulting parameters.
# NOTE(review): the saved output shows this run was interrupted at about 2%,
# so this cell and every later one have no saved results. Re-run to completion
# before relying on anything below.
parameters = var.generate_advi_posterior(model_comb)
parameters

Interrupted at 1,233 [2%]: Average Loss = 3,292.2


# Run the forward model
Input the starting time period (formatted `yyyy.mm`) and the number of steps that you want to advance in your predictions. This returns a dataframe of posterior predictions for each projected step, plus a standard deviation dataframe giving the spread of each of those predictions.

In [None]:
# Project forward from the chosen start period. Two frames come back:
# the mean prediction per step and the matching standard deviations.
mean_df, std_df = var.run_projections(
    order=order,
    input=input_df,
    start_year_month=2017.12,  # formatted yyyy.mm
    samples=1000000,
    steps=10,
    parameters=parameters,
)
mean_df

In [None]:
# Standard deviations returned alongside mean_df (the spread of each prediction).
std_df

# Visualize

In [None]:
# Compare the projected means against the observed data.
# `city` and `order` come from the configuration cell rather than being
# repeated as literals, so the plot always matches the city and lag order
# the model was actually built with.
var.generate_comparative_lineplot(
    df=df,
    mean_df=mean_df,
    start_year=2017,
    max_year=2018,
    city=city,
    order=order,
)

In [None]:
# Plot the projected means together with their standard deviations.
# NOTE(review): the projection call requests steps=10 but steps=8 is passed
# here. Confirm the difference is intentional.
var.plot_uncertainty(
    mean_df=mean_df,
    std_df=std_df,
    steps=8,
)