In [None]:
import os 

# Run the notebook relative to the project folder so the relative "Data/..."
# and "Data Visualizations/..." paths used further down resolve.
# The folder can be overridden with the SEASONAL_MACRO_DIR environment
# variable; without it the previous hard-coded location is used.
PROJECT_DIR = os.environ.get('SEASONAL_MACRO_DIR', '/Users/matth/Dropbox/Seasonal Macro')
os.chdir(PROJECT_DIR)

# Last expression of the cell, so the working directory now in effect is displayed.
os.getcwd()

In [None]:
from calendar import month
import pandas as pd
import numpy as np
import matplotlib as plt
import matplotlib.pyplot
import statsmodels.formula.api as sm 
import os
from datetime import datetime
from pyspark.sql.functions import date_format
from pandas import ExcelWriter
import re as re
import itertools

In [None]:
# Keep rendered frames compact: show at most 10 rows and 10 columns.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 10)

# Inflation Data


### Cleaning

In [None]:
#Defining Useful Lists/Housekeeping

# The inflation series come from the workbook 'MasterInflationData.xlsx'.
#
# 'data' is a LIST of DATAFRAMES, meant to hold one dataframe per entry of
# 'sheet_names' (each read from the worksheet of that name). It starts empty.

data = list()

sheet_names = [
    "All Items",
    "No Food or Energy",
    "Fuels and Utilities",
    "Energy",
    "Gasoline (not dropped)",
    "Gasoline (dropped)",
    "Apparel",
    "Commodities",
    "Durables",
    "New Vehicles",
    "Used Vehicles",
    "Medical Care",
    "Medical Care Services",
    "Services",
    "Education",
]

# Open the workbook once; the handle is reused for every sheet read.
sheet = pd.ExcelFile("Data/MasterInflationData.xlsx")

# Positions 0 .. len(sheet_names) - 1, one per sheet.
sheet_indices = range(len(sheet_names))


In [None]:

#Filling in data list with dataframes
# Build the list in one go instead of appending to the list created in an
# earlier cell: re-running this cell then yields exactly one dataframe per
# sheet rather than a second copy of every sheet.
# Only columns A:D are read and the literal 'NA' is treated as missing.
data = [
    pd.read_excel(sheet, sheet_name=name, na_values=['NA'], usecols='A:D')
    for name in sheet_names
]

In [None]:

#Creating monthly dummies
# For every sheet, add a 'months' column (1-12, taken from observation_date)
# plus one indicator column per month; the indicator columns are labelled
# with the integer month number.
month_numbers = list(range(1, 13))

for i in range(len(sheet_names)):
    # Drop the outputs of a previous run first, so the cell can be re-run
    # without `join` failing on overlapping column names.
    frame = data[i].drop(columns=['months', *month_numbers], errors='ignore')

    # observation_date itself is left untouched; only its month is needed.
    frame['months'] = pd.to_datetime(frame['observation_date']).dt.month

    # dtype=int keeps the dummies numeric. Newer pandas defaults to bool,
    # which patsy would treat as a categorical factor in the formulas below.
    dummies = pd.get_dummies(frame['months'], dtype=int)
    data[i] = frame.join(dummies)


### Regressions

In [None]:
#Monthly % Change, 20 lags

#Each column labeled 1,2,..,12 is a dummy for January, February, ..., December
#data[i] gives us the table we are looking at, data[i].columns[2] is our outcome of interest (monthly % change)
reg_summary_monthly_20lags = []
reg_data_monthly_20lags = []

month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# 'y ~ x_1 + ... + x_12 - 1': one coefficient per month, no intercept.
seasonal_formula = 'y ~ ' + ' + '.join(f'x_{m}' for m in range(1, 13)) + ' - 1'

for i in range(len(sheet_names)):
    outcome_name = str(data[i].columns[2])

    # Hand the formula an explicit frame holding exactly the variables it
    # names, instead of relying on loose notebook globals. The dummies are
    # cast to float so they enter as numeric regressors even when they are
    # stored as booleans (patsy would otherwise code them as categorical).
    design = pd.DataFrame({'y': data[i].iloc[:, 2]})
    for m in range(1, 13):
        design[f'x_{m}'] = data[i][m].astype(float)

    # OLS with HAC (autocorrelation-robust) standard errors, 20 lags.
    ols = sm.ols(formula=seasonal_formula, data=design).fit(cov_type='HAC', cov_kwds={'maxlags': 20})
    reg_summary_monthly_20lags.append(ols.summary(yname=outcome_name, xname=month_labels))

    # One row per month dummy: point estimate, standard error and t statistic.
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    reg_data_monthly_20lags.append(df)


In [None]:

#Monthly % Change, 40 lags
# Same regression as the 20-lag monthly case (outcome = column index 2 of
# each sheet, regressors = the month dummies 1..12), with 40 HAC lags.
reg_summary_monthly_40lags = []
reg_data_monthly_40lags = []

month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# 'y ~ x_1 + ... + x_12 - 1': one coefficient per month, no intercept.
seasonal_formula = 'y ~ ' + ' + '.join(f'x_{m}' for m in range(1, 13)) + ' - 1'

for i in range(len(sheet_names)):
    outcome_name = str(data[i].columns[2])

    # Hand the formula an explicit frame holding exactly the variables it
    # names, instead of relying on loose notebook globals. The dummies are
    # cast to float so they enter as numeric regressors even when they are
    # stored as booleans (patsy would otherwise code them as categorical).
    design = pd.DataFrame({'y': data[i].iloc[:, 2]})
    for m in range(1, 13):
        design[f'x_{m}'] = data[i][m].astype(float)

    # OLS with HAC (autocorrelation-robust) standard errors, 40 lags.
    ols = sm.ols(formula=seasonal_formula, data=design).fit(cov_type='HAC', cov_kwds={'maxlags': 40})
    reg_summary_monthly_40lags.append(ols.summary(yname=outcome_name, xname=month_labels))

    # One row per month dummy: point estimate, standard error and t statistic.
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    reg_data_monthly_40lags.append(df)

In [None]:

#3 Month % Change, 20 lags
# Outcome = column index 3 of each sheet, regressed on the month dummies
# 1..12 without an intercept, with 20 HAC lags.
reg_summary_3month_20lags = []
reg_data_3month_20lags = []

month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# 'y ~ x_1 + ... + x_12 - 1': one coefficient per month, no intercept.
seasonal_formula = 'y ~ ' + ' + '.join(f'x_{m}' for m in range(1, 13)) + ' - 1'

for i in range(len(sheet_names)):
    outcome_name = str(data[i].columns[3])

    # Hand the formula an explicit frame holding exactly the variables it
    # names, instead of relying on loose notebook globals. The dummies are
    # cast to float so they enter as numeric regressors even when they are
    # stored as booleans (patsy would otherwise code them as categorical).
    design = pd.DataFrame({'y': data[i].iloc[:, 3]})
    for m in range(1, 13):
        design[f'x_{m}'] = data[i][m].astype(float)

    # OLS with HAC (autocorrelation-robust) standard errors, 20 lags.
    ols = sm.ols(formula=seasonal_formula, data=design).fit(cov_type='HAC', cov_kwds={'maxlags': 20})
    reg_summary_3month_20lags.append(ols.summary(yname=outcome_name, xname=month_labels))

    # One row per month dummy: point estimate, standard error and t statistic.
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    reg_data_3month_20lags.append(df)

In [None]:

#3 Month % Change, 40 lags
# Outcome = column index 3 of each sheet, regressed on the month dummies
# 1..12 without an intercept, with 40 HAC lags.

reg_summary_3month_40lags = []
reg_data_3month_40lags = []

month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# 'y ~ x_1 + ... + x_12 - 1': one coefficient per month, no intercept.
seasonal_formula = 'y ~ ' + ' + '.join(f'x_{m}' for m in range(1, 13)) + ' - 1'

for i in range(len(sheet_names)):
    outcome_name = str(data[i].columns[3])

    # Hand the formula an explicit frame holding exactly the variables it
    # names, instead of relying on loose notebook globals. The dummies are
    # cast to float so they enter as numeric regressors even when they are
    # stored as booleans (patsy would otherwise code them as categorical).
    design = pd.DataFrame({'y': data[i].iloc[:, 3]})
    for m in range(1, 13):
        design[f'x_{m}'] = data[i][m].astype(float)

    # OLS with HAC (autocorrelation-robust) standard errors, 40 lags.
    ols = sm.ols(formula=seasonal_formula, data=design).fit(cov_type='HAC', cov_kwds={'maxlags': 40})
    reg_summary_3month_40lags.append(ols.summary(yname=outcome_name, xname=month_labels))

    # One row per month dummy: point estimate, standard error and t statistic.
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    reg_data_3month_40lags.append(df)

### Plots

In [None]:

#Visualizations: Monthly, 20 lags 
# One error-bar chart per sheet: monthly coefficients with +/- one standard
# error, saved as a PNG named after the sheet.

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# savefig does not create missing folders, so make sure the target exists.
output_dir = 'Data Visualizations/Inflation/Monthly, 20 lags'
os.makedirs(output_dir, exist_ok=True)

for i in range(len(sheet_names)):
    # NOTE: in this notebook `plt` is the top-level matplotlib package
    # (`import matplotlib as plt`), hence `plt.pyplot`.
    fig, ax = plt.pyplot.subplots()
    ax.errorbar(x=month_list, y=reg_data_monthly_20lags[i]['coefficient'], yerr=reg_data_monthly_20lags[i]['se'])
    fig.savefig(f'{output_dir}/{sheet_names[i]}_monthly_20lags.png', bbox_inches='tight')
    # Close each figure once saved so none stay open (or get rendered empty).
    plt.pyplot.close(fig)

In [None]:
#Visualizations: Monthly, 40 lags
# One error-bar chart per sheet: monthly coefficients with +/- one standard
# error, saved as a PNG named after the sheet.

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# savefig does not create missing folders, so make sure the target exists.
output_dir = 'Data Visualizations/Inflation/Monthly, 40 lags'
os.makedirs(output_dir, exist_ok=True)

for i in range(len(sheet_names)):
    # NOTE: in this notebook `plt` is the top-level matplotlib package
    # (`import matplotlib as plt`), hence `plt.pyplot`.
    fig, ax = plt.pyplot.subplots()
    ax.errorbar(x=month_list, y=reg_data_monthly_40lags[i]['coefficient'], yerr=reg_data_monthly_40lags[i]['se'])
    fig.savefig(f'{output_dir}/{sheet_names[i]}_monthly_40lags.png', bbox_inches='tight')
    # Close each figure once saved so none stay open (or get rendered empty).
    plt.pyplot.close(fig)


In [None]:
#Visualizations: 3 Month, 20 lags
# One error-bar chart per sheet: monthly coefficients with +/- one standard
# error, saved as a PNG named after the sheet.

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# savefig does not create missing folders, so make sure the target exists.
output_dir = 'Data Visualizations/Inflation/3 month, 20 lags'
os.makedirs(output_dir, exist_ok=True)

for i in range(len(sheet_names)):
    # NOTE: in this notebook `plt` is the top-level matplotlib package
    # (`import matplotlib as plt`), hence `plt.pyplot`.
    fig, ax = plt.pyplot.subplots()
    ax.errorbar(x=month_list, y=reg_data_3month_20lags[i]['coefficient'], yerr=reg_data_3month_20lags[i]['se'])
    fig.savefig(f'{output_dir}/{sheet_names[i]}_3month_20lags.png', bbox_inches='tight')
    # Close each figure once saved so none stay open (or get rendered empty).
    plt.pyplot.close(fig)


In [None]:
#Visualizations: 3 Month, 40 lags
# One error-bar chart per sheet: monthly coefficients with +/- one standard
# error, saved as a PNG named after the sheet.

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# savefig does not create missing folders, so make sure the target exists.
output_dir = 'Data Visualizations/Inflation/3 month, 40 lags'
os.makedirs(output_dir, exist_ok=True)

for i in range(len(sheet_names)):
    # NOTE: in this notebook `plt` is the top-level matplotlib package
    # (`import matplotlib as plt`), hence `plt.pyplot`.
    fig, ax = plt.pyplot.subplots()
    ax.errorbar(x=month_list, y=reg_data_3month_40lags[i]['coefficient'], yerr=reg_data_3month_40lags[i]['se'])
    fig.savefig(f'{output_dir}/{sheet_names[i]}_3month_40lags.png', bbox_inches='tight')
    # Close each figure once saved so none stay open (or get rendered empty).
    plt.pyplot.close(fig)
