## Imports

In [None]:
import os 

os.getcwd()

os.chdir('/Users/matth/Dropbox/Seasonal Macro')

In [None]:
from calendar import month
import pandas as pd
import numpy as np
import matplotlib as plt
import matplotlib.pyplot
import statsmodels.formula.api as sm 
import os
from datetime import datetime
from pyspark.sql.functions import date_format
from pandas import ExcelWriter
import re as re


In [None]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

## Helper Functions

In [None]:
#Map Month Names to Numbers
def month_to_num(str):
    if str == 'Jan':
        return 1
    elif str == 'Feb':
        return 2
    elif str == 'Mar':
        return 3
    elif str == 'Apr':
        return 4
    elif str == 'May':
        return 5
    elif str == 'Jun':
        return 6
    elif str == 'Jul':
        return 7
    elif str == 'Aug':
        return 8
    elif str == 'Sep':
        return 9
    elif str == 'Oct':
        return 10
    elif str == 'Nov':
        return 11
    else:
        return 12

#Lagged % Change Calculation, Monthly/Quarterly
def perc_change_mon(series, index):
    lst=[]
    for j in range(len(series)):
        if j==0 or isinstance(series.at[index[j]], str) or isinstance(series.at[index[j-1]],str):
            lst.append(np.nan)
        else:
            lst.append((series.at[index[j]]-series.at[index[j-1]])/series.at[index[j-1]])
    return lst 

#Lagged % Change Calculation, 3 months
def perc_change_3mon(series, index):
    lst=[]
    for j in range(len(series)):
        if j in [0,1] or isinstance(series.at[index[j]], str) or isinstance(series.at[index[j-2]],str):
            lst.append(np.nan)
        else:
            lst.append((series.at[index[j]]-series.at[index[j-2]])/series.at[index[j-2]])
    return lst 


#Monthly Expenditure Share Calculation
def monthly_expenditure_share(series, year, index):
    lst=[]
    df = pd.DataFrame()
    df['Year'] = year
    df['Sales'] = series
    df['Annual Sales'] = df.groupby('Year').transform(lambda x: x.sum())
    for j in range(len(index)):
        lst.append(df['Sales'].at[index[j]]/df['Annual Sales'].at[index[j]] if df['Annual Sales'].at[index[j]] != 0 else np.nan)
    return lst

## Cleaning


Dataframes are two categories: seasonally adjusted vs not seasonally adjusted. Each category is broken up into granular data (individual NAICS codes) vs total data (first 7 rows), which is reported at the monthly and annual level

Dataframes are named as retail_[monthly/annual]\_[granular/total]_[adjusted/nonadjusted]


### NAICS Codes

In [None]:
#Checking NAICS codes

#Data is read in from 'mrtssales92-present.xls'.

#We were worried that each year in the retail sales dataset may report on different NAICS codes.
#This just checks to make sure that the NAICS codes are consistent across time.
#Problem years were 2000-2001, 2016-2017, 2020-2021. These were manually checked; the error came from extra notes at the bottom of some of these years.

naics_codes = []
problem_years = []

for i in range(1992, 2022):
    df=pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name = f'{i}', usecols='A', skiprows=10, skipfooter=45) #includes some NA's to make sure not cutting off any values 
    df.columns= ['NAICS Code']
    naics_codes.append(df)

for i in range(len(naics_codes)-1):
    if not naics_codes[i].equals(naics_codes[i+1]):
        problem_years.append([i+1992, i+1993])


### Nonadjusted

Granular, Monthly

In [70]:
#Defining Dataframe
retail_monthly_granular_nonadjusted=pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows = [i for i in range(1,13)]+[j for j in range(71,200)])
retail_monthly_granular_nonadjusted.columns=['Kind of Business']

In [71]:
#Populating Dataframe

#Note: rows are 0-indexed. Stopping at row 12 actually stops at row 13

for i in range(1992, 2022):
    retail_df_monthly = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="C:N", skiprows = [i for i in range(1,13)]+[j for j in range(71,200)])
    retail_df_monthly.columns=[f'01-01-{i}', f'01-02-{i}', f'01-03-{i}', f'01-04-{i}', f'01-05-{i}', f'01-06-{i}', f'01-07-{i}', f'01-08-{i}', f'01-09-{i}', f'01-10-{i}', f'01-11-{i}', f'01-12-{i}']
    retail_df_monthly.columns=pd.to_numeric(retail_df_monthly.columns, errors='ignore')
    retail_df_monthly.columns=pd.to_datetime(retail_df_monthly.columns, format='%d-%m-%Y')
    retail_monthly_granular_nonadjusted=pd.concat([retail_monthly_granular_nonadjusted, retail_df_monthly], axis=1)

In [72]:
#Setting (S) and (NA) values to NaN 

#Granular, Monthly
retail_monthly_granular_nonadjusted = retail_monthly_granular_nonadjusted.applymap(lambda x: np.nan if (x=='(S)' or x=='(NA)') else x)


In [73]:
#Transpose

retail_monthly_granular_nonadjusted=retail_monthly_granular_nonadjusted.transpose()

In [74]:
#Changing Column Names/Index

#Monthly, Nonadjusted
retail_monthly_granular_nonadjusted.columns=[i for i in retail_monthly_granular_nonadjusted.iloc[0]]
retail_monthly_granular_nonadjusted.drop(index=retail_monthly_granular_nonadjusted.index[0], axis=0, inplace=True)


In [76]:
#Creating Month + Year Columns

retail_monthly_granular_nonadjusted['Year'] = pd.DatetimeIndex(retail_monthly_granular_nonadjusted.index).year
retail_monthly_granular_nonadjusted['Month'] = pd.DatetimeIndex(retail_monthly_granular_nonadjusted.index).month 

In [77]:
#Creating Monthly % Change Variables 

column_names=list(retail_monthly_granular_nonadjusted)
index_names=list(retail_monthly_granular_nonadjusted.index)

#3 Month % Change
threemonth_per_change_names=[f'3 Month % Change, {i}' for i in retail_monthly_granular_nonadjusted.columns[0:58]]

for i in range(58):
    retail_monthly_granular_nonadjusted[threemonth_per_change_names[i]]=perc_change_3mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)


#Monthly % Change
monthly_per_change_names=[f'Monthly % Change, {i}' for i in retail_monthly_granular_nonadjusted.columns[0:58]]

for i in range(58):
    retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)

  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_granular_nonadjusted[column_names[i]], index_names)
  retail_monthly_granular_nonadjusted[monthly_per_change_names[i]]=perc_change_mon(retail_monthly_gr

In [78]:
#Creating Monthly Share of Annual Expenditure Category Columns

monthly_share_names=[f'Share of Annual Expenditure, {i}' for i in retail_monthly_granular_nonadjusted.columns[0:58]]

for i in range(58):
    retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], index_names)


  retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], index_names)
  retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], index_names)
  retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], index_names)
  retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], index_names)
  retail_monthly_granular_nonadjusted[monthly_share_names[i]] = monthly_expenditure_share(retail_monthly_granular_nonadjusted[column_names[i]], retail_monthly_granular_nonadjusted['Year'], ind

In [79]:
#Creating monthly dummies 

#Granular
month_dummies = pd.get_dummies(retail_monthly_granular_nonadjusted['Month'])
retail_monthly_granular_nonadjusted=retail_monthly_granular_nonadjusted.join(month_dummies)

#Pretty sure you can just regress on C('Month') instead of needing to create dummies like this

Total, Monthly

In [53]:
#Defining Dataframe

retail_monthly_total_nonadjusted=pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows = [i for i in range(1,6)]+[j for j in range(13,200)])
retail_monthly_total_nonadjusted.columns=['Kind of Business']

In [54]:
#Populating Dataframe

#Note: rows are 0-indexed. Stopping at row 12 actually stops at row 13

for i in range(1992, 2022):
    retail_df_total = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="C:N", skiprows = [i for i in range(1,6)]+[j for j in range(13,200)]) 
    retail_df_total.columns=[f'01-01-{i}', f'01-02-{i}', f'01-03-{i}', f'01-04-{i}', f'01-05-{i}', f'01-06-{i}', f'01-07-{i}', f'01-08-{i}', f'01-09-{i}', f'01-10-{i}', f'01-11-{i}', f'01-12-{i}']
    retail_df_total.columns=pd.to_numeric(retail_df_total.columns, errors='ignore')
    retail_df_total.columns = pd.to_datetime(retail_df_total.columns, format='%d-%m-%Y')
    retail_monthly_total_nonadjusted=pd.concat([retail_monthly_total_nonadjusted, retail_df_total], axis=1)

In [55]:
#Setting (S) and (NA) values to NaN for nonadjusted dataframes

#Total, Monthly
retail_monthly_total_nonadjusted=retail_monthly_total_nonadjusted.applymap(lambda x: np.nan if (x=='(S)' or x=='(NA)') else x)


In [56]:
#Transpose
retail_monthly_total_nonadjusted=retail_monthly_total_nonadjusted.transpose()


In [58]:
#Changing Column Names/Index

retail_monthly_total_nonadjusted.columns=[i for i in retail_monthly_total_nonadjusted.iloc[0]]
retail_monthly_total_nonadjusted.drop(index=retail_monthly_total_nonadjusted.index[0], axis=0, inplace=True)


In [61]:
#Creating Month + Year Columns

retail_monthly_total_nonadjusted['Year'] = pd.DatetimeIndex(retail_monthly_total_nonadjusted.index).year
retail_monthly_total_nonadjusted['Month'] = pd.DatetimeIndex(retail_monthly_total_nonadjusted.index).month


In [65]:
#Creating Monthly % Change Variables (Nonadjusted, Total)

column_names_total=list(retail_monthly_total_nonadjusted)
index_names_total=list(retail_monthly_total_nonadjusted.index)

#Monthly % Change
monthly_per_change_names_total=[f'Monthly % Change, {i}' for i in retail_monthly_total_nonadjusted.columns[0:7]]

for i in range(7):
    retail_monthly_total_nonadjusted[monthly_per_change_names_total[i]]=perc_change_mon(retail_monthly_total_nonadjusted[column_names_total[i]], index_names_total)

#3 Month % Change
threemonth_per_change_names_total=[f'3 Month % Change, {i}' for i in retail_monthly_total_nonadjusted.columns[0:7]]

for i in range(7):
    retail_monthly_total_nonadjusted[threemonth_per_change_names_total[i]]=perc_change_3mon(retail_monthly_total_nonadjusted[column_names_total[i]], index_names_total)

In [67]:
#Creating Monthly Share of Annual Expenditure Category Columns

monthly_share_names_total=[f'Share of Annual Expenditure, {i}' for i in retail_monthly_total_nonadjusted.columns[0:7]]

for i in range(7):
    retail_monthly_total_nonadjusted[monthly_share_names_total[i]] = monthly_expenditure_share(retail_monthly_total_nonadjusted[column_names_total[i]], retail_monthly_total_nonadjusted['Year'], retail_monthly_total_nonadjusted.index)


In [80]:
#Creating monthly dummies 

#Total
retail_monthly_total_nonadjusted=retail_monthly_total_nonadjusted.join(month_dummies)


#Pretty sure you can just regress on C('Month') instead of needing to create dummies like this

Granular, Annual

In [None]:
#Defining Dataframe/Columns
retail_annual_granular_nonadjusted = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows = [i for i in range(1,13)]+[j for j in range(71,200)])
retail_annual_granular_nonadjusted.columns=['Kind of Business']


In [None]:
#Populating Dataframes

for i in range(1992, 2022):
    retail_df_annual_granular = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="O", skiprows = [i for i in range(1,13)]+[j for j in range(71,200)])
    retail_df_annual_granular.columns=[f'{i} Average']
    retail_annual_granular_nonadjusted=pd.concat([retail_annual_granular_nonadjusted, retail_df_annual_granular], axis=1)

In [None]:
#Transpose

retail_annual_granular_nonadjusted=retail_annual_granular_nonadjusted.transpose()

In [None]:
#Changing Column Names/Index

retail_annual_granular_nonadjusted.columns=[i for i in retail_annual_granular_nonadjusted.iloc[0]]
retail_annual_granular_nonadjusted.drop(index=retail_annual_granular_nonadjusted.index[0], axis=0, inplace=True)

Total, Annual

In [None]:
#Defining Dataframe

retail_annual_total_nonadjusted = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows = [i for i in range(1,6)]+[j for j in range(13,200)])
retail_annual_total_nonadjusted.columns=['Kind of Business']


In [None]:
#Populating Dataframe

for i in range(1992, 2022):
    retail_df_annual_totals = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="O", skiprows = [i for i in range(1,6)]+[j for j in range(13,200)]) 
    retail_df_annual_totals.columns=[f'{i} Average']
    retail_annual_total_nonadjusted=pd.concat([retail_annual_total_nonadjusted, retail_df_annual_totals], axis=1)


In [None]:
#Transpose
retail_annual_total_nonadjusted=retail_annual_total_nonadjusted.transpose()


In [None]:
#Changing Column Names/Index

retail_annual_total_nonadjusted.columns=[i for i in retail_annual_total_nonadjusted.iloc[0]]
retail_annual_total_nonadjusted.drop(index=retail_annual_total_nonadjusted.index[0], axis=0, inplace=True)

Granular, Quarterly

In [None]:
#Defining Quarterly Data 
retail_quarterly_granular_nonadjusted = retail_monthly_granular_nonadjusted[column_names].groupby(pd.PeriodIndex(retail_monthly_granular_nonadjusted.index, freq='Q'), axis='index').sum()

#Setting (S) and (NA) values to NaN 
retail_quarterly_granular_nonadjusted = retail_quarterly_granular_nonadjusted.applymap(lambda x: np.nan if (x=='(S)' or x=='(NA)') else x)

#Defining Quarter + Year Variables

retail_quarterly_granular_nonadjusted['Year'] = retail_quarterly_granular_nonadjusted.index.astype(str)
retail_quarterly_granular_nonadjusted['Year'] = retail_quarterly_granular_nonadjusted['Year'].map(lambda x: re.findall('(\d{4})', x)[0])
retail_quarterly_granular_nonadjusted['Quarter'] = retail_quarterly_granular_nonadjusted.index.astype(str)
retail_quarterly_granular_nonadjusted['Quarter'] = retail_quarterly_granular_nonadjusted['Quarter'].map(lambda x: re.findall('(\d$)', x)[0])

#Defining Quarterly Dummies

quarter_dummies = pd.get_dummies(retail_quarterly_granular_nonadjusted['Quarter'])
retail_quarterly_granular_nonadjusted=retail_quarterly_granular_nonadjusted.join(quarter_dummies)

#Pretty sure you can just regress on C('Month') instead of needing to create dummies like this

In [None]:
#Quarter % Changes
quarter_indices=retail_quarterly_granular_nonadjusted.index
quarter_names = list(quarter_indices)

quarterly_per_change_names=[f'Quarterly % Change, {i}' for i in retail_quarterly_granular_nonadjusted.columns[0:58]]

for i in range(58):
    retail_quarterly_granular_nonadjusted[quarterly_per_change_names[i]]=perc_change_mon(retail_quarterly_granular_nonadjusted[column_names[i]], quarter_indices)

retail_quarterly_granular_nonadjusted

Total, Quarterly

In [42]:
column_names_total=list(retail_monthly_total_nonadjusted)

#Defining Quarterly Data
retail_quarterly_total_nonadjusted = retail_monthly_total_nonadjusted[column_names_total].groupby(pd.PeriodIndex(retail_monthly_total_nonadjusted.index, freq='Q'), axis='index').sum()

#Setting (S) and (NA) values to NaN 
retail_quarterly_total_nonadjusted = retail_quarterly_total_nonadjusted.applymap(lambda x: np.nan if (x=='(S)' or x=='(NA)') else x)

#Defining Quarter + Year Variables
retail_quarterly_total_nonadjusted['Year'] = retail_quarterly_total_nonadjusted.index.astype(str)
retail_quarterly_total_nonadjusted['Year'] = retail_quarterly_total_nonadjusted['Year'].map(lambda x: re.findall('(\d{4})', x)[0])
retail_quarterly_total_nonadjusted['Quarter'] = retail_quarterly_total_nonadjusted.index.astype(str)
retail_quarterly_total_nonadjusted['Quarter'] = retail_quarterly_total_nonadjusted['Quarter'].map(lambda x: re.findall('(\d$)', x)[0])

#Defining Quarterly Dummies
retail_quarterly_total_nonadjusted=retail_quarterly_total_nonadjusted.join(quarter_dummies)


In [47]:
#Quarter % Changes

quarterly_per_change_names_total=[f'Quarterly % Change, {i}' for i in retail_quarterly_total_nonadjusted.columns[0:7]]

for i in range(7):
    retail_quarterly_total_nonadjusted[quarterly_per_change_names_total[i]]=perc_change_mon(retail_quarterly_total_nonadjusted[column_names_total[i]], quarter_indices)


### Adjusted

Granular, Monthly

In [None]:
#Defining Dataframe

retail_monthly_granular_adjusted = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows=[i for i in range(1,79)]+[j for j in range(110,200)])
retail_monthly_granular_adjusted.columns=['Kind of Business']


In [None]:
#Populating Dataframe

for i in range(1992, 2022):
    retail_df_monthly = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="C:N", skiprows = [i for i in range(1,79)]+[j for j in range(110,200)])
    retail_df_monthly.columns=[f'Jan. {i}', f'Feb. {i}', f'Mar. {i}', f'Apr. {i}', f'May {i}', f'Jun. {i}', f'Jul. {i}', f'Aug. {i}', f'Sep. {i}', f'Oct. {i}', f'Nov. {i}', f'Dec. {i}']
    retail_monthly_granular_adjusted=pd.concat([retail_monthly_granular_adjusted, retail_df_monthly], axis=1)


Total, Monthly

In [None]:
#Defining Dataframe

retail_monthly_total_adjusted = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name='2010', usecols="B", skiprows=[i for i in range(1,72)]+[j for j in range(79,200)])
retail_monthly_total_adjusted.columns=['Kind of Business']

In [None]:
#Populating Dataframe

for i in range(1992, 2022):
    retail_df_total = pd.read_excel('Data/retail_sales/mrtssales92-present.xls', sheet_name=f'{i}', usecols="C:N", skiprows=[i for i in range(1,72)]+[j for j in range(79,200)])
    retail_df_total.columns=[f'Jan. {i}', f'Feb. {i}', f'Mar. {i}', f'Apr. {i}', f'May {i}', f'Jun. {i}', f'Jul. {i}', f'Aug. {i}', f'Sep. {i}', f'Oct. {i}', f'Nov. {i}', f'Dec. {i}']
    retail_monthly_total_adjusted=pd.concat([retail_monthly_total_adjusted, retail_df_total], axis=1)


## % Change Regressions, Monthly (Nonadjusted)

Granular, 1 Month

In [None]:
#Monthly % Change, 20 lags (Granular)

ret_granular_summary_monthly_20lags = []
ret_granular_data_monthly_20lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[monthly_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[monthly_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    ret_granular_summary_monthly_20lags.append(ols.summary(yname = temp_df[monthly_per_change_names], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_monthly_20lags.append(df)

In [None]:
#Monthly % Change, 40 lags (Granular)

ret_granular_summary_monthly_40lags = []
ret_granular_data_monthly_40lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[monthly_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[monthly_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    ret_granular_summary_monthly_40lags.append(ols.summary(yname = temp_df[monthly_per_change_names], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_monthly_40lags.append(df)

Total, 1 Month

In [81]:
#Monthly % Change, 20 lags (total)

ret_total_summary_monthly_20lags = []
ret_total_data_monthly_20lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[monthly_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[monthly_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    ret_total_summary_monthly_20lags.append(ols.summary(yname = temp_df[monthly_per_change_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_monthly_20lags.append(df)

In [82]:
#Monthly % Change, 40 lags (total)

ret_total_summary_monthly_40lags = []
ret_total_data_monthly_40lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[monthly_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[monthly_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    ret_total_summary_monthly_40lags.append(ols.summary(yname = temp_df[monthly_per_change_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_monthly_40lags.append(df)

Granular, 3 Months

In [None]:
#3 Month % Change, 20 lags (Granular)

ret_granular_summary_threemon_20lags = []
ret_granular_data_threemon_20lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[threemonth_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[threemonth_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    ret_granular_summary_threemon_20lags.append(ols.summary(yname = temp_df[threemonth_per_change_names], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_threemon_20lags.append(df)

In [None]:
#3 Month % Change, 40 lags (Granular)

ret_granular_summary_threemon_40lags = []
ret_granular_data_threemon_40lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[threemonth_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[threemonth_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    ret_granular_summary_threemon_40lags.append(ols.summary(yname = temp_df[threemonth_per_change_names], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_threemon_40lags.append(df)

Total, 3 Month

In [83]:
#3 Month % Change, 20 lags (total)

ret_total_summary_threemon_20lags = []
ret_total_data_threemon_20lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[threemonth_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[threemonth_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    ret_total_summary_threemon_20lags.append(ols.summary(yname = temp_df[threemonth_per_change_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_threemon_20lags.append(df)

In [84]:
#3 Month % Change, 40 lags (total)

ret_total_summary_threemon_40lags = []
ret_total_data_threemon_40lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[threemonth_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[threemonth_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    ret_total_summary_threemon_40lags.append(ols.summary(yname = temp_df[threemonth_per_change_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_threemon_40lags.append(df)

## % Change Regressions (Quarterly, Nonadjusted)

Granular

In [85]:
#Quarterly % Change, 5 lags (Granular)

ret_granular_summary_quarterly_5lags = []
ret_granular_data_quarterly_5lags = []

for i in range(58):
    temp_df = retail_quarterly_granular_nonadjusted.copy()
    indices=np.where(retail_quarterly_granular_nonadjusted[quarterly_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in quarter_names if quarter_names.index(k) in indices]
    temp_df['y'] = retail_quarterly_granular_nonadjusted[quarterly_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_quarterly_granular_nonadjusted['1'].loc[index_names_temp]
    temp_df['x_2'] = retail_quarterly_granular_nonadjusted['2'].loc[index_names_temp]
    temp_df['x_3'] = retail_quarterly_granular_nonadjusted['3'].loc[index_names_temp]
    temp_df['x_4'] = retail_quarterly_granular_nonadjusted['4'].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':5})
    ret_granular_summary_quarterly_5lags.append(ols.summary(yname = temp_df[quarterly_per_change_names], xname = ['Q1', 'Q2', 'Q3', 'Q4']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_quarterly_5lags.append(df)

  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)


In [86]:
#Quarterly % Change, 10 lags (Granular)

ret_granular_summary_quarterly_10lags = []
ret_granular_data_quarterly_10lags = []

for i in range(58):
    temp_df = retail_quarterly_granular_nonadjusted.copy()
    indices=np.where(retail_quarterly_granular_nonadjusted[quarterly_per_change_names[i]].notnull())[0]
    index_names_temp = [k for k in quarter_names if quarter_names.index(k) in indices]
    temp_df['y'] = retail_quarterly_granular_nonadjusted[quarterly_per_change_names[i]].dropna()
    temp_df['x_1'] = retail_quarterly_granular_nonadjusted['1'].loc[index_names_temp]
    temp_df['x_2'] = retail_quarterly_granular_nonadjusted['2'].loc[index_names_temp]
    temp_df['x_3'] = retail_quarterly_granular_nonadjusted['3'].loc[index_names_temp]
    temp_df['x_4'] = retail_quarterly_granular_nonadjusted['4'].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':10})
    ret_granular_summary_quarterly_10lags.append(ols.summary(yname = temp_df[quarterly_per_change_names], xname = ['Q1', 'Q2', 'Q3', 'Q4']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_granular_data_quarterly_10lags.append(df)

  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)
  return np.sum(weights * (model.endog - mean)**2)


Total

In [87]:
#Quarterly % Change, 5 lags (Total)

ret_total_summary_quarterly_5lags = []
ret_total_data_quarterly_5lags = []

for i in range(7):
    temp_df = retail_quarterly_total_nonadjusted.copy()
    indices=np.where(retail_quarterly_total_nonadjusted[quarterly_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in quarter_names if quarter_names.index(k) in indices]
    temp_df['y'] = retail_quarterly_total_nonadjusted[quarterly_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_quarterly_total_nonadjusted['1'].loc[index_names_temp]
    temp_df['x_2'] = retail_quarterly_total_nonadjusted['2'].loc[index_names_temp]
    temp_df['x_3'] = retail_quarterly_total_nonadjusted['3'].loc[index_names_temp]
    temp_df['x_4'] = retail_quarterly_total_nonadjusted['4'].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':5})
    ret_total_summary_quarterly_5lags.append(ols.summary(yname = temp_df[quarterly_per_change_names_total], xname = ['Q1', 'Q2', 'Q3', 'Q4']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_quarterly_5lags.append(df)

In [88]:
#Quarterly % Change, 10 lags (Total)

ret_total_summary_quarterly_10lags = []
ret_total_data_quarterly_10lags = []

for i in range(7):
    temp_df = retail_quarterly_total_nonadjusted.copy()
    indices=np.where(retail_quarterly_total_nonadjusted[quarterly_per_change_names_total[i]].notnull())[0]
    index_names_temp = [k for k in quarter_names if quarter_names.index(k) in indices]
    temp_df['y'] = retail_quarterly_total_nonadjusted[quarterly_per_change_names_total[i]].dropna()
    temp_df['x_1'] = retail_quarterly_total_nonadjusted['1'].loc[index_names_temp]
    temp_df['x_2'] = retail_quarterly_total_nonadjusted['2'].loc[index_names_temp]
    temp_df['x_3'] = retail_quarterly_total_nonadjusted['3'].loc[index_names_temp]
    temp_df['x_4'] = retail_quarterly_total_nonadjusted['4'].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':10})
    ret_total_summary_quarterly_10lags.append(ols.summary(yname = temp_df[quarterly_per_change_names_total], xname = ['Q1', 'Q2', 'Q3', 'Q4']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    ret_total_data_quarterly_10lags.append(df)

## Changes in Expenditure Shares Regressions (Monthly, Nonadjusted)

Granular

In [None]:
#Expenditure Shares, 20 lags (Granular)

exp_granular_monthly_20lags = []
exp_granular_data_monthly_20lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[monthly_share_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[monthly_share_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    exp_granular_monthly_20lags.append(ols.summary(yname = temp_df[monthly_share_names[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    exp_granular_data_monthly_20lags.append(df)

In [None]:
#Expenditure Shares, 40 lags (Granular)

exp_granular_monthly_40lags = []
exp_granular_data_monthly_40lags = []

for i in range(58):
    temp_df = retail_monthly_granular_nonadjusted.copy()
    indices=np.where(retail_monthly_granular_nonadjusted[monthly_share_names[i]].notnull())[0]
    index_names_temp = [k for k in index_names if index_names.index(k) in indices]
    temp_df['y'] = retail_monthly_granular_nonadjusted[monthly_share_names[i]].dropna()
    temp_df['x_1'] = retail_monthly_granular_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_granular_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_granular_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_granular_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_granular_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_granular_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_granular_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_granular_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_granular_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_granular_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_granular_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_granular_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    exp_granular_monthly_40lags.append(ols.summary(yname = temp_df[monthly_share_names[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    exp_granular_data_monthly_40lags.append(df)

Total

In [89]:
#Expenditure Shares, 20 lags (total)

exp_total_summary_monthly_20lags = []
exp_total_data_monthly_20lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[monthly_share_names_total[i]].notnull())[0] 
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[monthly_share_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    exp_total_summary_monthly_20lags.append(ols.summary(yname = temp_df[monthly_share_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    exp_total_data_monthly_20lags.append(df)

In [90]:
#Expenditure Shares, 40 lags (total)

exp_total_summary_monthly_40lags = []
exp_total_data_monthly_40lags = []

for i in range(7):
    temp_df = retail_monthly_total_nonadjusted.copy()
    indices=np.where(retail_monthly_total_nonadjusted[monthly_share_names_total[i]].notnull())[0] 
    index_names_temp = [k for k in index_names_total if index_names_total.index(k) in indices]
    temp_df['y'] = retail_monthly_total_nonadjusted[monthly_share_names_total[i]].dropna()
    temp_df['x_1'] = retail_monthly_total_nonadjusted[1].loc[index_names_temp]
    temp_df['x_2'] = retail_monthly_total_nonadjusted[2].loc[index_names_temp]
    temp_df['x_3'] = retail_monthly_total_nonadjusted[3].loc[index_names_temp]
    temp_df['x_4'] = retail_monthly_total_nonadjusted[4].loc[index_names_temp]
    temp_df['x_5'] = retail_monthly_total_nonadjusted[5].loc[index_names_temp]
    temp_df['x_6'] = retail_monthly_total_nonadjusted[6].loc[index_names_temp]
    temp_df['x_7'] = retail_monthly_total_nonadjusted[7].loc[index_names_temp]
    temp_df['x_8'] = retail_monthly_total_nonadjusted[8].loc[index_names_temp]
    temp_df['x_9'] = retail_monthly_total_nonadjusted[9].loc[index_names_temp]
    temp_df['x_10'] = retail_monthly_total_nonadjusted[10].loc[index_names_temp]
    temp_df['x_11'] = retail_monthly_total_nonadjusted[11].loc[index_names_temp]
    temp_df['x_12'] = retail_monthly_total_nonadjusted[12].loc[index_names_temp]
    ols = sm.ols(formula = 'y~x_1+x_2+x_3+x_4+x_5+x_6+x_7+x_8+x_9+x_10+x_11+x_12-1', data=temp_df).fit(cov_type='HAC',cov_kwds={'maxlags':40})
    exp_total_summary_monthly_40lags.append(ols.summary(yname = temp_df[monthly_share_names_total[i]], xname = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']))
    df = pd.DataFrame
    df = pd.concat((ols.params.rename('coefficient'), ols.bse.rename('se'), ols.tvalues.rename('t')), axis=1)
    exp_total_data_monthly_40lags.append(df)

## Plots

% Changes, Sales (Monthly)

In [None]:
#Visualizations: Granular, 20 lags 

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_granular_data_monthly_20lags[i]['coefficient'], yerr = ret_granular_data_monthly_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Monthly, 20 lags/{column_names[i]}_monthly_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [None]:
#Visualizations: Granular, 40 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_granular_data_monthly_40lags[i]['coefficient'], yerr = ret_granular_data_monthly_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Monthly, 40 lags/{column_names[i]}_monthly_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [None]:
#Visualizations: Total, 20 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_total_data_monthly_20lags[i]['coefficient'], yerr = ret_total_data_monthly_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, Monthly, 20 lags/{column_names_total[i]}_monthly_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [None]:
#Visualizations: Total, 40 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_total_data_monthly_40lags[i]['coefficient'], yerr = ret_total_data_monthly_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, Monthly, 40 lags/{column_names_total[i]}_monthly_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()

% Changes, Sales (3 months)

In [None]:
#Visualizations: Granular, 20 lags 

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_granular_data_threemon_20lags[i]['coefficient'], yerr = ret_granular_data_threemon_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Quarterly, 20 lags/{column_names[i]}_quarterly_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [None]:
#Visualizations: Granular, 40 lags 

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_granular_data_threemon_40lags[i]['coefficient'], yerr = ret_granular_data_threemon_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Quarterly, 20 lags/{column_names[i]}_quarterly_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [91]:
#Visualizations: Total, 20 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_total_data_threemon_20lags[i]['coefficient'], yerr = ret_total_data_threemon_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, 3 Months, 20 lags/{column_names_total[i]}_threemon_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

In [92]:
#Visualizations: Total, 40 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=ret_total_data_threemon_40lags[i]['coefficient'], yerr = ret_total_data_threemon_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, 3 Months, 40 lags/{column_names_total[i]}_threemon_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

% Changes, Sales (Quarterly)

In [51]:
#Visualizations: Total, 5 lags 

quarter_list = ['Q1', 'Q2', 'Q3', 'Q4']

for i in range(7):
    plot = plt.pyplot.errorbar(x=quarter_list, y=ret_total_data_quarterly_5lags[i]['coefficient'], yerr = ret_total_data_quarterly_5lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, Quarterly, 5 lags/{column_names_total[i]}_quarterly_5lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

In [52]:
#Visualizations: Total, 10 lags 

quarter_list = ['Q1', 'Q2', 'Q3', 'Q4']

for i in range(7):
    plot = plt.pyplot.errorbar(x=quarter_list, y=ret_total_data_quarterly_10lags[i]['coefficient'], yerr = ret_total_data_quarterly_10lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Total, Quarterly, 10 lags/{column_names_total[i]}_quarterly_10lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

In [93]:
#Visualizations: Granular, 5 lags 

quarter_list = ['Q1', 'Q2', 'Q3', 'Q4']

for i in range(58):
    plot = plt.pyplot.errorbar(x=quarter_list, y=ret_granular_data_quarterly_5lags[i]['coefficient'], yerr = ret_granular_data_quarterly_5lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Quarterly, 5 lags/{column_names[i]}_quarterly_5lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

In [94]:
#Visualizations: Granular, 10 lags 

quarter_list = ['Q1', 'Q2', 'Q3', 'Q4']

for i in range(58):
    plot = plt.pyplot.errorbar(x=quarter_list, y=ret_granular_data_quarterly_10lags[i]['coefficient'], yerr = ret_granular_data_quarterly_10lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Percent Changes/Granular, Quarterly, 10 lags/{column_names[i]}_quarterly_10lags.png', bbox_inches='tight')
    plt.pyplot.clf()

<Figure size 432x288 with 0 Axes>

Expenditure Shares

In [None]:
#Visualizations: Granular, 20 lags 

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=exp_granular_data_monthly_20lags[i]['coefficient'], yerr = exp_granular_data_monthly_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Sales Shares/Granular, Monthly, 20 lags/{column_names[i]}_monthly_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [None]:
#Visualizations: Granular, 40 lags 

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(58):
    plot = plt.pyplot.errorbar(x=month_list, y=exp_granular_data_monthly_40lags[i]['coefficient'], yerr = exp_granular_data_monthly_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Sales Shares/Granular, Monthly, 40 lags/{column_names[i]}_monthly_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()

In [95]:
#Visualizations: Total, 20 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=exp_total_data_monthly_20lags[i]['coefficient'], yerr = exp_total_data_monthly_20lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Sales Shares/Total, Monthly, 20 lags/{column_names_total[i]}_monthly_20lags.png', bbox_inches='tight')
    plt.pyplot.clf()


<Figure size 432x288 with 0 Axes>

In [96]:
#Visualizations: Total, 40 lags

month_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in range(7):
    plot = plt.pyplot.errorbar(x=month_list, y=exp_total_data_monthly_40lags[i]['coefficient'], yerr = exp_total_data_monthly_40lags[i]['se'])
    plt.pyplot.savefig(f'Data Visualizations/Retail Sales/Sales Shares/Total, Monthly, 40 lags/{column_names_total[i]}_monthly_40lags.png', bbox_inches='tight')
    plt.pyplot.clf()


<Figure size 432x288 with 0 Axes>