<a href="https://colab.research.google.com/github/roysaurav/Canada-GDP-Model/blob/main/Canada_GDP_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Setup**


In [None]:
!pip install plotly==4.8



In [None]:
# Start date of the sample: rows from this date onward feed the correlation
# heatmap and the regressions (used as the lower bound of a .loc date slice).
model_training_start_date = '2015-01-01' #@param {type:"date"}

In [None]:
# Google Drive mount, disabled by wrapping it in a string literal: the cell only
# evaluates (and echoes) the string; the code inside it is never executed.
'''
from google.colab import drive
drive.mount("/content/drive")
'''

'\nfrom google.colab import drive\ndrive.mount("/content/drive")\n'

In [None]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import plotly.express as px


In [None]:
# Column name of the dependent variable (the Y of every regression in this notebook).
dep_var = "STCA Canada Expenditure Based"	

In [None]:
#@title Data Prep

# Download link for the project workbook. A mounted-Drive path used to be assigned
# to `path` first, but it was overwritten on the next line and never read.
path = "https://drive.google.com/uc?export=download&id=16hS_2LyULBmsjLd8cCLkRJ-2WJA1U082"

# The first line of the sheet is skipped so the following line supplies the column
# names; rows are indexed and ordered by "Date", and rows with any missing value
# are dropped.
df = (
    pd.read_excel(path, sheet_name="GDP Canada Data", skiprows=1)
    .set_index("Date")
    .sort_index()
    .dropna()
)

# Pair each row's indicators with the NEXT row's value of the dependent variable.
# The final row has no successor, so its dependent value becomes NaN here.
df[dep_var] = df[dep_var].shift(-1)

df.head()

Unnamed: 0_level_0,STCA Canada Expenditure Based,Canada Real GDP (QoQ % SAAR),Canada OECD Leading Indicators,PMI Output,PMI New Orders,PMI Stocks of Finished Goods,Canada GDP Household Saving Rate,STCA Canada Retail Sales YoY S,Average Hourly Wage Rate,Canada Wages & Salaries Labour,OECD Canada Employment,STCA Canada Labor Force Unemployment,STCA Canada Net Change in Labor,STCA Canada Initial & Renewal Jobless Claim,Canada Mortgage and Housing Co,Real GDP by Expenditure Reside
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-10-29,6.215,2.9,101.1048,53.3,55.7,51.4,3.8,3.9,1.44,70885632.0,80.29,8.0,26.2,252140.0,174.102,-7.2
2010-11-30,5.575,2.9,101.1815,55.0,57.8,52.8,3.8,7.0,2.19,71184622.0,80.29,7.7,30.2,239030.0,194.104,-7.2
2010-12-31,5.575,4.59,101.2633,57.9,56.7,48.5,4.2,4.9,1.83,71685950.0,80.48,7.7,45.1,245960.0,166.823,-2.8
2011-01-31,5.575,4.59,101.3013,56.8,55.4,49.4,4.2,3.4,1.78,72156974.0,80.48,7.8,59.3,233140.0,170.157,-2.8
2011-02-28,5.713,4.59,101.2443,55.9,56.8,50.5,4.2,3.8,2.44,72618930.0,80.48,7.7,0.4,240590.0,179.438,-2.8


In [None]:
# Candidate explanatory variables: every column except the dependent variable.
# NOTE: the name `vars` shadows the Python builtin of the same name.
vars = df.columns.tolist()
vars.remove(dep_var)

# **Correlation**


In [None]:
#@title Cross Sectional Correlations

# Pairwise correlations of all columns from the training start date onward,
# drawn as a red-yellow-green heatmap.
corr_ = df.loc[model_training_start_date:].corr()
fig = px.imshow(
    corr_,
    x=df.columns,
    y=df.columns,
    color_continuous_scale="RdYlGn",
    width=700,
    height=700,
)
fig.show()

In [None]:
# Rolling pairwise correlations over a 36-row window (12 * 3). Only the rows that
# describe the dependent variable are kept, so each remaining column holds that
# column's rolling correlation with `dep_var`, indexed by date.
rolling_corr_df = (
    df.rolling(12 * 3)
    .corr()
    .dropna()
    .reset_index()
    .loc[lambda pairs: pairs["level_1"] == dep_var]
    .drop(columns=[dep_var, "level_1"])
    .set_index("Date")
)


In [None]:
#@title Real GDP Correlation

# Line chart of the rolling-correlation series for the two real-GDP columns.
fig = rolling_corr_df.loc[
    :, ["Canada Real GDP (QoQ % SAAR)", "Real GDP by Expenditure Reside"]
].plot(backend="plotly", title="GDP Correlation")
fig.update_yaxes(title_text="Correlation")
fig.show()

In [None]:
#@title Leading Indicator Correlation

# Line chart of the rolling-correlation series for the OECD leading-indicator column.
# The figure carries its own title so it can be read without the cell header.
fig = rolling_corr_df[["Canada OECD Leading Indicators"]].plot(
    backend="plotly", title="Leading Indicator Correlation"
)
fig.update_yaxes(title_text='Correlation')

fig.show()

In [None]:
#@title PMI Indicator Correlation

# Line chart of the rolling-correlation series for the three PMI columns.
# The figure carries its own title so it can be read without the cell header.
fig = rolling_corr_df[
    ['PMI Output', 'PMI New Orders', 'PMI Stocks of Finished Goods']
].plot(backend="plotly", title="PMI Indicator Correlation")
fig.update_yaxes(title_text='Correlation')

fig.show()

In [None]:
#@title Employment Metrics Correlation

# Line chart of the rolling-correlation series for the employment-related columns.
# The figure carries its own title so it can be read without the cell header.
fig = rolling_corr_df[[
       'OECD Canada Employment', 'STCA Canada Labor Force Unemployment',
       'STCA Canada Net Change in Labor']].plot(
    backend="plotly", title="Employment Metrics Correlation"
)
fig.update_yaxes(title_text='Correlation')

fig.show()

In [None]:
#@title Housing Metrics Correlation

# Line chart of the rolling-correlation series for the housing column.
# The figure carries its own title so it can be read without the cell header.
fig = rolling_corr_df[["Canada Mortgage and Housing Co"]].plot(
    backend="plotly", title="Housing Metrics Correlation"
)
fig.update_yaxes(title_text='Correlation')
fig.show()

In [None]:
#@title Wages, Savings and Sales Correlation

# Line chart of the rolling-correlation series for the wage, saving-rate and
# retail-sales columns. The figure carries its own title so it can be read
# without the cell header.
fig = rolling_corr_df[[
    'Average Hourly Wage Rate',
    'Canada Wages & Salaries Labour',
    'Canada GDP Household Saving Rate',
    'STCA Canada Retail Sales YoY S',
]].plot(backend="plotly", title="Wages, Savings and Sales Correlation")
fig.update_yaxes(title_text='Correlation')
fig.show()


# **Model**


In [None]:
def run_model(var_):
  """Fit an OLS regression of the dependent variable on the given columns.

  Reads the module-level frame `df_` and column name `dep_var`; an intercept
  column is added to the regressors before fitting.

  Args:
    var_: list of column names of `df_` to use as regressors.

  Returns:
    A tuple (R-squared, adjusted R-squared, p-value of the last name in `var_`).
  """
  regressors = sm.add_constant(df_[var_]) # adding a constant
  target = df_[[dep_var]]

  fitted = sm.OLS(target, regressors).fit()
  last_added = var_[-1]
  return fitted.rsquared, fitted.rsquared_adj, fitted.pvalues[last_added]



In [None]:
#dep_var = "US Nominal GDP"
# Rebuild the candidate list: every column except the dependent variable.
# NOTE: the name `vars` shadows the Python builtin of the same name.
vars = df.columns.tolist()
vars.remove(dep_var)

In [None]:
#@title Scaling Variables for Model

# Min-max scale every column to the range [0, 1]: (x - min) / (max - min).
# The denominator used to be (min - max), which flipped the sign of every value
# (range [-1, 0]) and made the cell change the data again each time it was re-run.
column_min = df.min()
column_max = df.max()
df = (df - column_min) / (column_max - column_min)

# Remove rows containing NaN (for example the last row, whose dependent value was
# left empty by the shift(-1) in the data-prep cell). A column with a single
# constant value would scale to NaN everywhere and empty the frame.
df = df.dropna()
df.tail()

Unnamed: 0_level_0,STCA Canada Expenditure Based,Canada Real GDP (QoQ % SAAR),Canada OECD Leading Indicators,PMI Output,PMI New Orders,PMI Stocks of Finished Goods,Canada GDP Household Saving Rate,STCA Canada Retail Sales YoY S,Average Hourly Wage Rate,Canada Wages & Salaries Labour,OECD Canada Employment,STCA Canada Labor Force Unemployment,STCA Canada Net Change in Labor,STCA Canada Initial & Renewal Jobless Claim,Canada Mortgage and Housing Co,Real GDP by Expenditure Reside
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-04-30,-0.682893,-0.688162,-0.0,-0.0,-0.0,-0.0,-0.180505,-0.0,-1.0,-0.638752,-1.0,-0.927711,-0.0,-0.338174,-0.152565,-0.195345
2020-05-29,-0.0,-0.688162,-0.177951,-0.294618,-0.402299,-0.102804,-0.180505,-0.369347,-0.945838,-0.618643,-1.0,-1.0,-0.782099,-0.338174,-0.418931,-0.195345
2020-06-30,-0.0,-0.0,-0.374942,-0.648725,-0.62069,-0.383178,-1.0,-0.844221,-0.628887,-0.712252,-1.0,-0.855422,-1.0,-0.338174,-0.56678,-0.0
2020-07-31,-0.0,-0.0,-0.452721,-0.883853,-0.798851,-0.392523,-1.0,-0.854271,-0.514544,-0.835636,-1.0,-0.662651,-0.821125,-0.338174,-0.856929,-0.0
2020-08-31,-0.473176,-0.0,-0.508329,-0.909348,-0.893678,-0.317757,-1.0,-0.88191,-0.548646,-0.887626,-1.0,-0.578313,-0.751866,-0.338174,-1.0,-0.0


In [None]:
#@title Searching Best GDP Model

# Greedy forward selection. Each round fits one regression per remaining candidate
# (the already selected variables plus that candidate), keeps the candidates whose
# own p-value is below the threshold, and selects the one with the highest R-squared.
# The search stops when no candidate qualifies or when no candidates remain.
#
# Fixes relative to the earlier version of this cell:
#   * result_df now records the fit of the variable that was actually selected
#     (it used to record whichever candidate happened to be tried last);
#   * "no significant candidate" is an explicit check, not a bare `except:` that
#     would also have hidden unrelated errors;
#   * results are collected in lists and turned into a DataFrame once.
P_VALUE_THRESHOLD = 0.05

required_var = []
adjusted_rsquares = []  # not used by this cell; kept in case a later cell refers to it
selection_steps = []
df_ = df.loc[model_training_start_date:]

while vars:
  candidate_fits = []
  for var in vars:
    rsquared, rsquared_adj, pvalues = run_model(required_var + [var])
    candidate_fits.append(
        {"rsquared_adj": rsquared_adj, "rsquared": rsquared, "name": var, "p_values": pvalues}
    )

  # NaN p-values compare as False and are therefore excluded as well.
  significant_fits = [fit for fit in candidate_fits if fit["p_values"] < P_VALUE_THRESHOLD]
  if not significant_fits:
    print("Best Model Found")
    vars = []
    break

  # max() returns the first maximal element, so ties go to the earlier candidate.
  best_fit = max(significant_fits, key=lambda fit: fit["rsquared"])
  new_var = best_fit["name"]
  required_var.append(new_var)
  vars.remove(new_var)
  selection_steps.append({
      "rsquared_adj": best_fit["rsquared_adj"],
      "rsquared": best_fit["rsquared"],
      "p_values": best_fit["p_values"],
      "num_vars": len(required_var),
  })

# One row per selected variable, in selection order.
result_df = pd.DataFrame(
    selection_steps, columns=["rsquared_adj", "rsquared", "p_values", "num_vars"]
)


Best Model Found


In [None]:
# Adjusted R-squared stored in result_df, plotted against the number of variables.
fig = (
    result_df.plot(x="num_vars", y="rsquared_adj", backend="plotly")
    .update_yaxes(title_text="Model fit Adjusted for Number of Variables")
    .update_xaxes(title_text="Number of Variables")
)

fig.show()

In [None]:
#@title Best GDP Model Parameters

# Refit OLS on the selected variables (plus an intercept) and print the full summary.
Y = df_[[dep_var]]
X = sm.add_constant(df_[required_var]) # adding a constant

mod = sm.OLS(Y, X)
res = mod.fit()
print(res.summary())

                                  OLS Regression Results                                 
Dep. Variable:     STCA Canada Expenditure Based   R-squared:                       0.920
Model:                                       OLS   Adj. R-squared:                  0.912
Method:                            Least Squares   F-statistic:                     117.0
Date:                           Fri, 19 Mar 2021   Prob (F-statistic):           1.50e-31
Time:                                   07:52:56   Log-Likelihood:                 100.70
No. Observations:                             68   AIC:                            -187.4
Df Residuals:                                 61   BIC:                            -171.9
Df Model:                                      6                                         
Covariance Type:                       nonrobust                                         
                                           coef    std err          t      P>|t|      [0.025      0.