In [21]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import linearmodels.iv.model as lm
from scipy import stats

In [22]:
def firstStageRegress(variables, endo, data=None):
    """Run the first-stage OLS of a manual 2SLS and return fit and predictions.

    Regresses ``endo`` on a constant plus the columns named in ``variables``.

    Parameters
    ----------
    variables : list of str
        Column names used as first-stage regressors (the callers pass the
        instruments together with the exogenous controls).
    endo : str or list of str
        Column name(s) of the first-stage dependent variable.
    data : pandas.DataFrame, optional
        Frame holding the columns. When omitted, the notebook-level ``df``
        is used, so existing two-argument calls keep working.

    Returns
    -------
    results_stage1
        The fitted statsmodels OLS results object.
    predictor
        In-sample predictions of ``endo`` from the fitted first stage.
    """
    # Fall back to the notebook-global frame only when no frame is passed in;
    # passing ``data`` explicitly avoids relying on hidden kernel state.
    if data is None:
        data = df

    X_stage1 = sm.add_constant(data[variables])
    y_stage1 = data[endo]

    # Fit the regression to find the predicted values
    results_stage1 = sm.OLS(y_stage1, X_stage1).fit()
    predictor = results_stage1.predict(X_stage1)

    return results_stage1, predictor

def secondStageRegress(variables, predict, data=None):
    """Run the second-stage OLS of a manual 2SLS and return the fit.

    Regresses ``predict`` on a constant plus the columns named in
    ``variables``.

    Parameters
    ----------
    variables : list of str
        Column names used as regressors (the callers pass the first-stage
        fitted values together with the exogenous controls).
    predict : str or list of str
        Column name(s) of the second-stage dependent variable.
    data : pandas.DataFrame, optional
        Frame holding the columns. When omitted, the notebook-level ``df``
        is used, so existing two-argument calls keep working.

    Returns
    -------
    results_stage2
        The fitted statsmodels OLS results object.

    Notes
    -----
    When ``variables`` contains first-stage fitted values, the coefficients
    match the 2SLS point estimates, but the standard errors, t-statistics and
    p-values in the summary are plain OLS quantities computed from residuals
    that use the fitted regressor. They are not valid 2SLS standard errors;
    use a dedicated IV estimator (e.g. ``linearmodels`` ``IV2SLS``) for
    inference.
    """
    # Fall back to the notebook-global frame only when no frame is passed in;
    # passing ``data`` explicitly avoids relying on hidden kernel state.
    if data is None:
        data = df

    X_stage2 = sm.add_constant(data[variables])
    y_stage2 = data[predict]

    # Fit the regression model
    results_stage2 = sm.OLS(y_stage2, X_stage2).fit()

    return results_stage2

In [23]:
# Read dataset
df = pd.read_csv("Data-GP1-1(updated).csv")

# Fail fast with a clear message if a column the model cells rely on is absent,
# instead of surfacing a KeyError later inside a regression helper.
_required_cols = {"Mon", "Tue", "Wed", "Thu", "p", "q", "Stormy", "Mixed", "Rainy", "Wind"}
_missing_cols = _required_cols - set(df.columns)
if _missing_cols:
    raise ValueError(f"Dataset is missing required columns: {sorted(_missing_cols)}")

df

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Month,Year,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,2,12,91,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,3,12,91,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,4,12,91,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,5,12,91,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,6,12,91,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,4,5,92,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,5,5,92,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,6,5,92,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,7,5,92,0,1,0.223143,8.764053,0,0,2.813411


In [24]:
# Declare variables that will be the same across all models

# Day-of-week indicator columns; passed as regressors to both regression stages
Exo = ["Mon", "Tue","Wed","Thu"]
# Dependent variable of every first-stage regression (the endogenous regressor)
Endo = ["p"]
# Dependent variable of every second-stage regression
Predict = ["q"]

---

# Model 1: "Stormy","Mixed"

In [25]:
# Instruments used in this specification
IV = ["Stormy", "Mixed"]

# Stage 1: regress the endogenous variable on the instruments plus the
# exogenous controls, then store the fitted values in df for stage 2
result1, p_hat = firstStageRegress([*IV, *Exo], Endo)
df["phat"] = p_hat
print(result1.summary())

# Stage 2: regress the outcome on the fitted values plus the exogenous controls
result2 = secondStageRegress(["phat", *Exo], Predict)
print(result2.summary())


                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.245
Model:                            OLS   Adj. R-squared:                  0.201
Method:                 Least Squares   F-statistic:                     5.624
Date:                Wed, 20 Sep 2023   Prob (F-statistic):           4.35e-05
Time:                        10:48:31   Log-Likelihood:                -34.566
No. Observations:                 111   AIC:                             83.13
Df Residuals:                     104   BIC:                             102.1
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.3596      0.079     -4.543      0.0

---

# Model 2: "Wind","Mixed"

In [26]:
# Instruments used in this specification
IV = ["Wind", "Mixed"]

# Stage 1: regress the endogenous variable on the instruments plus the
# exogenous controls, then store the fitted values in df for stage 2
result1, p_hat = firstStageRegress([*IV, *Exo], Endo)
df["phat"] = p_hat
print(result1.summary())

# Stage 2: regress the outcome on the fitted values plus the exogenous controls
result2 = secondStageRegress(["phat", *Exo], Predict)
print(result2.summary())


                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.209
Model:                            OLS   Adj. R-squared:                  0.163
Method:                 Least Squares   F-statistic:                     4.577
Date:                Wed, 20 Sep 2023   Prob (F-statistic):           0.000368
Time:                        10:48:31   Log-Likelihood:                -37.156
No. Observations:                 111   AIC:                             88.31
Df Residuals:                     104   BIC:                             107.3
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5404      0.470     -5.405      0.0

---

# Model 3: "Wind","Rainy"

In [27]:
# Instruments used in this specification
IV = ["Wind", "Rainy"]

# Stage 1: regress the endogenous variable on the instruments plus the
# exogenous controls, then store the fitted values in df for stage 2
result1, p_hat = firstStageRegress([*IV, *Exo], Endo)
df["phat"] = p_hat
print(result1.summary())

# Stage 2: regress the outcome on the fitted values plus the exogenous controls
result2 = secondStageRegress(["phat", *Exo], Predict)
print(result2.summary())


                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.203
Model:                            OLS   Adj. R-squared:                  0.157
Method:                 Least Squares   F-statistic:                     4.411
Date:                Wed, 20 Sep 2023   Prob (F-statistic):           0.000517
Time:                        10:48:31   Log-Likelihood:                -37.578
No. Observations:                 111   AIC:                             89.16
Df Residuals:                     104   BIC:                             108.1
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5095      0.471     -5.330      0.0

---

# Model 4: "Mixed","Rainy", "Wind"

In [28]:
# Instruments used in this specification
IV = ["Mixed", "Rainy","Wind"]

# Stage 1: regress the endogenous variable on the instruments plus the
# exogenous controls, then store the fitted values in df for stage 2
result1, p_hat = firstStageRegress([*IV, *Exo], Endo)
df["phat"] = p_hat
print(result1.summary())

# Stage 2: regress the outcome on the fitted values plus the exogenous controls
result2 = secondStageRegress(["phat", *Exo], Predict)
print(result2.summary())


                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.209
Model:                            OLS   Adj. R-squared:                  0.155
Method:                 Least Squares   F-statistic:                     3.886
Date:                Wed, 20 Sep 2023   Prob (F-statistic):           0.000855
Time:                        10:48:32   Log-Likelihood:                -37.156
No. Observations:                 111   AIC:                             90.31
Df Residuals:                     103   BIC:                             112.0
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5402      0.473     -5.376      0.0