In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import linearmodels.iv.model as lm
from scipy import stats

In [4]:
def firstStageRegress(variables, endo, data=None):
    """Fit the first stage of a two-stage least squares procedure.

    Regresses the endogenous column(s) on an intercept plus the supplied
    regressors using OLS, and returns the fit together with its in-sample
    predictions.

    Parameters
    ----------
    variables : list of str
        Column names used as first-stage regressors (the call sites pass
        instruments followed by exogenous controls). A constant column is
        added with ``sm.add_constant``.
    endo : str or list of str
        Column name(s) of the endogenous variable being predicted.
    data : pandas.DataFrame, optional
        Frame that holds the columns. When omitted, the notebook-level
        ``df`` is used, so existing two-argument calls behave as before.

    Returns
    -------
    results_stage1
        The fitted statsmodels OLS results object.
    predictor
        Predictions of ``endo`` for the same rows the model was fitted on.
    """
    if data is None:
        # Resolved at call time: ``df`` only needs to exist before the
        # function is called, not before it is defined.
        data = df

    X_stage1 = sm.add_constant(data[variables])
    y_stage1 = data[endo]

    # Fit the regression to find the predicted values
    results_stage1 = sm.OLS(y_stage1, X_stage1).fit()
    predictor = results_stage1.predict(X_stage1)

    return results_stage1, predictor

def secondStageRegress(variables, predict, data=None):
    """Fit the second stage of a two-stage least squares procedure.

    Regresses the outcome column(s) on an intercept plus the supplied
    regressors using OLS.

    Parameters
    ----------
    variables : list of str
        Column names used as second-stage regressors (the call site passes
        the first-stage fitted values followed by exogenous controls). A
        constant column is added with ``sm.add_constant``.
    predict : str or list of str
        Column name(s) of the dependent variable.
    data : pandas.DataFrame, optional
        Frame that holds the columns. When omitted, the notebook-level
        ``df`` is used, so existing two-argument calls behave as before.

    Returns
    -------
    results_stage2
        The fitted statsmodels OLS results object.
    """
    if data is None:
        # Resolved at call time: ``df`` only needs to exist before the
        # function is called, not before it is defined.
        data = df

    X_stage2 = sm.add_constant(data[variables])
    y_stage2 = data[predict]

    # Fit the regression model
    results_stage2 = sm.OLS(y_stage2, X_stage2).fit()

    return results_stage2

In [5]:
# Load the dataset from a CSV file located in the working directory,
# then show it as the cell output.
csv_path = "Data-GP1-1(updated).csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Jan,Feb,Mar,Apr,May,Month,Year,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,2,0,0,0,0,0,12,91,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,3,0,0,0,0,0,12,91,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,4,0,0,0,0,0,12,91,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,5,0,0,0,0,0,12,91,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,6,0,0,0,0,0,12,91,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,4,0,0,0,0,1,5,92,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,5,0,0,0,0,1,5,92,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,6,0,0,0,0,1,5,92,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,7,0,0,0,0,1,5,92,0,1,0.223143,8.764053,0,0,2.813411


---

# Original Regression

In [11]:
# Declare Variables: column names grouped by the role they play in the models

Exo = [          # exogenous controls
    "Mon",
    "Tue",
    "Wed",
    "Thu",
]
IV = ["Wind"]    # instrument(s)
Endo = ["p"]     # endogenous regressor
Predict = ["q"]  # dependent variable


In [12]:
# OLS (1): dependent variable on the endogenous regressor plus an intercept

Y_og = df[Predict]
X_og = sm.add_constant(df[Endo])

results_og = sm.OLS(endog=Y_og, exog=X_og).fit()

print(results_og.summary())


                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.078
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     9.167
Date:                Fri, 22 Sep 2023   Prob (F-statistic):            0.00308
Time:                        09:37:44   Log-Likelihood:                -119.35
No. Observations:                 111   AIC:                             242.7
Df Residuals:                     109   BIC:                             248.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.4187      0.076    110.445      0.0

In [13]:
# OLS (2): same as OLS (1) with the exogenous controls added as regressors.
# Note that X_og, Y_og and results_og from the previous fit are overwritten.

Y_og = df[Predict]
X_og = sm.add_constant(df[Endo + Exo])

results_og = sm.OLS(endog=Y_og, exog=X_og).fit()

print(results_og.summary())


                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.220
Model:                            OLS   Adj. R-squared:                  0.183
Method:                 Least Squares   F-statistic:                     5.940
Date:                Fri, 22 Sep 2023   Prob (F-statistic):           7.08e-05
Time:                        09:37:44   Log-Likelihood:                -110.00
No. Observations:                 111   AIC:                             232.0
Df Residuals:                     105   BIC:                             248.3
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.6069      0.143     60.170      0.0

---

# Two-Stage Least Squares (2SLS) Regression

In [14]:
# Declare Variables: column roles for the two-stage regressions
# (same values as the declarations in the OLS section)

Exo, IV = ["Mon", "Tue", "Wed", "Thu"], ["Wind"]  # exogenous controls, instrument(s)
Endo, Predict = ["p"], ["q"]                      # endogenous regressor, dependent variable


In [15]:
# 2SLS (5)

# First Stage Regression: endogenous variable on the instrument(s) and the
# exogenous controls (plus the intercept added inside firstStageRegress).
result1, p_hat = firstStageRegress(IV + Exo, Endo)
# secondStageRegress looks its regressors up in df, so the fitted values
# have to be stored there as a column. Re-running this cell just overwrites
# the column with the same values.
df["phat"] = p_hat
print(result1.summary())

# Second Stage Regression: outcome on the first-stage fitted values and the
# exogenous controls.
# NOTE: the coefficients of this manual second stage are the 2SLS estimates,
# but its reported standard errors / t-stats treat phat as observed data and
# are therefore not valid for inference; use the IV2SLS fit below for that.
result2 = secondStageRegress(["phat"] + Exo, Predict)
print(result2.summary())

# Wu-Hausman and Sargan Test
# linearmodels does not add an intercept on its own, so the constant has to be
# included in exog explicitly. Without it, these diagnostics would be computed
# for a no-intercept model that differs from the two-step fit above.
mlr2 = lm.IV2SLS(dependent=df[Predict],
                 exog=sm.add_constant(df[Exo]),
                 endog=df[Endo],
                 instruments=df[IV]).fit(cov_type="homoskedastic", debiased=True)

print(mlr2.wu_hausman(),"\n")

# NOTE(review): Sargan is an over-identification test. With as many
# instruments as endogenous regressors (one each here) the model is exactly
# identified, so expect an "invalid test" result and a NaN p-value below.
print(mlr2.sargan)

print(mlr2.sargan.pval)

                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.203
Model:                            OLS   Adj. R-squared:                  0.165
Method:                 Least Squares   F-statistic:                     5.343
Date:                Fri, 22 Sep 2023   Prob (F-statistic):           0.000205
Time:                        09:37:46   Log-Likelihood:                -37.580
No. Observations:                 111   AIC:                             87.16
Df Residuals:                     105   BIC:                             103.4
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.5084      0.468     -5.358      0.0