In [248]:
import numpy as np
import pandas as pd
import seaborn as sns
import math
import matplotlib.pyplot as plt
import statsmodels.api as sm
import linearmodels.iv.model as lm
from scipy import stats
from itertools import combinations

In [249]:
def firstStageRegress(variables, endo):
    """First Stage Regression"""
    
    X_stage1 = sm.add_constant(df[variables])
    y_stage1 = df[endo]

    # Fit the regression to find the predicted values
    results_stage1 = sm.OLS(y_stage1, X_stage1).fit()
    predictor = results_stage1.predict(X_stage1)

    return results_stage1, predictor 

def secondStageRegress(variables, predict):  
    """Second Stage Regression"""
    
    X_stage2 = sm.add_constant(df[variables])
    y_stage2 = df[predict]  

    # Fit the regression model
    results_stage2 = sm.OLS(y_stage2, X_stage2).fit()
    
    return results_stage2

In [250]:
def generateCombi(IVs):
    ## Hypothesis of Exogenous Variable (shouldnt be correlated to supply)
    # Day of the week affects how much the consumers buy fish, fixed effect

    ## Hypothesis of IVs (must be correlated to price, not supply)
    # Stormy, Mixed, Wind shows severity of weather, better quality fish, higher price
    # Stormy, Mixed, Wind shows severity of weather, affects fishing condition, higher effort, set higher price
    # Rainy and Cold, assumption for demand
    
    combi_list = []

    # Generate all possible combinations of variables and append into a list
    for r in range(1, len(IVs) + 1):
        combi_list.extend(combinations(IVs, r))

    # Print total number of combinations made
    print(f"Total combinations: {len(combi_list)} \n")
    
    return combi_list

In [251]:
def weakInstrumentTest(IV_combis, Exo):
    """Weak Instrument Test; significance level is deemsed at 0.05%"""

    print("=====================================")
    print("Weak Instrument Test Filtered Results")
    print("=====================================")
    
    sign_IVs = [] 
    sign_phat = []
    sign_result = []

    for iv_combi in IV_combis:
        # Setting IV Variables
        IV = list(iv_combi)

        # First Stage Regression
        result, p_hat = firstStageRegress(IV + Exo, "p")

        # Check that:
        # - the F Statistics p value is significant
        # - the IV variables are not present in the Exogenous Variables
        if result.f_pvalue < 0.05 and all(iv not in Exo for iv in IV) and all(result.pvalues[IV] < 0.05):  # Check if all p-values are less than 0.05
            # Append the corresponding IV variable into a list if true
            sign_IVs.append(IV)
            # Append the corresponding estimated coefficient into a list if true
            sign_phat.append(p_hat)
            # Append the corresponding regression result into a list if true
            sign_result.append(result)

    # For combinations that fulfilled the conditionals, print out their IVs and corresponding F-statistics p value 
    for idx, IV in enumerate(sign_IVs):
        print(f"{idx + 1}. IV:{IV}, Prob (F-stat):{sign_result[idx].f_pvalue}")
        
    return sign_IVs, sign_phat, sign_result

In [252]:
def wuHausmanAndSargan(IV_combis, Exo):
    """Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%"""
    
    print("==================================")
    print("Wu-Hausman and Sargan Test Results")
    print("==================================")

    sign_IVs = []
    feasible_combi = []
    sargan_p = []

    # Iterate through the combinations of IVs identified from Weak Instrument Test
    for idx, iv_combi in enumerate(IV_combis):
        
        # Perform 2 Step Least Square Regression
        mlr2 = lm.IV2SLS(dependent=df[["q"]], 
                         exog=df[Exo], 
                         endog=df[["p"]], 
                         instruments=df[list(iv_combi)]).fit(cov_type="homoskedastic", debiased=True)

        # Check that:
        # - p-value of Wu-Hausman test is significant
        # - p-value of Sargan test is insignificant or nan
        if (mlr2.wu_hausman().pval < 0.05 and mlr2.sargan.pval > 0.05) or (mlr2.wu_hausman().pval < 0.05 and math.isnan(mlr2.sargan.pval)):
            # Append the corresponding IV variable into a list if true
            sign_IVs.append(iv_combi)
            sargan_p.append(mlr2.sargan.pval)

    # For combinations that fulfilled the conditionals, append them into a list
    for idx, combi in enumerate(sign_IVs):
        IV = combi

        # Perform First Stage Regression
        result, p_hat = firstStageRegress(IV + Exo, "p")

        feasible_combi.append({
            "iv": IV,
            "result": result,
            "p_hat": p_hat,
            "sargan_p": sargan_p[idx]
        })

    # Print out the combinations in ascending order based on the p-values of the F-statistics test
    sorted_combi = sorted(feasible_combi, key=lambda x: x['result'].f_pvalue)    
    for idx, combi in enumerate(sorted_combi):
        print(f"{idx+1}. IVs: {combi['iv']} Sargan p-value: {combi['sargan_p']}")
        
    print(f"\nTotal Feasible Combination: {len(sorted_combi)}")
        
    return feasible_combi

---
&nbsp;

In [253]:
# Read dataset
df = pd.read_csv("Data-GP1-1(updated).csv")
df

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Jan,Feb,Mar,Apr,May,Month,Year,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,2,0,0,0,0,0,12,91,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,3,0,0,0,0,0,12,91,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,4,0,0,0,0,0,12,91,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,5,0,0,0,0,0,12,91,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,6,0,0,0,0,0,12,91,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,4,0,0,0,0,1,5,92,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,5,0,0,0,0,1,5,92,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,6,0,0,0,0,1,5,92,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,7,0,0,0,0,1,5,92,0,1,0.223143,8.764053,0,0,2.813411


---
&nbsp;

### **IV**


In [265]:
# Declare potential IVs
IV = ["Wind", "Stormy", "Mixed"]

---
&nbsp;

### **Exogeneous variables**: _Mon_, _Tue_, _Wed_, _Thu_


In [266]:
Exo = ["Mon","Tue", "Wed", "Thu"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.00020451485278110283
2. IV:['Stormy'], Prob (F-stat):0.0008155892641422602
3. IV:['Stormy', 'Mixed'], Prob (F-stat):4.34850312546488e-05


Wu-Hausman and Sargan Test Results
1. IVs: ['Stormy', 'Mixed'] Sargan p-value: 0.07487775805878072
2. IVs: ['Wind'] Sargan p-value: nan
3. IVs: ['Stormy'] Sargan p-value: nan

Total Feasible Combination: 3


&nbsp;

By logic, testing the following as IVs seems feasible
- "Stormy", "Mixed"
- "Wind"

---
&nbsp;
### **Exogeneous variables**: _Jan_, _Feb_, _Mar_, _Apr_, _May_


In [267]:
Exo = ["Jan","Feb","Mar","Apr","May"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.00012511880306444024
2. IV:['Stormy'], Prob (F-stat):4.403155743875432e-05
3. IV:['Stormy', 'Mixed'], Prob (F-stat):3.0679117396715093e-06


Wu-Hausman and Sargan Test Results
1. IVs: ['Stormy', 'Mixed'] Sargan p-value: 0.16399966835593704
2. IVs: ['Stormy'] Sargan p-value: nan
3. IVs: ['Wind'] Sargan p-value: nan

Total Feasible Combination: 3


&nbsp;

By logic, testing the following as IVs seems feasible
- "Stormy" and "Mixed"
- "Wind"

---
&nbsp;
### **Exogeneous variables**: _Rainy_, _Cold_



In [268]:
Exo = ["Rainy","Cold"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.00018263991581423053
2. IV:['Stormy'], Prob (F-stat):0.00018718425444759582
3. IV:['Stormy', 'Mixed'], Prob (F-stat):1.1659022187005622e-05


Wu-Hausman and Sargan Test Results
1. IVs: ['Wind'] Sargan p-value: nan
2. IVs: ['Stormy'] Sargan p-value: nan

Total Feasible Combination: 2


&nbsp;

By logic, it does not seem feasible to test any of the feasible combination as IVs.

---
&nbsp;
### **Exogeneous variables**: _Mon_, _Tue_, _Wed_, _Thu_, _Rainy_, _Cold_


In [269]:
Exo = ["Mon","Tue","Wed","Thu","Rainy","Cold"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.0009730797708021459
2. IV:['Stormy'], Prob (F-stat):0.002541915515548167
3. IV:['Stormy', 'Mixed'], Prob (F-stat):0.00018904126417687228


Wu-Hausman and Sargan Test Results
1. IVs: ['Wind'] Sargan p-value: nan

Total Feasible Combination: 1


&nbsp;

By logic, testing the following as IVs seems feasible
- "Wind"

---
&nbsp;
### **Exogeneous variables**: _Jan_, _Feb_, _Mar_, _Apr_, _May_, _Rainy_, _Cold_


In [271]:
Exo = ["Jan","Feb","Mar","Apr","May","Rainy","Cold"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.0005222988512019358
2. IV:['Stormy'], Prob (F-stat):0.0001668751565434733
3. IV:['Stormy', 'Mixed'], Prob (F-stat):1.0772122692110355e-05


Wu-Hausman and Sargan Test Results
1. IVs: ['Stormy', 'Mixed'] Sargan p-value: 0.050084201073458345
2. IVs: ['Stormy'] Sargan p-value: nan
3. IVs: ['Wind'] Sargan p-value: nan

Total Feasible Combination: 3


&nbsp;

By logic, testing the following as IVs seems feasible
- "Stormy","Mixed"
- "Wind"

---
&nbsp;
### **Exogeneous variables**: _Jan_, _Feb_, _Mar_, _Apr_, _May_, _Mon_, _Tue_, _Wed_, _Thu_, _Rainy_, _Cold_


In [272]:
Exo = ["Jan","Feb","Mar","Apr","May","Mon","Tue","Wed","Thu","Rainy","Cold"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.0012449817672316866
2. IV:['Stormy'], Prob (F-stat):0.0008842297612944743
3. IV:['Stormy', 'Mixed'], Prob (F-stat):7.835418840717315e-05


Wu-Hausman and Sargan Test Results
1. IVs: ['Stormy', 'Mixed'] Sargan p-value: 0.06099799510750792
2. IVs: ['Wind'] Sargan p-value: nan

Total Feasible Combination: 2


&nbsp;

By logic, testing the following as IVs seems feasible
- "Stormy","Mixed"
- "Wind"

---
&nbsp;
### **Exogeneous variables**: _Jan_, _Feb_, _Mar_, _Apr_, _May_, _Mon_, _Tue_, _Wed_, _Thu_


In [270]:
Exo = ["Jan","Feb","Mar","Apr","May","Mon","Tue","Wed","Thu"]

combinations_list = generateCombi(IV)

# Weak Instrument Test, where significance level is deemed at 0.05%
sign_IVs, sign_phat, sign_result = weakInstrumentTest(IV_combis=combinations_list,
                                                     Exo=Exo)
print("\n")

# Wu-Hausman Test and Sargan Test, where significance level is deemed at 0.05%
feasible_combi = wuHausmanAndSargan(IV_combis=sign_IVs, 
                                    Exo=Exo)

Total combinations: 7 

Weak Instrument Test Filtered Results
1. IV:['Wind'], Prob (F-stat):0.00042538086003042377
2. IV:['Stormy'], Prob (F-stat):0.0003526743130860967
3. IV:['Stormy', 'Mixed'], Prob (F-stat):3.596709123273379e-05


Wu-Hausman and Sargan Test Results
1. IVs: ['Stormy', 'Mixed'] Sargan p-value: 0.1644538932253402
2. IVs: ['Stormy'] Sargan p-value: nan
3. IVs: ['Wind'] Sargan p-value: nan

Total Feasible Combination: 3


&nbsp;

By logic, testing the following as IVs seems feasible
- "Stormy","Mixed"
- "Wind"