In [1]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
import math
import scipy.stats
import bisect

In [5]:
r"""This is similar to forward selection, except that when a new variable is
inserted, we check the p-values of the previously inserted regressors to ensure they are still significant."""

def stepwise_section_v1(Y, X, alpha):
    """Select OLS regressors by forward selection with a backward significance check.

    Starting from a model that contains only column 0 of ``X``, each round fits
    one model per remaining column and keeps the candidate with the lowest
    residual sum of squares. If that candidate's p-value is below ``alpha`` it
    is added, and every previously added regressor whose p-value in the
    enlarged model is above ``alpha`` is dropped. The search stops when the
    best candidate is not significant or when no candidates remain.

    A regressor dropped by the backward check is not put back into the
    candidate pool, so the loop always terminates.

    Parameters
    ----------
    Y : array-like
        Response values, passed unchanged to ``sm.OLS`` as the dependent variable.
    X : numpy.ndarray
        2-D design matrix indexed as ``X[:, columns]``. Column 0 (the constant,
        as prepared by the caller) is always kept.
    alpha : float
        Significance threshold used both for adding and for removing a regressor.

    Returns
    -------
    included_regressors : list of int
        Column indices of ``X`` in the selected model, starting with 0 and
        followed by the chosen regressors in insertion order.
    final_results : unfitted ``sm.OLS`` model
        The model built on the selected columns. It is deliberately returned
        unfitted on every path, because existing callers call ``.fit()`` on it.
    """
    # Column 0 is always part of the model.
    included_regressors = [0]

    # Columns that have not been tried successfully yet.
    remaining_regressors = list(range(1, np.shape(X)[1]))

    # Model on the currently selected columns (constant only to begin with).
    final_results = sm.OLS(Y, X[:, included_regressors])

    while remaining_regressors:
        # Forward step: find the candidate that reduces the SSR the most.
        # Ties keep the earliest candidate because the comparison is strict.
        best_regressor = None
        best_fit = None
        for candidate in remaining_regressors:
            candidate_fit = sm.OLS(Y, X[:, included_regressors + [candidate]]).fit()
            if best_fit is None or candidate_fit.ssr < best_fit.ssr:
                best_regressor, best_fit = candidate, candidate_fit

        # The candidate is the last column of its model, so its p-value is last.
        pvalues = np.asarray(best_fit.pvalues)
        if not pvalues[-1] < alpha:
            # The best candidate is not significant: stop with the current model.
            break

        remaining_regressors.remove(best_regressor)
        included_regressors.append(best_regressor)

        # Backward step: drop earlier regressors that are no longer significant.
        # A new list is built from a snapshot of the p-values so positions stay
        # aligned with the fitted model; popping in place would shift them.
        # Position 0 (the constant) is never dropped.
        included_regressors = [
            regressor
            for position, regressor in enumerate(included_regressors)
            if position == 0 or not pvalues[position] > alpha
        ]

        final_results = sm.OLS(Y, X[:, included_regressors])

    return included_regressors, final_results
    
    

In [6]:
# Read the 'Data' sheet and work with it as a plain array.
df = pd.read_excel('NFL_Data.xlsx', sheet_name='Data')
df_n = df.to_numpy()
# Column 0 is the response; indexing with a list keeps it two-dimensional.
Y = df_n[:, [0]]
# Every other column is a regressor; put a column of 1's in front for the constant.
X = sm.add_constant(df_n[:, 1:])

In [7]:
# Run the stepwise selection at the 10% significance level.
regressors, my_results = stepwise_section_v1(Y, X, alpha=0.1)

In [8]:
# Show the selected column indices, then the summary of the model fitted on them.
print(regressors, my_results.fit().summary(), sep="\n")

[0, 8, 2, 7]
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.786
Model:                            OLS   Adj. R-squared:                  0.760
Method:                 Least Squares   F-statistic:                     29.44
Date:                Fri, 07 Jan 2022   Prob (F-statistic):           3.27e-08
Time:                        18:49:34   Log-Likelihood:                -52.532
No. Observations:                  28   AIC:                             113.1
Df Residuals:                      24   BIC:                             118.4
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.8084      7.901     -0