In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


#load dataset
dataset = pd.read_csv('data.txt',header=None)

#split dataset whitespace into columns
dataset = pd.DataFrame(dataset[0].str.split(expand=True))

#look for missing values
print(dataset.isnull().sum())

col_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO','B', 'LSTAT', 'MEDV']

dataset.columns = col_names

dataset.head()


0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
dtype: int64


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [2]:
dataset.dtypes

CRIM       object
ZN         object
INDUS      object
CHAS       object
NOX        object
RM         object
AGE        object
DIS        object
RAD        object
TAX        object
PTRATIO    object
B          object
LSTAT      object
MEDV       object
dtype: object

In [3]:
dataset_float = dataset.astype(float)

In [4]:
dataset_float.dtypes

CRIM       float64
ZN         float64
INDUS      float64
CHAS       float64
NOX        float64
RM         float64
AGE        float64
DIS        float64
RAD        float64
TAX        float64
PTRATIO    float64
B          float64
LSTAT      float64
MEDV       float64
dtype: object

In [5]:
X = dataset.iloc[: , 0:13].values
y = dataset.iloc[ : ,13].values

In [6]:
X = X.astype(float)
y = y.astype(float)

In [7]:
print('X:',X.shape)
print('y:',y.shape)

X: (506, 13)
y: (506,)


In [None]:
# Dealing with categorical variables
# from the scikit.preprocessing library we first import the Label Encoder class
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelEncoder_X = LabelEncoder()
X[: , 3] = labelEncoder_X.fit_transform(X[ : , 3])
# OneHotEncoder class avoids categorical variables from being evaluated by their int value
oneHotEncoder = OneHotEncoder(categorical_features = [3])
# toarray() method is used to return an ndarray 
X = oneHotEncoder.fit_transform(X).toarray()

# remove the extra dummy variable
X = X[: , 1:]
print(X.shape)
X

In [None]:
X[0:5,:]

In [8]:
X = np.append(arr = np.ones((506,1)).astype(int), values = X, axis = 1)


In [9]:
print('X:',X.shape)
print('y:',y.shape)

X: (506, 14)
y: (506,)


In [10]:
#split data into Training and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

## MODEL 1
* first add columns of ones, then normalize equations, multiply weights with x test to make a prediction using dot product



In [11]:
print('X:',X.shape)
print('y:',y.shape)
print('X_train:',X_train.shape)
print('X_test',X_test.shape)
print('y_train',y_train.shape)
print('y_test',y_test.shape)

X: (506, 14)
y: (506,)
X_train: (354, 14)
X_test (152, 14)
y_train (354,)
y_test (152,)


In [12]:
X[0]

array([1.000e+00, 6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01,
       6.575e+00, 6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01,
       3.969e+02, 4.980e+00])

In [13]:
X_train[0]

array([  1.     ,   1.62864,   0.     ,  21.89   ,   0.     ,   0.624  ,
         5.019  , 100.     ,   1.4394 ,   4.     , 437.     ,  21.2    ,
       396.9    ,  34.41   ])

In [14]:
# Need to add a column of ones to X (corresponding to intercept term)
# X_train = np.append(arr = np.ones((354,1)).astype(int), values = X_train, axis = 1)
# X_test = np.append(arr = np.ones((152,1)).astype(int), values = X_test, axis = 1)


In [15]:
print('X:',X.shape)
print('y:',y.shape)
print('X_train:',X_train.shape)
print('X_test',X_test.shape)
print('y_train',y_train.shape)
print('y_test',y_test.shape)

X: (506, 14)
y: (506,)
X_train: (354, 14)
X_test (152, 14)
y_train (354,)
y_test (152,)


In [16]:
X_train[0]

array([  1.     ,   1.62864,   0.     ,  21.89   ,   0.     ,   0.624  ,
         5.019  , 100.     ,   1.4394 ,   4.     , 437.     ,  21.2    ,
       396.9    ,  34.41   ])

In [17]:
# Normalizing the features
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X_train)
# X_test = sc_X.transform(X_test)

In [18]:
xTx = X_train.T.dot(X_train)

In [19]:
xTx = np.linalg.inv(xTx)

In [20]:
xTx_xT = xTx.dot(X_train.T)

In [21]:
w = xTx_xT.dot(y_train)

In [22]:
# (XTX)^-1 XTy
w

array([ 3.79371077e+01, -1.21310401e-01,  4.44664254e-02,  1.13416945e-02,
        2.51124642e+00, -1.62312529e+01,  3.85906801e+00, -9.98516565e-03,
       -1.50026956e+00,  2.42143466e-01, -1.10716124e-02, -1.01775264e+00,
        6.81446545e-03, -4.86738066e-01])

In [23]:
xTx[0]

array([ 1.66133921e+00, -1.02127659e-04, -1.57616007e-04,  1.43340003e-03,
       -1.17056237e-02, -6.97059989e-01, -9.92515088e-02,  3.97621068e-04,
       -2.26749787e-02,  6.22892307e-03, -1.30722571e-04, -2.42296929e-02,
       -2.48126204e-04, -5.50229262e-03])

In [24]:
xTx_xT

array([[-6.71297485e-02,  2.86512764e-02, -1.17004473e-01, ...,
         6.23842093e-02,  1.90654118e-02,  1.45851082e-01],
       [-3.59765137e-04,  2.52463381e-05, -2.46578636e-05, ...,
        -6.43318902e-05, -7.49277350e-05, -1.84803896e-04],
       [ 1.79967346e-04,  1.32934324e-04,  1.13644585e-04, ...,
        -8.99901507e-05, -2.34452620e-04, -4.85203694e-05],
       ...,
       [ 1.55354534e-03,  1.12416223e-03,  2.75592736e-03, ...,
        -9.43546795e-04, -2.03127996e-03, -2.15434802e-03],
       [ 3.84141778e-05, -3.97544492e-06,  1.42068851e-05, ...,
         5.08755925e-06,  1.24516226e-05,  6.63620947e-06],
       [ 2.14151128e-03, -3.25790533e-04,  1.63850220e-04, ...,
        -5.99385743e-04,  9.14849809e-04, -1.36780237e-04]])

In [25]:
predictions = X_test.dot(w)
print(predictions.shape)
predictions

(152,)


array([24.9357079 , 23.75163164, 29.32638296, 11.97534566, 21.37272478,
       19.19148525, 20.5717479 , 21.21154015, 19.04572003, 20.35463238,
        5.44119126, 16.93688709, 17.15482272,  5.3928209 , 40.20270696,
       32.31327348, 22.46213268, 36.50124666, 31.03737014, 23.17124551,
       24.74815321, 24.49939403, 20.6595791 , 30.4547583 , 22.32487164,
       10.18932894, 17.44286422, 18.26103077, 35.63299326, 20.81960303,
       18.27218007, 17.72047628, 19.33772473, 23.62254823, 28.97766856,
       19.45036239, 11.13170639, 24.81843595, 18.05294835, 15.59712226,
       26.21043403, 20.81140432, 22.17349382, 15.48367365, 22.62261604,
       24.88561528, 19.74754478, 23.0465628 ,  9.84579105, 24.36378793,
       21.47849008, 17.62118176, 24.39160873, 29.95102691, 13.57219422,
       21.53645439, 20.53306273, 15.03433182, 14.3232289 , 22.11929299,
       17.07321915, 21.54141094, 32.96766968, 31.371599  , 17.7860591 ,
       32.75069556, 18.74795323, 19.21428022, 19.41970047, 23.08

In [26]:
print(y_test.shape)
y_test

(152,)


array([22.6, 50. , 23. ,  8.3, 21.2, 19.9, 20.6, 18.7, 16.1, 18.6,  8.8,
       17.2, 14.9, 10.5, 50. , 29. , 23. , 33.3, 29.4, 21. , 23.8, 19.1,
       20.4, 29.1, 19.3, 23.1, 19.6, 19.4, 38.7, 18.7, 14.6, 20. , 20.5,
       20.1, 23.6, 16.8,  5.6, 50. , 14.5, 13.3, 23.9, 20. , 19.8, 13.8,
       16.5, 21.6, 20.3, 17. , 11.8, 27.5, 15.6, 23.1, 24.3, 42.8, 15.6,
       21.7, 17.1, 17.2, 15. , 21.7, 18.6, 21. , 33.1, 31.5, 20.1, 29.8,
       15.2, 15. , 27.5, 22.6, 20. , 21.4, 23.5, 31.2, 23.7,  7.4, 48.3,
       24.4, 22.6, 18.3, 23.3, 17.1, 27.9, 44.8, 50. , 23. , 21.4, 10.2,
       23.3, 23.2, 18.9, 13.4, 21.9, 24.8, 11.9, 24.3, 13.8, 24.7, 14.1,
       18.7, 28.1, 19.8, 26.7, 21.7, 22. , 22.9, 10.4, 21.9, 20.6, 26.4,
       41.3, 17.2, 27.1, 20.4, 16.5, 24.4,  8.4, 23. ,  9.7, 50. , 30.5,
       12.3, 19.4, 21.2, 20.3, 18.8, 33.4, 18.5, 19.6, 33.2, 13.1,  7.5,
       13.6, 17.4,  8.4, 35.4, 24. , 13.4, 26.2,  7.2, 13.1, 24.5, 37.2,
       25. , 24.1, 16.6, 32.9, 36.2, 11. ,  7.2, 22

In [27]:
#Mean Squared Error
mse_m1 = np.square(np.subtract(predictions, y_test)).mean()
mse_m1
#27.195965766884157 encoding categorical variable
#27.19596576688312 witouth using categorical variable encoder

27.19596576688312

## MODEL 2

In [28]:
#Fitting Multiple Linear Regression to Training Set
# import LinearRegression class from scikit-learn
# initialize a LinearRegression object and fit X and y train sets
from sklearn.linear_model import LinearRegression
mlrObj = LinearRegression()
mlrObj.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [29]:
# weight parameters

mlrObj.coef_

array([ 0.00000000e+00, -1.21310401e-01,  4.44664254e-02,  1.13416945e-02,
        2.51124642e+00, -1.62312529e+01,  3.85906801e+00, -9.98516565e-03,
       -1.50026956e+00,  2.42143466e-01, -1.10716124e-02, -1.01775264e+00,
        6.81446545e-03, -4.86738066e-01])

In [30]:
X_train

array([[1.00000e+00, 1.62864e+00, 0.00000e+00, ..., 2.12000e+01,
        3.96900e+02, 3.44100e+01],
       [1.00000e+00, 1.14600e-01, 2.00000e+01, ..., 1.86000e+01,
        3.94960e+02, 7.73000e+00],
       [1.00000e+00, 5.57780e-01, 0.00000e+00, ..., 2.12000e+01,
        3.94670e+02, 1.69600e+01],
       ...,
       [1.00000e+00, 1.50980e-01, 0.00000e+00, ..., 1.78000e+01,
        3.94510e+02, 1.03000e+01],
       [1.00000e+00, 2.29270e-01, 0.00000e+00, ..., 1.79000e+01,
        3.92740e+02, 1.88000e+01],
       [1.00000e+00, 1.39140e-01, 0.00000e+00, ..., 1.66000e+01,
        3.96900e+02, 1.46900e+01]])

In [31]:
X_train[0]

array([  1.     ,   1.62864,   0.     ,  21.89   ,   0.     ,   0.624  ,
         5.019  , 100.     ,   1.4394 ,   4.     , 437.     ,  21.2    ,
       396.9    ,  34.41   ])

In [32]:
# Predicting on Test Set
y_pred = mlrObj.predict(X_test)
print( "Test Set: \n", y_test,"\n")
print( "Predict Set: \n", y_pred)

#Mean Squared Error
mse_m2 = np.square(np.subtract(y_pred, y_test)).mean()
mse_m2

Test Set: 
 [22.6 50.  23.   8.3 21.2 19.9 20.6 18.7 16.1 18.6  8.8 17.2 14.9 10.5
 50.  29.  23.  33.3 29.4 21.  23.8 19.1 20.4 29.1 19.3 23.1 19.6 19.4
 38.7 18.7 14.6 20.  20.5 20.1 23.6 16.8  5.6 50.  14.5 13.3 23.9 20.
 19.8 13.8 16.5 21.6 20.3 17.  11.8 27.5 15.6 23.1 24.3 42.8 15.6 21.7
 17.1 17.2 15.  21.7 18.6 21.  33.1 31.5 20.1 29.8 15.2 15.  27.5 22.6
 20.  21.4 23.5 31.2 23.7  7.4 48.3 24.4 22.6 18.3 23.3 17.1 27.9 44.8
 50.  23.  21.4 10.2 23.3 23.2 18.9 13.4 21.9 24.8 11.9 24.3 13.8 24.7
 14.1 18.7 28.1 19.8 26.7 21.7 22.  22.9 10.4 21.9 20.6 26.4 41.3 17.2
 27.1 20.4 16.5 24.4  8.4 23.   9.7 50.  30.5 12.3 19.4 21.2 20.3 18.8
 33.4 18.5 19.6 33.2 13.1  7.5 13.6 17.4  8.4 35.4 24.  13.4 26.2  7.2
 13.1 24.5 37.2 25.  24.1 16.6 32.9 36.2 11.   7.2 22.8 28.7] 

Predict Set: 
 [24.9357079  23.75163164 29.32638296 11.97534566 21.37272478 19.19148525
 20.5717479  21.21154015 19.04572003 20.35463238  5.44119126 16.93688709
 17.15482272  5.3928209  40.20270696 32.31327348 22.46

27.195965766883194

## Backward Elimination Function

In [None]:
X.shape

In [None]:
y.shape

In [None]:
import statsmodels.formula.api as sm

#define a new matrix X_sig using all columns of X
X_sig = X[:, [0,1,2,3,4,5,6,7,8,9,10,11,12,13]]
numVars = len(X_sig[0])
obj_OLS = sm.OLS(endog = y, exog = X_sig).fit()
obj_OLS.summary()

In [None]:
#after we drop X7
X_sig = X[:, [0,1,2,3,4,5,6,8,9,10,11,12,13]]
obj_OLS = sm.OLS(endog = y, exog = X_sig).fit()
obj_OLS.summary()

In [None]:
#after we drop X3
X_sig = X[:, [0,1,2,4,5,6,8,9,10,11,12,13]]
obj_OLS = sm.OLS(endog = y, exog = X_sig).fit()
obj_OLS.summary()

In [None]:
#Splitting the data into Training Set and Test Set
X_sig_train, X_sig_test, y_sig_train, y_sig_test = train_test_split(X_sig, y, test_size=0.3, random_state=0)
mlrObj_sig_1 = LinearRegression()
mlrObj_sig_1.fit(X_sig_train, y_sig_train)
y_sig_pred = mlrObj_sig_1.predict(X_sig_test)

In [None]:
y_sig_pred

In [None]:
y_sig_test

In [None]:
#Mean Squared Error
mse_m3 = np.square(np.subtract(y_sig_pred, y_sig_test)).mean()
mse_m3

In [None]:
## Automatic Backward Elimination
import statsmodels.formula.api as sm
def backwardElimination(x, sl):
    numVars = len(x[0])
    for i in range(0, numVars):
        obj_OLS = sm.OLS(y,x).fit()
        maxVar = max(obj_OLS.pvalues).astype(float)
        if maxVar > sl:
            for j in range(0, numVars - i):
                if(obj_OLS.pvalues[j].astype(float) == maxVar):
                    x = np.delete(x, j, 1)
    obj_OLS.summary()
    return x
SL = 0.05
X_sig_bm = X[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13]]
X_Modeled_bm = backwardElimination(X_sig_bm, SL)
print(X_Modeled_bm.shape)
X_Modeled_bm

In [None]:
X_Modeled_bm[0:5,:]

In [None]:
#Splitting the data into Training Set and Test Set
X_sig_train_bm, X_sig_test_bm, y_sig_train_bm, y_sig_test_bm = train_test_split(X_Modeled_bm, y, test_size=0.3, random_state=0)
mlrObj_sig_bm = LinearRegression()
mlrObj_sig_bm.fit(X_sig_train_bm, y_sig_train_bm)
y_sig_pred_bm = mlrObj_sig_bm.predict(X_sig_test_bm)

In [None]:
y_sig_pred_bm

In [None]:
y_sig_test_bm

In [None]:
#Mean Squared Error
mse_m4 = np.square(np.subtract(y_sig_pred_bm, y_sig_test_bm)).mean()
mse_m4

## Forward Selection Function

In [None]:
"""
Forward Selection
1) Decide on a Significance level for a variable to enter the model, usually 0.05
2) Create Models of y with each of xi. Select the one with the lowest p-value
3) Keep this variable in the model, create models by adding one new variable each time
4) Select the variable with the lowest p-value
    if(this p-value < significance level)
        go to step 3
    else
        Select the previous model
"""

In [33]:
## Automatic Forward Elimination
import statsmodels.formula.api as sm
def forwardSelection(x, sl):
#     initVars = len(x_temp[0])
#     x_temp = x
    numVars = len(x[0])
#     x_update = np.ndarray((506,1))
#     x_temp = x_update
    for i in range(0, numVars):
        x_temp = x[:,[i]]
        initVars = len(x_temp[0])
        obj_OLS = sm.OLS(y,x_temp).fit()
        minVar = min(obj_OLS.pvalues).astype(float)
        if minVar < sl:
            for j in range(0, initVars):
                if(obj_OLS.pvalues[j].astype(float) == minVar):
#                     x_temp = np.insert(x_temp, j, x[:,[j]], 1) 
                     x_temp = np.append(x_temp, x[:,[j]],1)
#                      x_update = np.append(x_temp, x[:,[j]],1)
 

    obj_OLS.summary()
    #x = x_temp
    return x_temp
SL = 0.05
X_sig_fm = X[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13]]
X_Modeled_fm = forwardSelection(X_sig_fm, SL)
print(X_Modeled_fm.shape)
X_Modeled_fm[0:5,:]


(506, 2)


array([[4.98, 1.  ],
       [9.14, 1.  ],
       [4.03, 1.  ],
       [2.94, 1.  ],
       [5.33, 1.  ]])

In [None]:
sl = 0.05
x = X[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13]]
print(len(X[0]))
x[:, [8]]

In [None]:
    numVars = len(x[0])
    #x_temp = x
    for i in range(0, numVars):
        x_temp= x[:, [i]]
#         x_temp = x[:,i:i+1]
        initVars = len(x_temp[0])
        obj_OLS = sm.OLS(y,x_temp).fit()
        minVar = min(obj_OLS.pvalues).astype(float)
        if minVar < sl:
            for j in range(0, initVars):
                if(obj_OLS.pvalues[j].astype(float) == minVar):
                    #x_temp = np.insert(x_temp, j, x[:,j:j+1], 1) 
                    x_temp = np.append(x_temp, x[:,[j]],1)
x_temp

In [None]:

obj_OLS.summary()


In [None]:
print(x_temp.shape)
x_temp[0:5,:]

In [None]:
x

In [34]:
#Splitting the data into Training Set and Test Set
X_sig_train_fm, X_sig_test_fm, y_sig_train_fm, y_sig_test_fm = train_test_split(X_Modeled_fm, y, test_size=0.3, random_state=0)
mlrObj_sig_fm = LinearRegression()
mlrObj_sig_fm.fit(X_sig_train_fm, y_sig_train_fm)
y_sig_pred_fm = mlrObj_sig_fm.predict(X_sig_test_fm)

In [35]:
y_sig_pred_fm

array([27.68362724, 25.56339892, 24.62430236, 15.6496373 , 22.84292332,
       26.58962815, 23.70456862, 25.79575271, 22.55248109, 24.49844406,
       14.82671764, 14.24583316, 17.56655605,  7.41075923, 32.93095029,
       30.20079328, 24.39194858, 30.83976619, 29.62959021, 26.29918592,
       27.81916695, 23.14304697, 21.14867695, 27.82884836, 24.90506319,
       21.88446395, 21.40039355, 19.50283762, 30.71390789, 18.2442546 ,
       17.33420226, 20.55811107, 24.96315164, 22.78483488, 26.03778791,
       20.61619952,  8.87265182, 26.19269043, 17.71177717, 11.45758771,
       26.48313267, 21.0905885 , 25.98938087, 17.22770678, 26.41536281,
       25.94097383, 19.45443058, 20.15149194,  6.43293704, 25.66989441,
       20.24830602, 17.7505028 , 23.51094046, 31.36256222, 18.03126363,
       19.58028888, 22.75579065, 22.52343686, 25.00187727, 18.06998926,
       27.24796389, 22.46534842, 30.08461638, 31.14957124, 20.50002263,
       30.37505862, 17.6246445 , 14.99130157, 15.63995589, 24.26

In [36]:
y_sig_test_fm

array([22.6, 50. , 23. ,  8.3, 21.2, 19.9, 20.6, 18.7, 16.1, 18.6,  8.8,
       17.2, 14.9, 10.5, 50. , 29. , 23. , 33.3, 29.4, 21. , 23.8, 19.1,
       20.4, 29.1, 19.3, 23.1, 19.6, 19.4, 38.7, 18.7, 14.6, 20. , 20.5,
       20.1, 23.6, 16.8,  5.6, 50. , 14.5, 13.3, 23.9, 20. , 19.8, 13.8,
       16.5, 21.6, 20.3, 17. , 11.8, 27.5, 15.6, 23.1, 24.3, 42.8, 15.6,
       21.7, 17.1, 17.2, 15. , 21.7, 18.6, 21. , 33.1, 31.5, 20.1, 29.8,
       15.2, 15. , 27.5, 22.6, 20. , 21.4, 23.5, 31.2, 23.7,  7.4, 48.3,
       24.4, 22.6, 18.3, 23.3, 17.1, 27.9, 44.8, 50. , 23. , 21.4, 10.2,
       23.3, 23.2, 18.9, 13.4, 21.9, 24.8, 11.9, 24.3, 13.8, 24.7, 14.1,
       18.7, 28.1, 19.8, 26.7, 21.7, 22. , 22.9, 10.4, 21.9, 20.6, 26.4,
       41.3, 17.2, 27.1, 20.4, 16.5, 24.4,  8.4, 23. ,  9.7, 50. , 30.5,
       12.3, 19.4, 21.2, 20.3, 18.8, 33.4, 18.5, 19.6, 33.2, 13.1,  7.5,
       13.6, 17.4,  8.4, 35.4, 24. , 13.4, 26.2,  7.2, 13.1, 24.5, 37.2,
       25. , 24.1, 16.6, 32.9, 36.2, 11. ,  7.2, 22

In [37]:
#Mean Squared Error
mse_5 = np.square(np.subtract(y_sig_pred_fm, y_sig_test_fm)).mean()
mse_5

39.81715050474416

In [None]:
 X[:, 3:4]

In [None]:
x_updated = X[:,[0,13]]
x_updated

In [None]:
import statsmodels.formula.api as sm

#define a new matrix X_sig using first column of X
for i in range(0,len(x[i])):
    X_sig_sf = x[:, [0,13,i]]
    obj_OLS = sm.OLS(endog = y, exog = X_sig_sf).fit()
    print(obj_OLS.summary())
    

In [None]:
for i in range(0,len(x[i])):
    X_sig_sf = x[:, [i]]
    obj_OLS = sm.OLS(endog = y, exog = X_sig_sf).fit()
    print(obj_OLS.summary())
    

In [None]:
#Splitting the data into Training Set and Test Set
X_sig_train_sf, X_sig_test_sf, y_sig_train_sf, y_sig_test_sf = train_test_split(X_sig_sf, y, test_size=0.3, random_state=0)
mlrObj_sig_sf = LinearRegression()
mlrObj_sig_sf.fit(X_sig_train_sf, y_sig_train_sf)
y_sig_pred_sf = mlrObj_sig_sf.predict(X_sig_test_sf)

In [None]:
y_sig_pred_sf 

In [None]:
y_sig_test_sf

In [None]:
#Mean Squared Error
mse_6 = np.square(np.subtract(y_sig_pred_sf, y_sig_test_sf)).mean()
mse_6

In [None]:
X_forwardTest = X[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13]]
X_forwardTest.shape



In [None]:
X_forwardTest[:,0:1].shape

In [None]:
X_forwardTest_oneVar = X_forwardTest[:,0:1]
print(X_forwardTest_oneVar.shape)
X_forwardTest_oneVar

In [None]:
X_forwardTest_oneVar = np.append(X_forwardTest_oneVar,X_forwardTest[:,1:2],1)
print(X_forwardTest_oneVar.shape)
X_forwardTest_oneVar

In [None]:
X_forwardTest_oneVar = np.append(X_forwardTest_oneVar,X_forwardTest[:,2:3],1)
print(X_forwardTest_oneVar.shape)
X_forwardTest_oneVar

In [None]:
X_temp_array = X_forwardTest
X_temp_array.shape

In [None]:
X_temp_array.colums