In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl 
import math
import scipy
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats.stats import pearsonr

import sys
sys.path.append("../")

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [8]:
from sklearn.datasets import load_boston
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()
dataset = pd.DataFrame(boston.data, columns=boston.feature_names)
dataset['target'] = boston.target

observations = len(dataset)
# Every column except the trailing 'target' is a predictor.
variables = dataset.columns[:-1]
# Explicit copy: X must be independent of `dataset`, so that adding columns to
# X later is not a chained assignment on a slice of `dataset`.
X = dataset.iloc[:, :-1].copy()
y = dataset['target'].values

In [9]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

In [13]:
# Ordinary least squares with an intercept. `normalize` is intentionally not
# passed: False was its default, and the parameter was deprecated in
# scikit-learn 1.0 and removed in 1.2.
linear_regression = linear_model.LinearRegression(fit_intercept=True)
# degree=2 with interaction_only=True keeps the original features and adds
# their pairwise products (no squared terms); include_bias=False omits the
# constant column.
create_interactions = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
create_interactions

PolynomialFeatures(include_bias=False, interaction_only=True)

In [14]:
def r2_est(X, y):
    """Fit the shared ``linear_regression`` on (X, y) and return its in-sample R2.

    The module-level estimator is refitted on every call, and the score is
    computed on the same data used for fitting.
    """
    fitted_model = linear_regression.fit(X, y)
    predictions = fitted_model.predict(X)
    return r2_score(y, predictions)

In [16]:
# In-sample R2 of the plain linear model on the original features; the
# interaction screening below is measured against this value.
baseline = r2_est(X, y)
print('Baseline R2: %0.3f' % (baseline,))

Baseline R2: 0.741


In [18]:
# Expand X into the original (main-effect) columns followed by one column per
# pairwise interaction.
Xi = create_interactions.fit_transform(X)
# Number of original features. Taken from X directly because the transformer's
# `n_input_features_` attribute was deprecated in scikit-learn 1.0 and removed
# in 1.2.
main_effects = X.shape[1]

In [22]:
# Rows of powers_ past the main effects: each row flags (with 1s) the two
# input features multiplied together in that interaction column.
create_interactions.powers_[main_effects:]

array([[1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 1, 0],
       [0, 0, 0, ..., 1, 0, 1],
       [0, 0, 0, ..., 0, 1, 1]])

In [19]:
# Try each pairwise interaction on its own: fit on the main effects plus that
# single interaction column and report it when the in-sample R2 improves on
# the baseline by more than 0.01.
interaction_powers = create_interactions.powers_[main_effects:]
main_effect_columns = list(range(main_effects))
for k, effect in enumerate(interaction_powers):
    # The two features whose exponent is 1 are the ones being multiplied.
    termA, termB = variables[effect == 1]
    candidate_columns = main_effect_columns + [main_effects + k]
    candidate_r2 = r2_est(Xi[:, candidate_columns], y)
    increment = candidate_r2 - baseline
    if increment > 0.01:
        print('Adding interaction %8s * %8s R2: %5.3f' % (termA, termB, increment))

Adding interaction     CRIM *     CHAS R2: 0.011
Adding interaction     CRIM *       RM R2: 0.021
Adding interaction       ZN *       RM R2: 0.013
Adding interaction    INDUS *       RM R2: 0.038
Adding interaction    INDUS *      DIS R2: 0.013
Adding interaction      NOX *       RM R2: 0.027
Adding interaction       RM *      AGE R2: 0.024
Adding interaction       RM *      DIS R2: 0.018
Adding interaction       RM *      RAD R2: 0.049
Adding interaction       RM *      TAX R2: 0.054
Adding interaction       RM *  PTRATIO R2: 0.041
Adding interaction       RM *        B R2: 0.020
Adding interaction       RM *    LSTAT R2: 0.064


In [20]:
# Build a NEW frame with the RM*LSTAT product appended as the last column.
# `Xi = X` would only alias X, so adding the column would also change X and
# every cell that is re-run afterwards (baseline, interaction screening)
# would silently see the extra feature.
Xi = X.assign(interaction=X['RM'] * X['LSTAT'])
print("R2 of a model with RM*LSTAT interaction: %0.3f" % r2_est(Xi, y))

R2 of a model with RM*LSTAT interaction: 0.805
