In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_boston, load_iris, load_wine, load_digits, \
    load_breast_cancer, load_diabetes, fetch_mldata
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import itertools

In [67]:
def fit(X,y,order=2):
    "Pass X=[x1,x2] to get nth order polynomial with interaction terms"
    poly = PolynomialFeatures(order, interaction_only=False)
    model = make_pipeline(poly, Ridge())
    model.fit(X, y)
    ridge = model.named_steps['ridge']
    # y_pred = model.predict(x)
    # ax.plot(x, y_pred, ':', c='k', lw=.7)
    terms = poly.get_feature_names()
    terms[0] = 'c'
#     terms = reversed(terms)
    terms = [f'{c:.4f}{t}' for c,t in zip(ridge.coef_, terms)]
#     print(list(zip(ridge.coef_, terms)))
    eqn = ' + '.join( terms )
    return model, eqn

In [171]:
def generation(X_train, X_test, y_train, y_test, k = 5):
#     features = X_train.columns.values
    allpairs = list(itertools.combinations(range(X_train.shape[1]), 2))

    models = []
    eqns = []
    pairs = []
    r2_trains, mae_trains = [], []
    r2_tests, mae_tests = [], []
    for j,pair in enumerate(allpairs):
#         feats = features[[pair[0],pair[1]]]
        feats = (pair[0],pair[1])
        pairs.append(feats)
    #     print(pair)
        model, eqn = fit(X_train[feats], y_train)
        models.append(model)
        eqns.append(eqn)
        y_pred = model.predict(X_train[feats])
        r2_train, mae_train = r2_score(y_train, y_pred), mean_absolute_error(y_train, y_pred)
        r2_trains.append(r2_train)
        mae_trains.append(mae_train)
        y_pred = model.predict(X_test[feats])
        r2_test, mae_test = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
        r2_tests.append(r2_test)
        mae_tests.append(mae_test)

    pairs = np.array(pairs)
    models = np.array(models)
    eqns = np.array(eqns)
    r2_tests = np.array(r2_tests)
    mae_tests = np.array(mae_tests)
    r2_trains = np.array(r2_trains)
    mae_trains = np.array(mae_trains)

    # pick best k per generation
    best_idx = np.argsort(mae_tests)
    best_idx = best_idx[:k]
    
    output = np.empty(shape=(X_train.shape[0], k))
    for j,model in enumerate(models[best_idx]):
#         pairs[]
        print(model)
        output[:,j] = model.predict(X_train[feats])

#     info = list(zip(models[best_idx], eqns[best_idx], pairs[best_idx], r2_trains[best_idx], mae_trains[best_idx], r2_tests[best_idx], mae_tests[best_idx]))
#     for model, eqn, pair, r2_train, mae_train, r2, mae in info:
#         print(pair, r2_train, mae_train, r2, mae, eqn)
        
    return models[best_idx], output

In [172]:
df = pd.read_csv("rent10k.csv")

X = df.drop('price',axis=1)
y = df['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

input = X_train
models, output = generation(input, X_test, y_train, y_test, k=5)
print(output)

KeyError: (0, 1)