In [None]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error, plot_confusion_matrix

In [30]:
def display_poly_corr(df, features, target, degree=1):
    """Correlate polynomial expansions of ``features`` with ``target``.

    ``df[features]`` is expanded with ``PolynomialFeatures(degree=degree)``,
    the target column is attached to the expanded frame, and the target's
    column of the resulting correlation matrix is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both the feature columns and the target column.
    features : list
        Labels of the columns in ``df`` to expand.
    target : label
        Label of the column in ``df`` to correlate against.
    degree : int, default 1
        Degree passed to ``PolynomialFeatures``.

    Returns
    -------
    pandas.Series
        Correlation of every expanded feature (and of the target with
        itself) with ``target``, sorted in descending order.
    """
    df_cont = df[features]

    poly = PolynomialFeatures(degree=degree)
    df_cont_poly = poly.fit_transform(df_cont)

    # Name the expanded columns after the features that were actually
    # passed in. Newer scikit-learn exposes get_feature_names_out; older
    # releases only have get_feature_names, so fall back to it.
    input_names = list(df_cont.columns)
    if hasattr(poly, "get_feature_names_out"):
        poly_names = poly.get_feature_names_out(input_names)
    else:
        poly_names = poly.get_feature_names(input_names)

    # Code for turning a polynomial transformed ndarray back into a
    # dataframe adapted from the StackOverflow post linked below:
    # https://stackoverflow.com/questions/36728287/sklearn-preprocessing-polynomialfeatures-how-to-keep-column-names-headers-of
    # Reuse the source index so the target assignment below aligns
    # row-for-row even when ``df`` does not have a default RangeIndex.
    df_poly = pd.DataFrame(df_cont_poly, columns=poly_names, index=df_cont.index)
    df_poly[target] = df[target]

    return df_poly.corr()[target].sort_values(ascending=False)

In [2]:
def get_accuracy(model, model_string, X_train, X_test, y_train, y_test):
    """Fit ``model`` and print its score on the train and test splits.

    The model is fitted on ``(X_train, y_train)``, then ``model.score`` is
    evaluated on the training data and on the test data. A three-line
    report headed by ``model_string`` is printed, followed by a blank line.
    Nothing is returned.
    """
    model.fit(X_train, y_train)

    # Score train first, then test, before anything is printed.
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)

    report_lines = [
        model_string + ":",
        f"Train: {train_score}",
        f"Test: {test_score}\n",
    ]
    print("\n".join(report_lines))

In [3]:
# Code adapted from Patrick Wales-Dinan's demonstration on saving GridSearches

class GridSearchContainer:
    """Run grid searches against one fixed train/test split and keep the results.

    Attributes
    ----------
    X_train, X_test, y_train, y_test
        The split produced by ``train_test_split(X, y, random_state=42,
        stratify=y)`` at construction time.
    model_params : dict
        Maps ``f'{mod_name}_{count}'`` to that search's best parameters plus
        its best cross-validation score (stored under the evaluator's name).
    best_models : list
        ``(best_estimator_, best_score_)`` tuples, one per search, in the
        order the searches were run.
    model_df : pandas.DataFrame
        Tabular view of ``model_params`` (one row per search), sorted by the
        most recent evaluator in descending order. Empty until the first
        search has run.
    count : int
        Number of searches run so far; used to make row labels unique.
    """

    def __init__(self, X, y):
        """Split ``X`` and ``y`` once and initialise empty result stores."""
        (self.X_train, self.X_test,
         self.y_train, self.y_test) = train_test_split(X, y,
                                                       random_state=42,
                                                       stratify=y)
        self.model_params = {}
        self.best_models = []
        # An empty DataFrame *instance* (not the DataFrame class), so the
        # attribute is usable before the first search has been run.
        self.model_df = pd.DataFrame()
        self.count = 0

    def search(self, estimator, params, mod_name='model', evaluator='accuracy'):
        """Grid-search ``estimator`` over ``params`` and record the outcome.

        A 5-fold ``GridSearchCV`` scored with ``evaluator`` is fitted on the
        training split. Train and test scores are printed, then the best
        parameters, best score and best estimator are stored on the
        container. Nothing is returned.

        Parameters
        ----------
        estimator
            Estimator handed to ``GridSearchCV``.
        params
            Passed to ``GridSearchCV`` as ``param_grid``.
        mod_name : str, default 'model'
            Prefix for this search's row label in ``model_params`` /
            ``model_df``.
        evaluator : default 'accuracy'
            Passed to ``GridSearchCV`` as ``scoring``; also used as the name
            of the score column in ``model_df``.
        """
        gs = GridSearchCV(estimator,
                          param_grid=params,
                          cv=5,
                          scoring=evaluator)

        gs.fit(self.X_train, self.y_train)

        print(f"Train {evaluator}: {gs.score(self.X_train, self.y_train)}")
        print(f"Test {evaluator}: {gs.score(self.X_test, self.y_test)}")

        # Work on a copy: adding the score to gs.best_params_ itself would
        # leave the fitted search reporting a parameter that does not exist.
        record = dict(gs.best_params_)
        record[evaluator] = gs.best_score_
        self.model_params[f'{mod_name}_{self.count}'] = record

        # Rebuild the summary table from every search recorded so far.
        self.model_df = (
            pd.DataFrame.from_dict(self.model_params, orient='index')
            .sort_values(by=evaluator, ascending=False)
        )

        self.best_models.append((gs.best_estimator_, gs.best_score_))

        self.count += 1