In [23]:
# load packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from scipy.signal import savgol_filter
import statsmodels.api as sm
import pymc3 as pm
import statsmodels.api as sm
from statsmodels.tools import add_constant
from itertools import combinations
# settings for seaborn plotting style
sns.set(color_codes=True)
# settings for seaborn plot sizes
sns.set(rc={'figure.figsize':(12,6)})
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# import logistic regression from sklearn
from sklearn.linear_model import LogisticRegression
# set seed for reproducibility
np.random.seed(621)

In [24]:
# Load Data
df = pd.read_pickle('nfl_df_averages.pkl')

In [25]:
# create X that is df columns with 'Avg' in them
X = df[[col for col in df.columns if 'Avg' in col]]
# remove home_homeAvg and home_awayAvg
X = X.drop(['home_homeAvg', 'home_awayAvg'], axis=1)

y = df['winner']

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Logistic Model

In [26]:
# create a function that takes in df_full and a number, n of features, fits a random forest model, and returns the top n features
def top_features(n, X_test=X_test, y_test=y_test, X_train=X_train, y_train=y_train):
    np.random.seed(621)
    # create a random forest model using the features list as the X and "winner" as the Y using sklearn
    rfmodel = RandomForestClassifier(n_estimators=100)
    rfmodel.fit(X_train, y_train)
    # make predictions
    predictions = rfmodel.predict(X_test)
    # evaluate the model
    #print(confusion_matrix(y_test, predictions))
    print(accuracy_score(y_test, predictions))
    # create a dataframe of the features and their importance
    feature_importance = pd.DataFrame(rfmodel.feature_importances_, index=X_train.columns, columns=['importance'])
    # sort the dataframe by importance
    feature_importance = feature_importance.sort_values('importance', ascending=False)
    # return the top n features
    # create a list of the top n features
    top_features = feature_importance.head(n).index
    # using only the top_features, create a new X_train and X_test
    X_train = X_train[top_features]
    X_test = X_test[top_features]
    # create a new logistic regression model with max_iter = 10000000, L2 penalty, and C = 1, and sag solver
    max_iter = 10000000
    penalty = 'l2'
    C = 1
    solver = 'sag'
    logmodel = LogisticRegression(max_iter=max_iter, penalty=penalty, C=C, solver=solver)
    logmodel.fit(X_train, y_train)
    # make predictions
    predictions = logmodel.predict(X_test)
    # evaluate the model
    print(confusion_matrix(y_test, predictions))
    print(accuracy_score(y_test, predictions))
    return feature_importance.head(n), accuracy_score(y_test, predictions), logmodel


In [27]:
logmodel = top_features(3)[2]

0.6455223880597015
[[219 229]
 [153 471]]
0.6436567164179104


### Tree Model

In [28]:
# perform hyperparameter tuning on n_estimators, max_depth, and min_samples_leaf
# create a list of values for n_estimators
estimators = [100, 200, 300]

# create a list of values for max_depth
depth = [5, 10, 15]

# create a list of values for min_samples_leaf
leaf = [1, 2, 3]

# create a list of hyperparameter options
hyperparameters = dict(n_estimators=estimators, max_depth=depth, min_samples_leaf=leaf)

# create a random forest classifier
rf = RandomForestClassifier(random_state=621)

# use GridSearch to search for the best hyperparameters
from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(rf, hyperparameters, cv=5, verbose=0)

# fit the model to the training data
best_model = clf.fit(X_train, y_train)

# print the best hyperparameters
print('Best n_estimators:', best_model.best_estimator_.get_params()['n_estimators'])
print('Best max_depth:', best_model.best_estimator_.get_params()['max_depth'])
print('Best min_samples_leaf:', best_model.best_estimator_.get_params()['min_samples_leaf'])

# report bestmodel results
print('Best Model Train Accuracy:', best_model.score(X_train, y_train))

Best n_estimators: 300
Best max_depth: 15
Best min_samples_leaf: 3
Best Model Train Accuracy: 0.9983663943990665


In [29]:
# create a model using the best hyperparameters
rf = RandomForestClassifier(n_estimators=best_model.best_estimator_.get_params()['n_estimators'], max_depth=best_model.best_estimator_.get_params()['max_depth'], min_samples_leaf=best_model.best_estimator_.get_params()['min_samples_leaf'], random_state=621)

# fit the model to the training data
rf.fit(X_train, y_train)

# make predictions
predictions = rf.predict(X_test)

# evaluate the model
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[201 247]
 [153 471]]
0.6268656716417911


### Neural Model

In [30]:
# scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# create a neural network
from sklearn.neural_network import MLPClassifier

# perform a grid search to find the best parameters
from sklearn.model_selection import GridSearchCV

# define the parameter values that should be searched
param_grid = {'hidden_layer_sizes': [(30,30,30), (50,50,50)],
                'activation': ['logistic', 'relu'],
                'solver': ['sgd', 'adam'],
                'alpha': [0.0001, 0.05],
                'learning_rate': ['adaptive']}
# instantiate the grid
grid = GridSearchCV(MLPClassifier(), param_grid, refit=True, verbose=3)

# fit the grid with data
grid.fit(X_train, y_train)

# view the results as a pandas DataFrame
results = pd.DataFrame(grid.cv_results_)

# print the best parameters
print(grid.best_params_)
# print the best estimator
print(grid.best_estimator_)
# print the best score
print(grid.best_score_)
# print the best index
print(grid.best_index_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.9s
[CV 2/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.2s
[CV 3/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.4s
[CV 4/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.3s
[CV 5/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.560 total time=   1.4s




[CV 1/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.575 total time=   4.3s




[CV 2/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.557 total time=   4.1s




[CV 3/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.592 total time=   4.5s




[CV 4/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.558 total time=   3.8s




[CV 5/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.595 total time=   3.3s
[CV 1/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.5s
[CV 2/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.5s
[CV 3/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.6s
[CV 4/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.8s
[CV 5/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.560 total time=   2.9s




[CV 1/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.569 total time=   4.7s




[CV 2/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.562 total time=   4.6s




[CV 3/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.578 total time=   4.5s




[CV 4/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.569 total time=   4.5s




[CV 5/5] END activation=logistic, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.575 total time=   4.5s
[CV 1/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.2s
[CV 2/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.2s
[CV 3/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.2s
[CV 4/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.1s
[CV 5/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.560 total time=   1.1s




[CV 1/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.611 total time=   3.6s




[CV 2/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.618 total time=   3.8s




[CV 3/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.617 total time=   5.8s




[CV 4/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.630 total time=   3.4s




[CV 5/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.610 total time=   3.1s
[CV 1/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.4s
[CV 2/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.4s
[CV 3/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.5s
[CV 4/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.559 total time=   1.4s
[CV 5/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.560 total time=   1.5s
[CV 1/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, 



[CV 2/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.621 total time=   5.1s
[CV 3/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.629 total time=   2.9s
[CV 4/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.644 total time=   2.1s
[CV 5/5] END activation=logistic, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.624 total time=   1.7s




[CV 1/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.629 total time=   3.3s




[CV 2/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.613 total time=   3.4s




[CV 3/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.621 total time=   3.4s




[CV 4/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.638 total time=   3.0s




[CV 5/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.630 total time=   2.6s




[CV 1/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.593 total time=   3.0s




[CV 2/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.561 total time=   3.5s




[CV 3/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.565 total time=   4.0s
[CV 4/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.576 total time=   3.5s
[CV 5/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.586 total time=   3.6s




[CV 1/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.599 total time=   4.9s




[CV 2/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.620 total time=   4.6s




[CV 3/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.604 total time=   4.5s




[CV 4/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.624 total time=   4.2s




[CV 5/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.625 total time=   4.2s
[CV 1/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.551 total time=   2.4s
[CV 2/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.594 total time=   3.2s
[CV 3/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.568 total time=   3.7s
[CV 4/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.585 total time=   2.8s
[CV 5/5] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.576 total time=   2.6s




[CV 1/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.609 total time=   3.4s




[CV 2/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.629 total time=   3.3s




[CV 3/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.637 total time=   3.2s




[CV 4/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.627 total time=   3.2s




[CV 5/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=sgd;, score=0.650 total time=   3.2s
[CV 1/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.571 total time=   2.7s




[CV 2/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.585 total time=   3.3s




[CV 3/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.573 total time=   3.4s




[CV 4/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.557 total time=   3.5s
[CV 5/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(30, 30, 30), learning_rate=adaptive, solver=adam;, score=0.590 total time=   3.2s




[CV 1/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.622 total time=   4.7s




[CV 2/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.617 total time=   4.7s




[CV 3/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.625 total time=   4.7s




[CV 4/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.627 total time=   4.6s




[CV 5/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd;, score=0.620 total time=   4.7s
[CV 1/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.559 total time=   3.9s
[CV 2/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.575 total time=   4.0s
[CV 3/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.572 total time=   3.1s




[CV 4/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.540 total time=   4.7s
[CV 5/5] END activation=relu, alpha=0.05, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam;, score=0.569 total time=   3.7s
{'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (30, 30, 30), 'learning_rate': 'adaptive', 'solver': 'sgd'}
MLPClassifier(alpha=0.05, hidden_layer_sizes=(30, 30, 30),
              learning_rate='adaptive', solver='sgd')
0.6303383897316219
12




In [31]:
# use grid.best_params_ to create a new model
params = grid.best_params_
mlp = MLPClassifier(hidden_layer_sizes=params['hidden_layer_sizes'], activation=params['activation'], solver=params['solver'], alpha=params['alpha'], learning_rate=params['learning_rate'], max_iter=1000000)

# fit the model to the training data
mlp.fit(X_train, y_train)

# predict using the best model
y_pred = mlp.predict(X_test)

# report bestmodel results
print('Best Model Test Accuracy:', accuracy_score(y_test, y_pred))

Best Model Test Accuracy: 0.5690298507462687


### SVM

In [13]:
# do a grid search to find the best parameters of the SVM model
# import the SVM model
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['linear']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

# print the best parameters
print(grid.best_params_)

# report classification accuracy of the best model
print(grid.best_estimator_.score(X_test, y_test))

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.6s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.560 total time=   1.5s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.5s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.560 total time=   1.5s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.623 total time=   1.3s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

In [16]:
# create an SVM model
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1, random_state=621)

# fit the model to the training data
svm.fit(X_train, y_train)

# make predictions
predictions = svm.predict(X_test)

# evaluate the model
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[229 219]
 [156 468]]
0.6501865671641791


### Naive Bayes

In [41]:
# create a Naive Bayes model
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
# fit the model
gnb.fit(X_train, y_train)
# predict on the test set
y_pred = gnb.predict(X_test)
# calculate the accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.6296641791044776


### Ensemble (Logistic, rf, SVM, mlp)

In [17]:
# create an ensemble model using mlp, logmodel, best_model, and svm
from sklearn.ensemble import VotingClassifier
ensemble = VotingClassifier(estimators=[('mlp', mlp), ('logmodel', logmodel), ('svm', svm)], voting='hard')
ensemble.fit(X_train, y_train)
predictions = ensemble.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[226 222]
 [157 467]]
0.6464552238805971


In [18]:
# create an ensemble model using mlp, logmodel, best_model, and svm
from sklearn.ensemble import VotingClassifier
ensemble = VotingClassifier(estimators=[('mlp', mlp), ('rf', rf), ('svm', svm)], voting='hard')
ensemble.fit(X_train, y_train)
predictions = ensemble.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[215 233]
 [146 478]]
0.6464552238805971


In [32]:
# create an ensemble model using mlp, logmodel, best_model, and svm
from sklearn.ensemble import VotingClassifier
ensemble = VotingClassifier(estimators=[('logmodel', logmodel), ('rf', rf), ('svm', svm)], voting='hard')
ensemble.fit(X_train, y_train)
predictions = ensemble.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[226 222]
 [154 470]]
0.6492537313432836


In [47]:
# create an ensemble model using gnb, logmodel, best_model, and svm
from sklearn.ensemble import VotingClassifier
ensemble = VotingClassifier(estimators=[('gnb', gnb),('logmodel', logmodel)], voting='hard')
ensemble.fit(X_train, y_train)
predictions = ensemble.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[288 160]
 [236 388]]
0.6305970149253731
