In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
import data_provider

Create Model

In [6]:
grid_params = {
    'penalty': ['l2', 'l1'],
    'C': [1.0, 2.0, 3.0],
    'fit_intercept': [True, False],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'max_iter': [100, 10000]
}

for dataset in data_provider.data():

    features_train = dataset['train'][0]
    labels_train = dataset['train'][1]

    features_test = dataset['test'][0]
    labels_test = dataset['test'][1]

    gs = GridSearchCV(
        LogisticRegression(),
        grid_params,
        cv=3,
        n_jobs=-1
    )
    gs_results = gs.fit(features_train, labels_train)
    model = gs_results.best_estimator_
    model.fit(features_train, labels_train)

    predictions = model.predict(features_test)
    print("Model: {}".format(dataset['label']))
    print("Accuracy: {}".format(model.score(features_test, labels_test)))
    fpr, tpr, tresholds = metrics.roc_curve(labels_test, predictions)
    print("AUC: {}".format(metrics.auc(fpr, tpr)))
    print("Precision: {}".format(metrics.precision_score(labels_test, predictions)))
    print("Recall: {}".format(metrics.recall_score(labels_test, predictions)))
    print("ROC: {}".format(metrics.roc_curve(labels_test, predictions)))
    print()

        nan 0.73574782        nan 0.63410151 0.74122146 0.74122146
 0.74513016 0.73731643 0.73809524        nan        nan 0.73574782
        nan 0.73888138 0.74200027 0.74043716 0.74200027 0.64036313
 0.63488399        nan        nan 0.73574782        nan 0.63410151
 0.74200027 0.74200027 0.74200027 0.7373146  0.73418471        nan
        nan 0.73574782        nan 0.73496901 0.73965835 0.74043716
 0.74043533 0.63879818 0.63488399        nan        nan 0.73730727
        nan 0.63488399 0.73965835 0.73965835 0.74043533 0.73653579
 0.73809707        nan        nan 0.73808974        nan 0.73731643
 0.73887038 0.73418288 0.73887038 0.64036313 0.63488399        nan
        nan 0.73808974        nan 0.63410151 0.73887038 0.73965285
 0.73887038 0.73497085 0.7373146         nan        nan 0.73808974
        nan 0.73575149 0.73653029 0.73496168 0.73730727 0.63879818
 0.63488399        nan        nan 0.73730544        nan 0.63488399
 0.73653029 0.73731277 0.73730727 0.73653579 0.73887954       

Model: Wine Quality
Accuracy: 0.765625
AUC: 0.7638643588837867
Precision: 0.7758620689655172
Recall: 0.7894736842105263
ROC: (array([0.        , 0.26174497, 1.        ]), array([0.        , 0.78947368, 1.        ]), array([2, 1, 0]))

Model: Spam Base
Accuracy: 0.9232995658465991
AUC: 0.9161168047171135
Precision: 0.9195402298850575
Recall: 0.8823529411764706
ROC: (array([0.        , 0.05011933, 1.        ]), array([0.        , 0.88235294, 1.        ]), array([2, 1, 0], dtype=int64))

Model: Wine Quality Standard
Accuracy: 0.753125
AUC: 0.7538953648102359
Precision: 0.7839506172839507
Recall: 0.7426900584795322
ROC: (array([0.        , 0.23489933, 1.        ]), array([0.        , 0.74269006, 1.        ]), array([2, 1, 0]))

Model: Spam Base Standard
Accuracy: 0.9290882778581766
AUC: 0.9215349922785343
Precision: 0.9305019305019305
Recall: 0.8860294117647058
ROC: (array([0.        , 0.04295943, 1.        ]), array([0.        , 0.88602941, 1.        ]), array([2, 1, 0], dtype=int64))

