In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset; later cells read `digits.data`
# and `digits.target` from this object.
digits = load_digits()

In [3]:
# Short aliases for the digits inputs (X) and their targets (y).
X, y = digits.data, digits.target

In [4]:
# Single hold-out split: 20% of the samples go to the test set, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Baseline comparison: build each classifier with its default settings, fit it
# on the hold-out training split and print its test-set score (two decimals).
# NOTE: with default settings the solver stops at its iteration limit on this
# data, which is what triggers the max_iter warning in the recorded output.
models = [
    LogisticRegression,
    SVC,
    RandomForestClassifier,
    DecisionTreeClassifier,
]
for model in models:
    clf = model()
    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    print('{:.2f}'.format(test_score))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.97
0.99
0.98
0.86


In [6]:
from sklearn.model_selection import KFold
# Three-way K-fold splitter; no shuffle/random_state is passed, so the
# library defaults apply (visible in the repr echoed below).
kfold = KFold(n_splits= 3)
# Bare expression so the notebook displays the configured splitter.
kfold

KFold(n_splits=3, random_state=None, shuffle=False)

In [7]:
# Illustrate the splitter on ten dummy samples: every item yielded by split()
# is a (train indices, test indices) pair, printed side by side.
for fold in kfold.split(range(10)):
    print(*fold)

[4 5 6 7 8 9] [0 1 2 3]
[0 1 2 3 7 8 9] [4 5 6]
[0 1 2 3 4 5 6] [7 8 9]


In [8]:
from sklearn.model_selection import StratifiedKFold
# Three-way stratified splitter; used further down to split the digits data
# with the targets passed alongside the samples.
skfold = StratifiedKFold(n_splits=3)

In [9]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit a freshly built classifier on one split and score it on the other.

    Parameters
    ----------
    model : estimator class (not an instance)
        ``SVC`` is built with ``kernel='poly'``; ``LogisticRegression`` with
        ``solver='newton-cg', max_iter=1000``; any other class with defaults.
    X_train, y_train
        Data the classifier is fitted on.
    X_test, y_test
        Data the fitted classifier is scored on.

    Returns
    -------
    The value of ``clf.score(X_test, y_test)`` (mean accuracy for
    scikit-learn classifiers).
    """
    # Constructor overrides keyed by estimator class; classes that are not
    # listed fall back to an empty override, i.e. their defaults.
    overrides = {
        SVC: {'kernel': 'poly'},
        LogisticRegression: {'solver': 'newton-cg', 'max_iter': 1000},
    }
    clf = model(**overrides.get(model, {}))
    clf.fit(X_train, y_train)
    return clf.score(X_test, y_test)

In [10]:
# The candidate classifiers are the same for every fold, so build the list once.
models = [LogisticRegression, SVC, RandomForestClassifier, DecisionTreeClassifier]

# Score every classifier on each stratified fold of the digits data.
# NOTE: X_train / X_test / y_train / y_test are rebound on every iteration,
# replacing any earlier split stored under the same names.
for train_index, test_index in skfold.split(digits.data, digits.target):
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]
    for model in models:
        fold_score = get_score(model, X_train, X_test, y_train, y_test)
        # model() creates a default instance only so that its repr labels the
        # line; the classifier that was actually scored is built in get_score.
        print('Model: {} score: {:.2f}'.format(model(), fold_score))

Model: LogisticRegression() score: 0.92
Model: SVC() score: 0.96
Model: RandomForestClassifier() score: 0.93
Model: DecisionTreeClassifier() score: 0.75
Model: LogisticRegression() score: 0.94
Model: SVC() score: 0.98
Model: RandomForestClassifier() score: 0.96
Model: DecisionTreeClassifier() score: 0.82
Model: LogisticRegression() score: 0.92
Model: SVC() score: 0.95
Model: RandomForestClassifier() score: 0.92
Model: DecisionTreeClassifier() score: 0.77


In [11]:
from sklearn.model_selection import cross_val_score
# Let cross_val_score do the splitting, fitting and scoring in one call
# (cv=3); the resulting array of per-fold scores is the cell's displayed value.
log_reg = LogisticRegression(solver='newton-cg', max_iter=1000)
cross_val_score(log_reg, digits.data, digits.target, cv=3)

array([0.92487479, 0.93823038, 0.92320534])

#### Exercise: compare classifiers on the iris dataset using cross-validation

In [12]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris

In [13]:
# Load scikit-learn's bundled iris dataset; the cells below read `iris.data`
# and `iris.target` from this object.
iris = load_iris()

In [14]:
def get_score(model):
    """Cross-validate one classifier class on the iris data.

    NOTE: defining this replaces any earlier ``get_score`` in the notebook
    namespace, including one with a different signature.

    Parameters
    ----------
    model : estimator class (not an instance)
        ``SVC`` is built with ``kernel='poly'``; ``LogisticRegression`` with
        ``solver='newton-cg', max_iter=1000``; any other class with defaults.

    Returns
    -------
    The per-fold scores returned by ``cross_val_score`` with ``cv=3`` on the
    module-level ``iris`` dataset.
    """
    # Choose the constructor arguments first, then build the estimator once.
    if model == SVC:
        kwargs = {'kernel': 'poly'}
    elif model == LogisticRegression:
        kwargs = {'solver': 'newton-cg', 'max_iter': 1000}
    else:
        kwargs = {}
    return cross_val_score(model(**kwargs), iris.data, iris.target, cv=3)
# Bind the candidate list to `models`, the name the loop iterates over, so this
# cell runs on its own instead of relying on a same-named list left in the
# kernel by another cell. The order matches the recorded output below.
models = [LogisticRegression, SVC, RandomForestClassifier, DecisionTreeClassifier]
for model in models:
    # model() creates a default instance only so that its repr labels the line;
    # the classifier that is actually cross-validated is built in get_score.
    print('Model: {}, Score: {}'.format(model(), get_score(model)))

Model: LogisticRegression(), Score: [0.98 0.96 0.98]
Model: SVC(), Score: [0.98 0.94 0.98]
Model: RandomForestClassifier(), Score: [0.98 0.94 0.98]
Model: DecisionTreeClassifier(), Score: [0.98 0.92 0.98]


In [16]:
# 3-fold cross-validation on iris for four explicitly configured classifiers;
# one array of per-fold scores is printed per classifier, in list order.
estimators = [
    LogisticRegression(solver='newton-cg', max_iter=1000),
    SVC(kernel='poly'),
    RandomForestClassifier(n_estimators=20),
    DecisionTreeClassifier(),
]
for estimator in estimators:
    print(cross_val_score(estimator, iris.data, iris.target, cv=3))

[0.98 0.96 0.98]
[0.98 0.94 0.98]
[0.98 0.94 0.96]
[0.98 0.94 0.98]


In [17]:
from sklearn.model_selection import cross_val_predict