In [23]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import load_digits
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [24]:
digits = load_digits()


In [25]:
x_train, x_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target,
                                                    test_size = 0.3)

In [26]:
print(len(x_train)) # train data

1257


In [27]:
print(len(y_train)) # train labels

1257


In [28]:
print(len(x_test)) # test data

540


In [29]:
print(len(y_test)) # test labels

540


In [30]:
# model 1
lr = LogisticRegression()
lr.fit(x_train, y_train)
lr.score(x_test, y_test)

0.9648148148148148

In [31]:
# model 2
svm = SVC()
svm.fit(x_train, y_train)
svm.score(x_test, y_test)

0.29814814814814816

In [33]:
# model 3
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=0)
clf.fit(x_train, y_train)
clf.score(x_test, y_test)

0.8407407407407408

In [38]:
# model 4
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(x_train, y_train)
neigh.score(x_test, y_test)

0.987037037037037

## Cross validation

In [39]:
from sklearn.model_selection import KFold

In [48]:
k = KFold(n_splits=4)
k

KFold(n_splits=4, random_state=None, shuffle=False)

In [53]:
def get_score(model, x_train, x_test, y_train, y_test):
    model.fit(x_train, y_train)
    return model.score(x_test, y_test)

In [54]:
get_score(LogisticRegression(), x_train, x_test, y_train, y_test)

0.9648148148148148

In [55]:
get_score(SVC(), x_train, x_test, y_train, y_test)

0.29814814814814816

In [None]:
get_score(LogisticRegression(), x_train, x_test, y_train, y_test)

In [56]:
from sklearn.model_selection import StratifiedKFold
k = StratifiedKFold(n_splits=4)

In [57]:
k

StratifiedKFold(n_splits=4, random_state=None, shuffle=False)

In [62]:
scores_lr = []
scores_svc = []
scores_dt = []
scores_knc = []

for train_index, test_index in k.split(digits.data, digits.target):
    x_train, x_test, y_train, y_test = digits.data[train_index], \
    digits.data[test_index],\
    digits.target[train_index],\
    digits.target[test_index]
    scores_lr.append(get_score(LogisticRegression(), 
                                x_train, 
                                x_test, 
                                y_train, 
                                y_test))

    scores_svc.append(get_score(SVC(), 
                                x_train, 
                                x_test, 
                                y_train, 
                                y_test))
    scores_dt.append(get_score(DecisionTreeClassifier(), 
                                x_train, 
                                x_test, 
                                y_train, 
                                y_test))
    scores_knc.append(get_score(KNeighborsClassifier(), 
                                x_train, 
                                x_test, 
                                y_train, 
                                y_test))


In [65]:
scores_lr

[0.9361233480176211, 0.9201773835920177, 0.9642058165548099, 0.903370786516854]

In [66]:
scores_svc

[0.41409691629955947,
 0.3991130820399113,
 0.4004474272930649,
 0.5235955056179775]

In [67]:
scores_dt

[0.801762114537445, 0.7361419068736141, 0.814317673378076, 0.7617977528089888]

In [68]:
scores_knc

[0.9493392070484582,
 0.9623059866962306,
 0.9731543624161074,
 0.9640449438202248]

In [69]:
# another built-in method

from sklearn.model_selection import cross_val_score

cross_val_score(LogisticRegression(), digits.data, digits.target)

array([0.89534884, 0.94991653, 0.90939597])