In [44]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_breast_cancer
digits = load_breast_cancer()

In [45]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size = 0.3)

In [46]:
logmodel = LogisticRegression(max_iter=4000) #Find out why 4000 work? or amount of data needed or something like that
logmodel.fit(X_train, y_train)
logmodel.score(X_test, y_test)

0.9707602339181286

In [47]:
svmmodel = SVC()
svmmodel.fit(X_train, y_train)
svmmodel.score(X_test, y_test)

0.9532163742690059

In [48]:
rfmodel = RandomForestClassifier(n_estimators=40)
rfmodel.fit(X_train, y_train)
rfmodel.score(X_test, y_test)

0.9766081871345029

In [49]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)
kf

KFold(n_splits=5, random_state=None, shuffle=False)

In [50]:
for train_index, test_index in kf.split([1,2,3,4,5,6,7,8,9]):
  print(train_index, test_index)

[2 3 4 5 6 7 8] [0 1]
[0 1 4 5 6 7 8] [2 3]
[0 1 2 3 6 7 8] [4 5]
[0 1 2 3 4 5 8] [6 7]
[0 1 2 3 4 5 6 7] [8]


In [51]:
def get_score(model, X_train, X_test, y_train, y_test):
  model.fit(X_train, y_train)
  return model.score(X_test, y_test)

In [52]:
get_score(LogisticRegression(max_iter=3000), X_train, X_test, y_train, y_test)

0.9707602339181286

In [53]:
get_score(SVC(), X_train, X_test, y_train, y_test)

0.9532163742690059

In [54]:
from sklearn.base import estimator_html_repr
get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test)

0.9590643274853801

In [55]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

In [56]:
score_log = []
score_svm = []
score_rf = []

for train_index, test_index in folds.split(digits.data, digits.target):
  X_train, X_test, y_train, y_test = digits.data[train_index], digits.data[test_index], digits.target[train_index], digits.target[test_index]
  print(get_score(LogisticRegression(max_iter=4000), X_train, X_test, y_train, y_test))
  print(get_score(SVC(), X_train, X_test, y_train, y_test))
  print(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

0.9421052631578948
0.8526315789473684
0.9526315789473684
0.9631578947368421
0.9315789473684211
0.9736842105263158
0.9470899470899471
0.9470899470899471
0.9735449735449735


In [57]:
score_log = []
score_svm = []
score_rf = []

for train_index, test_index in folds.split(digits.data, digits.target):
  X_train, X_test, y_train, y_test = digits.data[train_index], digits.data[test_index], digits.target[train_index], digits.target[test_index]
  score_log.append(get_score(LogisticRegression(max_iter=4000), X_train, X_test, y_train, y_test))
  score_svm.append(get_score(SVC(), X_train, X_test, y_train, y_test))
  score_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [58]:
score_log

[0.9421052631578948, 0.9631578947368421, 0.9470899470899471]

In [59]:
score_svm

[0.8526315789473684, 0.9315789473684211, 0.9470899470899471]

In [60]:
score_rf

[0.9421052631578948, 0.9578947368421052, 0.9629629629629629]

In [61]:
from sklearn.model_selection import cross_val_score

In [62]:
cross_val_score(LogisticRegression(max_iter=4000), digits.data, digits.target)

array([0.93859649, 0.94736842, 0.98245614, 0.92982456, 0.96460177])

In [63]:
cross_val_score(SVC(), digits.data, digits.target)

array([0.85087719, 0.89473684, 0.92982456, 0.94736842, 0.9380531 ])

In [64]:
cross_val_score(RandomForestClassifier(n_estimators=40), digits.data, digits.target)

array([0.92982456, 0.94736842, 0.98245614, 0.95614035, 0.97345133])