In [1]:
from sklearn.datasets import load_digits
import pandas as pd
import numpy as np

In [2]:
digits = load_digits()

In [3]:
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [4]:
df = pd.DataFrame(digits.data)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


In [5]:
df['target'] = digits.target

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
(xtrain, xtest, ytrain, ytest) = train_test_split(digits.data, digits.target, test_size=0.2)

In [8]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [9]:
lr.fit(xtrain, ytrain)

LinearRegression()

In [10]:
lr.score(xtest, ytest)

0.5702894627288408

In [11]:
from sklearn.linear_model import LogisticRegression
lgr = LogisticRegression(max_iter=100000)

In [12]:
lgr.fit(xtrain, ytrain)

LogisticRegression(max_iter=100000)

In [13]:
lgr.score(xtest, ytest)

0.9611111111111111

In [14]:
from sklearn.tree import DecisionTreeClassifier

In [15]:
dtc = DecisionTreeClassifier()

In [16]:
dtc.fit(xtrain, ytrain)

DecisionTreeClassifier()

In [17]:
dtc.score(xtest, ytest)

0.8555555555555555

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
rfc = RandomForestClassifier(max_features=.1)

In [20]:
rfc.fit(xtrain, ytrain)

RandomForestClassifier(max_features=0.1)

In [21]:
rfc.score(xtest, ytest)

0.975

In [22]:
from sklearn.model_selection import KFold

In [23]:
kf = KFold()

In [24]:
for train_index, test_index in kf.split([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]):
  print(train_index, test_index)

[2 3 4 5 6 7 8 9] [0 1]
[0 1 4 5 6 7 8 9] [2 3]
[0 1 2 3 6 7 8 9] [4 5]
[0 1 2 3 4 5 8 9] [6 7]
[0 1 2 3 4 5 6 7] [8 9]


In [25]:
def get_score(model):
  model.fit(xtrain, ytrain)
  return model.score(xtest, ytest)

In [26]:
get_score(lr)

0.5702894627288408

In [27]:
get_score(lgr)

0.9611111111111111

In [28]:
get_score(dtc)

0.8472222222222222

In [29]:
get_score(rfc)

0.9722222222222222

In [30]:
from sklearn.model_selection import StratifiedKFold

In [31]:
skf = StratifiedKFold()

In [32]:
score_lr = []
score_lgr = []
score_dtc = []
score_rfc = []

In [33]:
for train_index, test_index in skf.split(digits.data, digits.target):
    (xtrain, xtest, ytrain, ytest) = (digits.data[train_index], digits.data[test_index], digits.target[train_index], digits.target[test_index])
    score_lr.append(get_score(lr))
    score_lgr.append(get_score(lgr))
    score_dtc.append(get_score(dtc))
    score_rfc.append(get_score(rfc))

In [34]:
score_lr, score_lgr, score_dtc, score_rfc

([0.48213228097779004,
  0.5520035941487935,
  0.5692463893009454,
  0.5104744723295074,
  0.4168893138022042],
 [0.925,
  0.8777777777777778,
  0.9387186629526463,
  0.9331476323119777,
  0.8969359331476323],
 [0.7611111111111111,
  0.725,
  0.7938718662952646,
  0.8217270194986073,
  0.8022284122562674],
 [0.9222222222222223,
  0.9222222222222223,
  0.9610027855153204,
  0.9665738161559888,
  0.9275766016713092])

In [35]:
from sklearn.model_selection import cross_val_score

In [36]:
x = digits.data
y = digits.target

In [37]:
cross_val_score(lr, x, y)

array([0.48346048, 0.5583603 , 0.57534522, 0.5056632 , 0.40995457])

In [38]:
cross_val_score(lgr, x, y)

array([0.925     , 0.87777778, 0.93871866, 0.93314763, 0.89693593])