In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [3]:
df = pd.DataFrame(digits.data, columns = digits.feature_names )
df["target"] = digits.target
df["image_names"] = df.target.apply(lambda x: digits.target_names[x])
df

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target,image_names
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9,9


In [4]:
X = df.drop(columns = ["target", "image_names"], axis = 1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)


In [5]:
#logistic_regression

lr = LogisticRegression(max_iter = 10000)
lr.fit(X_train, y_train)
score = lr.score(X_test, y_test)
score

0.9416666666666667

In [6]:
sv = SVC()
sv.fit(X_train, y_train)
score1 = sv.score(X_test, y_test)
score1

0.9833333333333333

In [7]:
rf = RandomForestClassifier(n_estimators = 25, criterion = "entropy")
rf.fit(X_train, y_train)
score2 = rf.score(X_test, y_test)
score2

0.9611111111111111

In [8]:
from sklearn.model_selection import KFold

kf = KFold(n_splits =4)
kf

KFold(n_splits=4, random_state=None, shuffle=False)

In [9]:
for train_index, test_index in kf.split([1, 2, 3, 4, 5]):
    print(train_index, test_index)

[2 3 4] [0 1]
[0 1 3 4] [2]
[0 1 2 4] [3]
[0 1 2 3] [4]


In [10]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)


In [11]:
get_score(lr, X_train, X_test, y_train, y_test)

0.9416666666666667

In [12]:
get_score(sv, X_train, X_test, y_train, y_test)

0.9833333333333333

In [13]:
get_score(rf, X_train, X_test, y_train,  y_test)

0.9527777777777777

In [14]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 10)


In [16]:
score_lr = []
score_rf = []
score_sv = []

# Perform KFold cross-validation
for train_index, test_index in kf.split(X):
    X_train, X_test, y_train, y_test = X.iloc[train_index], X.iloc[test_index], y.iloc[train_index], y.iloc[test_index]
    score_lr.append(get_score(lr, X_train, X_test, y_train, y_test))
    score_rf.append(get_score(rf, X_train, X_test, y_train, y_test))
    score_sv.append(get_score(sv, X_train, X_test, y_train, y_test))

# Print the scores
print("Logistic Regression scores:", score_lr)
print("Support Vector Machine scores:", score_sv)
print("Random Forest scores:", score_rf)

Logistic Regression scores: [0.9355555555555556, 0.9242761692650334, 0.955456570155902, 0.9198218262806236]
Support Vector Machine scores: [0.9644444444444444, 0.9665924276169265, 0.9821826280623608, 0.9487750556792873]
Random Forest scores: [0.9422222222222222, 0.8819599109131403, 0.9443207126948775, 0.9242761692650334]
