In [85]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler


In [86]:

# Section: Loading Dataset 
digits = load_digits()


In [87]:

# Initial Model Training and Evaluation (Train/Test Split) 
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.3)


In [88]:

# Logistic Regression

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)
lr_score = lr.score(X_test_scaled, y_test)
print(f"Logistic Regression Score: {lr_score}")


Logistic Regression Score: 0.9629629629629629


In [89]:

# SVM
svm = SVC()
svm.fit(X_train, y_train)
svm_score = svm.score(X_test, y_test)
print(f"SVM Score: {svm_score}")

SVM Score: 0.9907407407407407


In [90]:


# Random Forest
rf = RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)
rf_score = rf.score(X_test, y_test)
print(f"Random Forest Score: {rf_score}")


Random Forest Score: 0.9685185185185186


In [91]:

# K-Fold Cross-Validation 
kf = KFold(n_splits=3)

def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)


In [92]:

# Scores with KFold
scores_lr = []
scores_svm = []
scores_rf = []
for train_index, test_index in kf.split(digits.data):
    X_train, X_test, y_train, y_test = digits.data[train_index], digits.data[test_index], \
                                       digits.target[train_index], digits.target[test_index]
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    scores_lr.append(get_score(LogisticRegression(max_iter=1000), X_train_scaled, X_test_scaled, y_train, y_test))
    scores_svm.append(get_score(SVC(), X_train, X_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(), X_train, X_test, y_train, y_test))

print("\nK-Fold Cross Validation Scores:")
print(f"Logistic Regression Scores: {scores_lr}")
print(f"SVM Scores: {scores_svm}")
print(f"Random Forest Scores: {scores_rf}")


K-Fold Cross Validation Scores:
Logistic Regression Scores: [0.9265442404006677, 0.9415692821368948, 0.9265442404006677]
SVM Scores: [0.9666110183639399, 0.9816360601001669, 0.9549248747913188]
Random Forest Scores: [0.9382303839732888, 0.9532554257095158, 0.9232053422370617]


In [93]:
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [94]:
for train_index, test_index in kf.split(range(10)):
    print("TRAIN:", train_index, "TEST:", test_index)

TRAIN: [4 5 6 7 8 9] TEST: [0 1 2 3]
TRAIN: [0 1 2 3 7 8 9] TEST: [4 5 6]
TRAIN: [0 1 2 3 4 5 6] TEST: [7 8 9]


In [95]:
def get_score(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [96]:
get_score(LogisticRegression(max_iter=1000), X_train, y_train, X_test, y_test)

0.9148580968280468