In [23]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, LeaveOneOut, StratifiedKFold, train_test_split

In [26]:

# Load the Breast Cancer dataset: X is the feature matrix, y the class labels
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Split the dataset into training and testing sets.
# 20% of the samples are held out; the fixed random_state keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity check: the two parts must partition the full dataset
assert len(X_train) + len(X_test) == len(X)
assert len(y_train) + len(y_test) == len(y)



In [28]:
# Initialize K-Fold Cross-Validation (consecutive, unshuffled folds)
num_folds = 5
kf = KFold(n_splits=num_folds)

# Initialize a Random Forest classifier.
# A fixed random_state makes the reported accuracy reproducible on re-run.
model = RandomForestClassifier(random_state=42)

# List to store the accuracy score of each fold
accuracy_scores = []

# Perform K-Fold Cross-Validation over the full dataset (X, y).
# Fold-local names are used so the hold-out X_train / y_train created by
# train_test_split are not silently overwritten by the loop.
for train_indices, val_indices in kf.split(X):
    X_fold_train, y_fold_train = X[train_indices], y[train_indices]
    X_val, y_val = X[val_indices], y[val_indices]

    # Train the model on this fold's training portion.
    # With the default warm_start=False, fit() rebuilds the forest from
    # scratch, so reusing one estimator across folds does not leak state.
    model.fit(X_fold_train, y_fold_train)

    # Make predictions on the validation portion
    y_pred = model.predict(X_val)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_val, y_pred)
    accuracy_scores.append(accuracy)

# Calculate average accuracy over the folds that were actually evaluated
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print(f'Average accuracy using K-Fold CV: {average_accuracy:.2f}')

Average accuracy using K-Fold CV: 0.96


In [29]:
# Initialize the number of folds
num_folds = 5

# Initialize Stratified K-Fold Cross-Validation.
# Unlike plain KFold, the split is driven by y as well as X.
skf = StratifiedKFold(n_splits=num_folds)

# Initialize a Random Forest classifier.
# A fixed random_state makes the reported accuracy reproducible on re-run.
model = RandomForestClassifier(random_state=42)

# List to store the accuracy score of each fold
accuracy_scores = []

# Perform Stratified K-Fold Cross-Validation over the full dataset (X, y).
# Fold-local names are used so the hold-out X_train / y_train created by
# train_test_split are not silently overwritten by the loop.
for train_indices, val_indices in skf.split(X, y):
    X_fold_train, y_fold_train = X[train_indices], y[train_indices]
    X_val, y_val = X[val_indices], y[val_indices]

    # Train the model on this fold's training portion.
    # With the default warm_start=False, fit() rebuilds the forest from
    # scratch, so reusing one estimator across folds does not leak state.
    model.fit(X_fold_train, y_fold_train)

    # Make predictions on the validation portion
    y_pred = model.predict(X_val)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_val, y_pred)
    accuracy_scores.append(accuracy)

# Calculate average accuracy over the folds that were actually evaluated
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print(f'Average accuracy using Stratified K-Fold CV: {average_accuracy:.2f}')

Average accuracy using Stratified K-Fold CV: 0.96


In [30]:
# Initialize Leave-One-Out Cross-Validation
loo = LeaveOneOut()

# Initialize a Random Forest classifier.
# A fixed random_state makes the reported accuracy reproducible on re-run.
model = RandomForestClassifier(random_state=42)

# List to store the accuracy score of each split
accuracy_scores = []

# Perform Leave-One-Out Cross-Validation over the full dataset (X, y).
# NOTE: the forest is refit once per split, so this cell is by far the
# slowest in the notebook.
# Fold-local names are used so the hold-out X_train / y_train created by
# train_test_split are not silently overwritten by the loop.
for train_indices, val_index in loo.split(X):
    X_fold_train, y_fold_train = X[train_indices], y[train_indices]
    # val_index is an index array, so X_val stays 2-D as predict() requires
    X_val, y_val = X[val_index], y[val_index]

    # Train the model on everything except the held-out sample.
    # With the default warm_start=False, fit() rebuilds the forest from
    # scratch, so reusing one estimator across splits does not leak state.
    model.fit(X_fold_train, y_fold_train)

    # Make predictions on the validation data
    y_pred = model.predict(X_val)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_val, y_pred)
    accuracy_scores.append(accuracy)

# Calculate average accuracy over the splits that were actually evaluated
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)
print(f'Average accuracy using LOOCV: {average_accuracy:.2f}')

Average accuracy using LOOCV: 0.96
