# 5-Fold Cross Validation Tutorial with PyTorch Example

In this tutorial, we implement 5-fold cross-validation with PyTorch and scikit-learn's `KFold` on the Iris dataset, and use it to compare a small neural network trained with and without Dropout.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split, KFold
from sklearn.datasets import load_iris
import numpy as np

# Fetch the Iris data: `X` holds the feature matrix, `y` the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Wrap the NumPy arrays as tensors: features cast with .float(), labels with .long()
X_train, X_test = (torch.from_numpy(arr).float() for arr in (X_train, X_test))
y_train, y_test = (torch.from_numpy(arr).long() for arr in (y_train, y_test))

## Define the Neural Network Models

In [None]:
class Net(nn.Module):
    """Single-hidden-layer classifier: 4 inputs -> 128 ReLU units -> 3 scores.

    Args:
        dropout: When true, an ``nn.Dropout(0.5)`` layer follows the hidden
            activation; otherwise ``nn.Identity`` takes its place so that
            ``forward`` runs the same code path for both variants.
    """

    def __init__(self, dropout=False):
        super().__init__()
        self.fc1 = nn.Linear(4, 128)
        if dropout:
            self.dropout = nn.Dropout(0.5)
        else:
            self.dropout = nn.Identity()
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        """Return the raw (unnormalised) output scores for ``x``."""
        hidden = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(hidden)

# Build the two variants under comparison (dropout first, then the plain one)
model_with_dropout, model_without_dropout = (
    Net(dropout=use_dropout) for use_dropout in (True, False)
)

## Training and Evaluation Functions

In [None]:
def train(model, optimizer, criterion, X_train, y_train):
    """Run one full-batch optimisation step and return the loss as a float.

    The model is switched to training mode, gradients are cleared, the loss
    of ``model(X_train)`` against ``y_train`` is back-propagated, and the
    optimizer takes a single step.
    """
    optimizer.zero_grad()
    model.train()
    batch_loss = criterion(model(X_train), y_train)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

def evaluate(model, X_val, y_val):
    """Return the fraction of rows in ``X_val`` whose top-scoring class equals ``y_val``.

    The model is put into eval mode and the forward pass runs without
    gradient tracking.
    """
    model.eval()
    with torch.no_grad():
        scores = model(X_val)
        predicted = torch.max(scores, 1).indices
        n_hits = (predicted == y_val).sum().item()
    return n_hits / X_val.shape[0]

## Perform 5-Fold Cross Validation

In [None]:
# Five shuffled folds; the fixed seed keeps fold membership identical between runs
kfold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Train and evaluate the models
def perform_cross_validation(model, X_train, y_train, *, epochs=50, lr=0.001):
    """Estimate validation accuracy of ``model`` with k-fold cross-validation.

    Folds come from the module-level ``kfold`` splitter. For every fold the
    model is reset to the weights it had when this function was called, then
    trained on the fold's training part and scored on its validation part.
    Resetting matters: without it, weights fitted in an earlier fold have
    already seen the samples a later fold validates on, so the reported
    accuracies would be optimistically biased.

    Args:
        model: The ``nn.Module`` to evaluate. It is modified in place and is
            left holding the weights trained on the last fold.
        X_train: Feature tensor, indexed along its first dimension by fold.
        y_train: Label tensor aligned with ``X_train``.
        epochs: Full-batch training steps per fold (keyword-only).
        lr: Adam learning rate (keyword-only).

    Returns:
        One dict per fold with keys ``'fold'`` (1-based), ``'final_loss'``
        (training loss of the last step) and ``'accuracy'`` (validation
        accuracy after the last step).

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    # Function-scope import so the fix does not depend on the notebook's import cell.
    import copy

    if epochs < 1:
        raise ValueError('epochs must be at least 1')

    # state_dict() shares storage with the live parameters, so take a deep copy
    # to keep an untouched snapshot of the starting weights.
    initial_state = copy.deepcopy(model.state_dict())
    criterion = nn.CrossEntropyLoss()

    results = []
    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train)):
        print(f'Starting fold {fold+1}')

        # Every fold starts from the same untrained weights (no cross-fold leakage).
        model.load_state_dict(initial_state)

        # Create data subsets for the fold
        X_train_fold = X_train[train_idx]
        y_train_fold = y_train[train_idx]
        X_val_fold = X_train[val_idx]
        y_val_fold = y_train[val_idx]

        # A fresh optimizer per fold so Adam's moment estimates are not carried over.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        for epoch in range(epochs):
            loss = train(model, optimizer, criterion, X_train_fold, y_train_fold)
            if (epoch+1) % 10 == 0:
                accuracy = evaluate(model, X_val_fold, y_val_fold)
                print(f'Epoch {epoch+1}, Loss: {loss}, Accuracy: {accuracy}')

        # Score the fully trained fold model explicitly instead of relying on
        # the last periodic log, which is stale (or undefined) whenever
        # `epochs` is not a multiple of 10.
        accuracy = evaluate(model, X_val_fold, y_val_fold)
        results.append({'fold': fold+1, 'final_loss': loss, 'accuracy': accuracy})
    return results

# Put both variants through the same cross-validation routine (dropout model first)
results_with_dropout, results_without_dropout = (
    perform_cross_validation(net, X_train, y_train)
    for net in (model_with_dropout, model_without_dropout)
)

## Review Results

In [None]:
# Print the per-fold summary for each variant under its own heading
for heading, fold_results in (
    ('Results with Dropout:', results_with_dropout),
    ('\nResults without Dropout:', results_without_dropout),
):
    print(heading)
    for result in fold_results:
        print(f"Fold {result['fold']}: Loss: {result['final_loss']}, Accuracy: {result['accuracy']}")
