In [1]:
# # data_loaders.py

# # Import necessary libraries
# import torch
# from torch.utils.data import DataLoader, WeightedRandomSampler
# from torchvision import datasets, transforms

# # Define transformations with data augmentation for training
# train_transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.RandomRotation(30),  # randomly rotate images up to 30 degrees
#     transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
#     transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # randomly shift images
#     transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # simulate random zooming
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# # Define transformations for validation and test (no augmentation)
# val_transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# # Load the training dataset with augmentation
# train_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/train', transform=train_transform)

# # Calculate sample weights for the WeightedRandomSampler
# class_counts = [1342, 3876]  # Number of Normal and Pneumonia images
# class_weights = [1.0 / count for count in class_counts]
# targets = [label for _, label in train_dataset]
# sample_weights = [class_weights[label] for label in targets]

# # Create the WeightedRandomSampler
# sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# # Create DataLoader for training with the sampler
# train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler, num_workers=4)

# # Load the validation dataset without augmentation
# val_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/val', transform=val_transform)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# # Load the test dataset without augmentation
# test_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/test', transform=val_transform)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# # Print dataset sizes
# print(f'Number of images in training set: {len(train_dataset)}')
# print(f'Number of images in validation set: {len(val_dataset)}')
# print(f'Number of images in test set: {len(test_dataset)}')

# if __name__ == "__main__":
#     print("Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.")


In [2]:
# # data_loaders_and_training.py

# # Import necessary libraries
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.utils.data import DataLoader, WeightedRandomSampler, Subset
# from torchvision import datasets, transforms
# import nbimporter
# from model import PneumoniaDetectionCNN
# from sklearn.model_selection import KFold
# import matplotlib.pyplot as plt
# import csv


# def load_data():
#     # Define transformations with data augmentation for training
#     train_transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.RandomRotation(30),  # randomly rotate images up to 30 degrees
#         transforms.RandomHorizontalFlip(),  # randomly flip images horizontally
#         transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # randomly shift images
#         transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # simulate random zooming
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ])

#     # Define transformations for validation and test (no augmentation)
#     val_transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ])

#     # Load the training dataset with augmentation
#     train_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/train', transform=train_transform)

#     # Calculate sample weights for the WeightedRandomSampler
#     class_counts = [1342, 3876]  # Number of Normal and Pneumonia images
#     class_weights = [1.0 / count for count in class_counts]
#     targets = [label for _, label in train_dataset]
#     sample_weights = [class_weights[label] for label in targets]

#     # Create the WeightedRandomSampler
#     sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

#     # Create DataLoader for training with the sampler
#     train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler, num_workers=4)

#     # Load the validation dataset without augmentation
#     val_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/val', transform=val_transform)
#     val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

#     # Load the test dataset without augmentation
#     test_dataset = datasets.ImageFolder(root='/home/user/persistent/chest_xray/chest_xray/test', transform=val_transform)
#     test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

#     # Print dataset sizes
#     print(f'Number of images in training set: {len(train_dataset)}')
#     print(f'Number of images in validation set: {len(val_dataset)}')
#     print(f'Number of images in test set: {len(test_dataset)}')

#     print("Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.")

#     return train_dataset, train_loader, val_loader, test_loader

# if __name__ == "__main__":
#     # Call load_data function
#     train_dataset, train_loader, val_loader, test_loader = load_data()
#     print("Data loading completed. Proceeding to training.")

In [3]:
# data_loaders_and_training.py

# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, Subset
from torchvision import datasets, transforms
import nbimporter
from model import PneumoniaDetectionCNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import csv

def load_data(data_root='/home/user/persistent/chest_xray/chest_xray',
              batch_size=32, num_workers=4, img_size=224):
    """Build the chest X-ray datasets and loaders.

    The training set is augmented and drawn through a class-balancing
    ``WeightedRandomSampler``; validation and test sets are only resized
    and normalized.

    Args:
        data_root: Directory containing the ``train``, ``val`` and ``test``
            ImageFolder trees.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes used by all three loaders.
        img_size: Side length (pixels) every image is resized/cropped to.

    Returns:
        Tuple ``(train_dataset, train_loader, val_loader, test_loader)``.
    """
    # Training pipeline: grayscale -> resize -> random geometric augmentation
    # -> tensor in [0, 1] -> Normalize(0.5, 0.5), i.e. (x - 0.5) / 0.5,
    # which rescales pixel values to [-1, 1].
    train_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.Resize((img_size, img_size)),      # Resize to specified image size
        transforms.RandomRotation(30),                # randomly rotate images up to 30 degrees
        transforms.RandomHorizontalFlip(),            # randomly flip images horizontally
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # randomly shift images
        transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),  # simulate random zooming
        transforms.ToTensor(),                        # Convert to tensor (scales data to [0, 1])
        transforms.Normalize(mean=[0.5], std=[0.5])   # Rescale [0, 1] -> [-1, 1]
    ])

    # Validation / test pipeline: same preprocessing, no augmentation.
    val_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.Resize((img_size, img_size)),      # Resize to specified image size
        transforms.ToTensor(),                        # Convert to tensor (scales data to [0, 1])
        transforms.Normalize(mean=[0.5], std=[0.5])   # Rescale [0, 1] -> [-1, 1]
    ])

    # Load the training dataset with augmentation
    train_dataset = datasets.ImageFolder(root=f'{data_root}/train', transform=train_transform)

    # ImageFolder already keeps one integer label per sample in ``targets``.
    # Reading it avoids decoding and augmenting every image just to get labels.
    targets = train_dataset.targets

    # Count samples per class from the data itself rather than hard-coding the
    # counts, so the weights stay correct if the folder contents change.
    class_counts = [0] * len(train_dataset.classes)
    for label in targets:
        class_counts[label] += 1

    # Inverse-frequency weights: each class gets the same total sampling mass.
    # An empty class gets weight 0 instead of raising ZeroDivisionError.
    class_weights = [1.0 / count if count else 0.0 for count in class_counts]
    sample_weights = [class_weights[label] for label in targets]

    # Draw as many samples per epoch as the dataset holds, with replacement,
    # so minority-class images are repeated (with fresh random augmentation).
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    # Create DataLoader for training with the sampler
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=num_workers)

    # Load the validation dataset without augmentation
    val_dataset = datasets.ImageFolder(root=f'{data_root}/val', transform=val_transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Load the test dataset without augmentation
    test_dataset = datasets.ImageFolder(root=f'{data_root}/test', transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Print dataset sizes
    print(f'Number of images in training set: {len(train_dataset)}')
    print(f'Number of images in validation set: {len(val_dataset)}')
    print(f'Number of images in test set: {len(test_dataset)}')

    print("Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.")

    return train_dataset, train_loader, val_loader, test_loader

if __name__ == "__main__":
    # Build the datasets and loaders once; load_data() prints the size of
    # each split, and the four returned objects become notebook globals.
    train_dataset, train_loader, val_loader, test_loader = load_data()
    print("Data loading completed. Proceeding to training.")


Number of images in training set: 5216
Number of images in validation set: 16
Number of images in test set: 624
Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.
Data loading completed. Proceeding to training.


In [2]:
# data_loaders_and_training.py

# Import necessary libraries
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, Subset
from torchvision import datasets, transforms
import nbimporter
from model import PneumoniaDetectionCNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import csv
from torchvision.transforms import ToPILImage

def load_data(data_root='/home/user/persistent/chest_xray/chest_xray',
              batch_size=32, num_workers=4, img_size=224):
    """Build the chest X-ray datasets and loaders.

    The training set is augmented and drawn through a class-balancing
    ``WeightedRandomSampler``; validation and test sets are only resized
    and normalized.

    Args:
        data_root: Directory containing the ``train``, ``val`` and ``test``
            ImageFolder trees.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes used by all three loaders.
        img_size: Side length (pixels) every image is resized/cropped to.

    Returns:
        Tuple ``(train_dataset, train_loader, val_loader, test_loader)``.
    """
    # Training pipeline: grayscale -> resize -> random geometric augmentation
    # -> tensor in [0, 1] -> Normalize(0.5, 0.5), i.e. (x - 0.5) / 0.5,
    # which rescales pixel values to [-1, 1].
    train_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.Resize((img_size, img_size)),      # Resize to specified image size
        transforms.RandomRotation(30),                # randomly rotate images up to 30 degrees
        transforms.RandomHorizontalFlip(),            # randomly flip images horizontally
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # randomly shift images
        transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),  # simulate random zooming
        transforms.ToTensor(),                        # Convert to tensor (scales data to [0, 1])
        transforms.Normalize(mean=[0.5], std=[0.5])   # Rescale [0, 1] -> [-1, 1]
    ])

    # Validation / test pipeline: same preprocessing, no augmentation.
    val_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.Resize((img_size, img_size)),      # Resize to specified image size
        transforms.ToTensor(),                        # Convert to tensor (scales data to [0, 1])
        transforms.Normalize(mean=[0.5], std=[0.5])   # Rescale [0, 1] -> [-1, 1]
    ])

    # Load the training dataset with augmentation
    train_dataset = datasets.ImageFolder(root=f'{data_root}/train', transform=train_transform)

    # ImageFolder already keeps one integer label per sample in ``targets``.
    # Reading it avoids decoding and augmenting every image just to get labels.
    targets = train_dataset.targets

    # Count samples per class from the data itself rather than hard-coding the
    # counts, so the weights stay correct if the folder contents change.
    class_counts = [0] * len(train_dataset.classes)
    for label in targets:
        class_counts[label] += 1

    # Inverse-frequency weights: each class gets the same total sampling mass.
    # An empty class gets weight 0 instead of raising ZeroDivisionError.
    class_weights = [1.0 / count if count else 0.0 for count in class_counts]
    sample_weights = [class_weights[label] for label in targets]

    # Draw as many samples per epoch as the dataset holds, with replacement,
    # so minority-class images are repeated (with fresh random augmentation).
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    # Create DataLoader for training with the sampler
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=num_workers)

    # Load the validation dataset without augmentation
    val_dataset = datasets.ImageFolder(root=f'{data_root}/val', transform=val_transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Load the test dataset without augmentation
    test_dataset = datasets.ImageFolder(root=f'{data_root}/test', transform=val_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Print dataset sizes
    print(f'Number of images in training set: {len(train_dataset)}')
    print(f'Number of images in validation set: {len(val_dataset)}')
    print(f'Number of images in test set: {len(test_dataset)}')

    print("Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.")

    return train_dataset, train_loader, val_loader, test_loader

def save_transformed_images(loader, output_dir, mean=0.5, std=0.5):
    """Write every image yielded by ``loader`` to ``output_dir/<label>/image_<n>.png``.

    The loaders built in this notebook end their transform with
    ``Normalize(mean=[0.5], std=[0.5])``, so batches hold values in [-1, 1].
    ``ToPILImage`` converts float tensors by multiplying by 255 and casting to
    uint8 without clamping, so negative values would be corrupted in the PNG.
    The normalization is therefore undone (and the result clamped to [0, 1])
    before conversion.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches, with ``images``
            a float tensor batch and ``labels`` a tensor of integer class ids.
        output_dir: Root directory; one sub-directory per label is created.
        mean: Mean that was used by the loader's ``Normalize`` step.
        std: Standard deviation that was used by the loader's ``Normalize`` step.
    """
    os.makedirs(output_dir, exist_ok=True)

    to_pil = ToPILImage()
    image_index = 0  # running counter; gives every saved file a unique name

    for images, labels in loader:
        # Undo Normalize: x_norm * std + mean brings pixels back to [0, 1].
        images = (images * std + mean).clamp(0.0, 1.0)

        for image, label in zip(images, labels):
            label_dir = os.path.join(output_dir, str(label.item()))
            os.makedirs(label_dir, exist_ok=True)
            to_pil(image).save(os.path.join(label_dir, f'image_{image_index}.png'))
            image_index += 1

if __name__ == "__main__":
    # Build the datasets and loaders; the four results become notebook globals.
    train_dataset, train_loader, val_loader, test_loader = load_data()
    print("Data loading completed. Proceeding to save transformed images.")

    # Output roots for the materialized images, one per split.
    # save_transformed_images() creates one sub-directory per integer label
    # beneath each of these.
    train_output_dir = '/home/user/persistent/chest_xray/chest_xray/augmented/train'
    val_output_dir = '/home/user/persistent/chest_xray/chest_xray/augmented/val'
    test_output_dir = '/home/user/persistent/chest_xray/chest_xray/augmented/test'

    # Save transformed images to the output directories.
    # NOTE(review): train_loader draws through the WeightedRandomSampler built in
    # load_data() (replacement=True), so the saved training set is one resampled,
    # augmented snapshot in which the same source image can appear several times.
    print("Saving transformed training images...")
    save_transformed_images(train_loader, train_output_dir)

    print("Saving transformed validation images...")
    save_transformed_images(val_loader, val_output_dir)

    print("Saving transformed test images...")
    save_transformed_images(test_loader, test_output_dir)

    print("All transformed images have been saved.")


Number of images in training set: 5216
Number of images in validation set: 16
Number of images in test set: 624
Data loaders for training, validation, and testing with data augmentation and class balancing are set up and ready for use.
Data loading completed. Proceeding to save transformed images.
Saving transformed training images...
Saving transformed validation images...
Saving transformed test images...
All transformed images have been saved.


In [3]:
# data_loaders_and_training.py

# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import nbimporter
from model import PneumoniaDetectionCNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import csv

def train_model(device, k_folds=5, num_epochs=25, batch_size=32,
                learning_rate=0.001, num_workers=4,
                data_root='/home/user/persistent/chest_xray/chest_xray/augmented'):
    """Run K-fold cross-validation of ``PneumoniaDetectionCNN`` on the saved images.

    The folds are cut from the ``train`` directory only; the ``val`` and
    ``test`` directories are loaded just to report their sizes. For every fold
    a fresh model is trained, the weights of the epoch with the highest fold
    validation accuracy are written to ``best_model_fold_<n>.pth`` in the
    current working directory, and a per-fold summary is written to
    ``kfold_results.csv``.

    Args:
        device: ``torch.device`` the model and batches are moved to.
        k_folds: Number of cross-validation folds.
        num_epochs: Training epochs per fold.
        batch_size: Batch size for the per-fold loaders.
        learning_rate: Learning rate passed to RMSprop.
        num_workers: Worker processes for the per-fold loaders.
        data_root: Directory containing the ``train``, ``val`` and ``test``
            ImageFolder trees of already-transformed images.

    Returns:
        None. Results are printed and saved to disk.
    """
    # Paths to the transformed data directories
    train_data_path = f'{data_root}/train'
    val_data_path = f'{data_root}/val'
    test_data_path = f'{data_root}/test'

    # The images on disk are already resized/augmented, so only convert them to
    # a single-channel tensor and apply the same Normalize(0.5, 0.5) as before.
    base_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Ensure grayscale consistency
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])   # Normalize as per the previous transformation
    ])

    # Load the training, validation, and test datasets from transformed directories
    train_dataset = datasets.ImageFolder(root=train_data_path, transform=base_transform)
    val_dataset = datasets.ImageFolder(root=val_data_path, transform=base_transform)
    test_dataset = datasets.ImageFolder(root=test_data_path, transform=base_transform)

    # Print dataset sizes
    print(f'Number of images in training set: {len(train_dataset)}')
    print(f'Number of images in validation set: {len(val_dataset)}')
    print(f'Number of images in test set: {len(test_dataset)}')

    # Fixed random_state keeps the fold assignment identical between runs.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    # One summary entry per fold
    fold_accuracies = []
    fold_val_losses = []

    # Iterate over each fold
    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        print(f'\nFold {fold + 1}/{k_folds}')

        # Both subsets index into the training directory
        train_sub = Subset(train_dataset, train_idx)
        val_sub = Subset(train_dataset, val_idx)

        # Create DataLoader objects for the current fold
        train_loader = DataLoader(train_sub, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_sub, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        # Fresh model and optimizer per fold so no weights leak between folds.
        model = PneumoniaDetectionCNN().to(device)
        # BCELoss needs probabilities in [0, 1].
        # NOTE(review): assumes the model's last layer is a sigmoid - confirm in model.py.
        criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

        best_fold_accuracy = 0.0

        # Per-epoch history for the current fold
        train_losses = []
        val_losses = []

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for images, labels in train_loader:
                # Labels become float column vectors (N, 1) to match the loss input.
                images, labels = images.to(device), labels.to(device).float().unsqueeze(1)

                # Zero the gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(images)

                # Calculate loss
                loss = criterion(outputs, labels)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            # Mean of the per-batch losses for this epoch
            train_losses.append(running_loss / len(train_loader))
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_losses[-1]:.4f}')

            # Validation loop
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
                    outputs = model(images)
                    val_loss += criterion(outputs, labels).item()
                    # Threshold the predicted probability at 0.5
                    predicted = (outputs > 0.5).int()
                    total += labels.size(0)
                    correct += (predicted == labels.int()).sum().item()

            val_losses.append(val_loss / len(val_loader))
            val_accuracy = 100 * correct / total
            print(f'Validation Loss: {val_losses[-1]:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

            # Checkpoint only on a strict improvement in validation accuracy
            if val_accuracy > best_fold_accuracy:
                best_fold_accuracy = val_accuracy
                torch.save(model.state_dict(), f'best_model_fold_{fold + 1}.pth')
                print('Best model for this fold saved!')

        # The recorded loss is the fold's minimum over all epochs; it may come
        # from a different epoch than the best-accuracy checkpoint.
        fold_accuracies.append(best_fold_accuracy)
        fold_val_losses.append(min(val_losses))

    # Calculate average results across all folds
    average_accuracy = sum(fold_accuracies) / k_folds
    average_val_loss = sum(fold_val_losses) / k_folds

    print(f'\nAverage Validation Accuracy across {k_folds} folds: {average_accuracy:.2f}%')
    print(f'Average Validation Loss across {k_folds} folds: {average_val_loss:.4f}')

    # Save overall results to a CSV file
    with open('kfold_results.csv', 'w', newline='') as csvfile:
        fieldnames = ['fold', 'val_accuracy', 'val_loss']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for fold in range(k_folds):
            writer.writerow({'fold': fold + 1, 'val_accuracy': fold_accuracies[fold], 'val_loss': fold_val_losses[fold]})

    print('K-Fold Cross-Validation complete and results saved.')

# Use the GPU when CUDA is available, otherwise fall back to the CPU, then
# start the full K-fold training run (this executes as soon as the cell runs).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_model(device)


Number of images in training set: 5216
Number of images in validation set: 16
Number of images in test set: 624

Fold 1/5
Epoch [1/25], Loss: 49.4347
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Best model for this fold saved!
Epoch [2/25], Loss: 50.5725
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [3/25], Loss: 50.4532
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [4/25], Loss: 50.4930
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [5/25], Loss: 50.4930
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [6/25], Loss: 50.4532
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [7/25], Loss: 50.5328
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [8/25], Loss: 50.5328
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [9/25], Loss: 50.4930
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [10/25], Loss: 50.5328
Validation Loss: 51.7045, Validation Accuracy: 47.70%
Epoch [11/25], Loss: 50.4

In [None]:
import matplotlib.pyplot as plt
import csv

# Load the per-fold summary that train_model() writes to kfold_results.csv.
# Each row describes one fold: its number, its best validation accuracy (%)
# and its lowest validation loss.
fold_ids = []
val_losses = []
val_accuracies = []

with open('kfold_results.csv', 'r', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        fold_ids.append(int(row['fold']))
        val_losses.append(float(row['val_loss']))
        val_accuracies.append(float(row['val_accuracy']))

# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Folds are discrete, so mark each point and put ticks on the fold numbers.
loss_ax.plot(fold_ids, val_losses, marker='o', label='Validation Loss')
loss_ax.set_xlabel('Fold')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Validation Loss Across Folds')
loss_ax.set_xticks(fold_ids)
loss_ax.grid(True)
loss_ax.legend()

acc_ax.plot(fold_ids, val_accuracies, marker='o', label='Validation Accuracy', color='orange')
acc_ax.set_xlabel('Fold')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Validation Accuracy Across Folds')
acc_ax.set_xticks(fold_ids)
acc_ax.grid(True)
acc_ax.legend()

# Show the plots
fig.tight_layout()
plt.show()


In [None]:
# data_loaders_and_training.py

# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import nbimporter
from model import PneumoniaDetectionCNN
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import csv

def train_model(device, k_folds=5, num_epochs=25, batch_size=32,
                learning_rate=0.001, num_workers=4,
                data_root='/home/user/persistent/chest_xray/chest_xray/augmented'):
    """Run K-fold cross-validation of ``PneumoniaDetectionCNN`` on the saved images.

    The folds are cut from the ``train`` directory only; the ``val`` and
    ``test`` directories are loaded just to report their sizes. For every fold
    a fresh model is trained, the weights of the epoch with the highest fold
    validation accuracy are written to ``best_model_fold_<n>.pth`` in the
    current working directory, and a per-fold summary is written to
    ``kfold_results.csv``.

    Args:
        device: ``torch.device`` the model and batches are moved to.
        k_folds: Number of cross-validation folds.
        num_epochs: Training epochs per fold.
        batch_size: Batch size for the per-fold loaders.
        learning_rate: Learning rate passed to RMSprop.
        num_workers: Worker processes for the per-fold loaders.
        data_root: Directory containing the ``train``, ``val`` and ``test``
            ImageFolder trees of already-transformed images.

    Returns:
        None. Results are printed and saved to disk.
    """
    # Paths to the transformed data directories
    train_data_path = f'{data_root}/train'
    val_data_path = f'{data_root}/val'
    test_data_path = f'{data_root}/test'

    # The images on disk are already resized/augmented, so only convert them to
    # a single-channel tensor and apply the same Normalize(0.5, 0.5) as before.
    base_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Ensure grayscale consistency
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])   # Normalize as per the previous transformation
    ])

    # Load the training, validation, and test datasets from transformed directories
    train_dataset = datasets.ImageFolder(root=train_data_path, transform=base_transform)
    val_dataset = datasets.ImageFolder(root=val_data_path, transform=base_transform)
    test_dataset = datasets.ImageFolder(root=test_data_path, transform=base_transform)

    # Print dataset sizes
    print(f'Number of images in training set: {len(train_dataset)}')
    print(f'Number of images in validation set: {len(val_dataset)}')
    print(f'Number of images in test set: {len(test_dataset)}')

    # Fixed random_state keeps the fold assignment identical between runs.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    # One summary entry per fold
    fold_accuracies = []
    fold_val_losses = []

    # Iterate over each fold
    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        print(f'\nFold {fold + 1}/{k_folds}')

        # Both subsets index into the training directory
        train_sub = Subset(train_dataset, train_idx)
        val_sub = Subset(train_dataset, val_idx)

        # Create DataLoader objects for the current fold
        train_loader = DataLoader(train_sub, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_sub, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        # Fresh model and optimizer per fold so no weights leak between folds.
        model = PneumoniaDetectionCNN().to(device)
        # BCELoss needs probabilities in [0, 1].
        # NOTE(review): assumes the model's last layer is a sigmoid - confirm in model.py.
        criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

        best_fold_accuracy = 0.0

        # Per-epoch history for the current fold
        train_losses = []
        val_losses = []

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for images, labels in train_loader:
                # Labels become float column vectors (N, 1) to match the loss input.
                images, labels = images.to(device), labels.to(device).float().unsqueeze(1)

                # Zero the gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(images)

                # Calculate loss
                loss = criterion(outputs, labels)

                # Backward pass and optimization
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            # Mean of the per-batch losses for this epoch
            train_losses.append(running_loss / len(train_loader))
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_losses[-1]:.4f}')

            # Validation loop
            model.eval()
            val_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
                    outputs = model(images)
                    val_loss += criterion(outputs, labels).item()
                    # Threshold the predicted probability at 0.5
                    predicted = (outputs > 0.5).int()
                    total += labels.size(0)
                    correct += (predicted == labels.int()).sum().item()

            val_losses.append(val_loss / len(val_loader))
            val_accuracy = 100 * correct / total
            print(f'Validation Loss: {val_losses[-1]:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

            # Checkpoint only on a strict improvement in validation accuracy
            if val_accuracy > best_fold_accuracy:
                best_fold_accuracy = val_accuracy
                torch.save(model.state_dict(), f'best_model_fold_{fold + 1}.pth')
                print('Best model for this fold saved!')

        # The recorded loss is the fold's minimum over all epochs; it may come
        # from a different epoch than the best-accuracy checkpoint.
        fold_accuracies.append(best_fold_accuracy)
        fold_val_losses.append(min(val_losses))

    # Calculate average results across all folds
    average_accuracy = sum(fold_accuracies) / k_folds
    average_val_loss = sum(fold_val_losses) / k_folds

    print(f'\nAverage Validation Accuracy across {k_folds} folds: {average_accuracy:.2f}%')
    print(f'Average Validation Loss across {k_folds} folds: {average_val_loss:.4f}')

    # Save overall results to a CSV file
    with open('kfold_results.csv', 'w', newline='') as csvfile:
        fieldnames = ['fold', 'val_accuracy', 'val_loss']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for fold in range(k_folds):
            writer.writerow({'fold': fold + 1, 'val_accuracy': fold_accuracies[fold], 'val_loss': fold_val_losses[fold]})

    print('K-Fold Cross-Validation complete and results saved.')

# Use the GPU when CUDA is available, otherwise fall back to the CPU, then
# start the full K-fold training run (this executes as soon as the cell runs).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_model(device)
