# First Attempt

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

dataset_path = 'D:\Inteligent_Agents\kvasir-dataset-v2'  
train_ratio = 0.8  # 80% for trainning, 20% fro validation

# Created Train and val directories
train_dir = os.path.join(dataset_path, 'train')
val_dir = os.path.join(dataset_path, 'val')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Classes
classes = ['dyed-lifted-polyps', 'dyed-resection-margins', 'esophagitis', 'normal-cecum',
           'normal-pylorus', 'normal-z-line', 'polyps', 'ulcerative-colitis']

for cls in classes:
    class_path = os.path.join(dataset_path, cls)
    images = [f for f in os.listdir(class_path) if f.endswith('.jpg')]
    
    # Divide in train and val
    train_imgs, val_imgs = train_test_split(images, train_size=train_ratio, random_state=42)
    
    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)
    
    for img in train_imgs:
        src = os.path.join(class_path, img)
        dst = os.path.join(train_dir, cls, img)
        shutil.move(src, dst)
    
    for img in val_imgs:
        src = os.path.join(class_path, img)
        dst = os.path.join(val_dir, cls, img)
        shutil.move(src, dst)

print("División train/val completada.")

  dataset_path = 'D:\Inteligent_Agents\kvasir-dataset-v2'  # Ajusta esta ruta según tu ubicación


División train/val completada.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, matthews_corrcoef
import numpy as np
import os

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, matthews_corrcoef
import numpy as np
import os
import psutil  # To monitor memory (optional, install with `pip install psutil`)

# Set device to CPU (no GPU available)
device = torch.device("cpu")

# Define preprocessing and data augmentation (simplified)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to reduce memory
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load dataset (ensure train/val split is done)
data_dir = 'D:/Inteligent_Agents/kvasir-dataset-v2'  
try:
    image_datasets = {
        'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
        'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val'])
    }
    dataloaders = {
        'train': DataLoader(image_datasets['train'], batch_size=8, shuffle=True, num_workers=0),
        'val': DataLoader(image_datasets['val'], batch_size=8, shuffle=False, num_workers=0)
    }
    class_names = image_datasets['train'].classes
    num_classes = len(class_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Load a lighter model (ResNet-18 instead of ResNet-50)
model = models.resnet18(pretrained=True)  # Changed to ResNet-18
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training loop
def train_model(model, criterion, optimizer, num_epochs=2):  # Reduced epochs 
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(image_datasets['train'])
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        
        # Monitor memory usage
        memory = psutil.virtual_memory()
        print(f"Memory Usage: {memory.percent}%")

        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        # Compute metrics
        cm = confusion_matrix(all_labels, all_preds)
        precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
        acc = np.sum(np.diag(cm)) / np.sum(cm)
        specificity = np.mean([cm[i,i] / (cm[i,i] + np.sum(cm[:,i]) - cm[i,i]) for i in range(num_classes)])
        mcc = matthews_corrcoef(all_labels, all_preds)
        
        # Per-class TP, TN, FP, FN
        for i, class_name in enumerate(class_names):
            tp = cm[i,i]
            fp = np.sum(cm[:,i]) - tp
            fn = np.sum(cm[i,:]) - tp
            tn = np.sum(cm) - (tp + fp + fn)
            print(f'\nClass: {class_name}')
            print(f'TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}')
        
        print(f'\nEpoch {epoch+1} Metrics:')
        print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, Specificity: {specificity:.4f}, Accuracy: {acc:.4f}, MCC: {mcc:.4f}, F1: {f1:.4f}')

try:
    train_model(model, criterion, optimizer, num_epochs=2)
except Exception as e:
    print(f"Error occurred: {e}")

torch.save(model.state_dict(), 'kvasir_resnet18.pth')



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\lloyd/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:02<00:00, 20.6MB/s]


Epoch 1/2, Loss: 0.5807
Memory Usage: 80.1%

Class: dyed-lifted-polyps
TP: 172, TN: 1384, FP: 16, FN: 28

Class: dyed-resection-margins
TP: 184, TN: 1382, FP: 18, FN: 16

Class: esophagitis
TP: 174, TN: 1332, FP: 68, FN: 26

Class: normal-cecum
TP: 169, TN: 1398, FP: 2, FN: 31

Class: normal-pylorus
TP: 197, TN: 1392, FP: 8, FN: 3

Class: normal-z-line
TP: 130, TN: 1376, FP: 24, FN: 70

Class: polyps
TP: 195, TN: 1355, FP: 45, FN: 5

Class: ulcerative-colitis
TP: 189, TN: 1391, FP: 9, FN: 11

Epoch 1 Metrics:
Precision: 0.8882, Recall: 0.8812, Specificity: 0.8882, Accuracy: 0.8812, MCC: 0.8655, F1: 0.8805
Epoch 2/2, Loss: 0.3187
Memory Usage: 84.0%

Class: dyed-lifted-polyps
TP: 172, TN: 1390, FP: 10, FN: 28

Class: dyed-resection-margins
TP: 191, TN: 1373, FP: 27, FN: 9

Class: esophagitis
TP: 153, TN: 1367, FP: 33, FN: 47

Class: normal-cecum
TP: 195, TN: 1393, FP: 7, FN: 5

Class: normal-pylorus
TP: 200, TN: 1391, FP: 9, FN: 0

Class: normal-z-line
TP: 167, TN: 1356, FP: 44, FN: 33


# Second Attempt

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, matthews_corrcoef
import numpy as np
import os
import psutil 

device = torch.device("cpu")

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),  
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'D:/Inteligent_Agents/kvasir-dataset-v2'  
try:
    image_datasets = {
        'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
        'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val'])
    }
    dataloaders = {
        'train': DataLoader(image_datasets['train'], batch_size=8, shuffle=True, num_workers=0),
        'val': DataLoader(image_datasets['val'], batch_size=8, shuffle=False, num_workers=0)
    }
    class_names = image_datasets['train'].classes
    num_classes = len(class_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# ResNet-18 
model = models.resnet18(pretrained=True)  
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to calculate metrics for a class
def calculate_class_metrics(tp, tn, fp, fn, class_name):
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    accuracy = (tp + tn) / total if total > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    mcc = ((tp * tn) - (fp * fn)) / ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5 if total > 0 else 0
    
    print(f'\nClass: {class_name}')
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"MCC: {mcc:.4f}")

# Training loop
def train_model(model, criterion, optimizer, num_epochs=3):  
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(image_datasets['train'])
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        
        # Optional: Monitor memory usage
        memory = psutil.virtual_memory()
        print(f"Memory Usage: {memory.percent}%")

        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        cm = confusion_matrix(all_labels, all_preds)
        
        for i, class_name in enumerate(class_names):
            tp = cm[i, i]
            fp = np.sum(cm[:, i]) - tp
            fn = np.sum(cm[i, :]) - tp
            tn = np.sum(cm) - (tp + fp + fn)
            calculate_class_metrics(tp, tn, fp, fn, class_name)
        
        # Calculate and print macro-averaged metrics
        precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
        acc = np.sum(np.diag(cm)) / np.sum(cm)
        specificity = np.mean([cm[i,i] / (cm[i,i] + np.sum(cm[:,i]) - cm[i,i]) for i in range(num_classes)])
        mcc = matthews_corrcoef(all_labels, all_preds)
        
        print(f'\nEpoch {epoch+1} Metrics (Macro-Averaged):')
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"Specificity: {specificity:.4f}")
        print(f"Accuracy: {acc:.4f}")
        print(f"MCC: {mcc:.4f}")
        print(f"F1 Score: {f1:.4f}")

# Train the model with error handling
try:
    train_model(model, criterion, optimizer, num_epochs=2)
except Exception as e:
    print(f"Error occurred: {e}")

# Save the model
torch.save(model.state_dict(), 'kvasir_resnet18.pth')



Epoch 1/2, Loss: 0.5761
Memory Usage: 65.1%

Class: dyed-lifted-polyps
TP: 144, TN: 1397, FP: 3, FN: 56
Precision: 0.9796
Recall: 0.7200
Specificity: 0.9979
Accuracy: 0.9631
F1 Score: 0.8300
MCC: 0.8219

Class: dyed-resection-margins
TP: 198, TN: 1347, FP: 53, FN: 2
Precision: 0.7888
Recall: 0.9900
Specificity: 0.9621
Accuracy: 0.9656
F1 Score: 0.8780
MCC: 0.8658

Class: esophagitis
TP: 143, TN: 1373, FP: 27, FN: 57
Precision: 0.8412
Recall: 0.7150
Specificity: 0.9807
Accuracy: 0.9475
F1 Score: 0.7730
MCC: 0.7467

Class: normal-cecum
TP: 198, TN: 1380, FP: 20, FN: 2
Precision: 0.9083
Recall: 0.9900
Specificity: 0.9857
Accuracy: 0.9862
F1 Score: 0.9474
MCC: 0.9406

Class: normal-pylorus
TP: 198, TN: 1395, FP: 5, FN: 2
Precision: 0.9754
Recall: 0.9900
Specificity: 0.9964
Accuracy: 0.9956
F1 Score: 0.9826
MCC: 0.9802

Class: normal-z-line
TP: 175, TN: 1344, FP: 56, FN: 25
Precision: 0.7576
Recall: 0.8750
Specificity: 0.9600
Accuracy: 0.9494
F1 Score: 0.8121
MCC: 0.7857

Class: polyps
TP: 

# Fourth Attempt 

The following dataset split is used for further attemps, so no need to divide again

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

dataset_path = 'D:\Intelligent_Agents2\kvasir-dataset-v2'  
train_ratio = 0.7  # 70% for training
val_test_ratio = 0.15  # 15% for validation y 15% for testing

train_dir = os.path.join(dataset_path, 'train')
val_dir = os.path.join(dataset_path, 'val')
test_dir = os.path.join(dataset_path, 'test')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

classes = ['dyed-lifted-polyps', 'dyed-resection-margins', 'esophagitis', 'normal-cecum',
           'normal-pylorus', 'normal-z-line', 'polyps', 'ulcerative-colitis']

for cls in classes:
    class_path = os.path.join(dataset_path, cls)
    images = [f for f in os.listdir(class_path) if f.endswith('.jpg')]
    
    # First divide in train (70%) and the rest (30%)
    train_imgs, temp_imgs = train_test_split(images, train_size=train_ratio, random_state=42)
    
    # Divide the rest (30%) in val (15%) and test (15%)
    val_imgs, test_imgs = train_test_split(temp_imgs, train_size=0.5, random_state=42)  # 50% de 30% = 15%
    
    # Create subfolders for every class in train, val and test
    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(test_dir, cls), exist_ok=True)
    
    for img in train_imgs:
        src = os.path.join(class_path, img)
        dst = os.path.join(train_dir, cls, img)
        shutil.move(src, dst)
    
    for img in val_imgs:
        src = os.path.join(class_path, img)
        dst = os.path.join(val_dir, cls, img)
        shutil.move(src, dst)
    
    for img in test_imgs:
        src = os.path.join(class_path, img)
        dst = os.path.join(test_dir, cls, img)
        shutil.move(src, dst)

print("Division train/val/test completed (70/15/15).")

  dataset_path = 'D:\Intelligent_Agents2\kvasir-dataset-v2'


Division train/val/test completed (70/15/15).


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, matthews_corrcoef
import numpy as np
import os
import psutil  # To monitor memory
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split

In [None]:
device = torch.device("cpu")

# Define preprocessing and data augmentation (simplified)
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to reduce memory
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Function to split data if not already done
def split_dataset(dataset_path):
    train_dir = os.path.join(dataset_path, 'train')
    val_dir = os.path.join(dataset_path, 'val')
    test_dir = os.path.join(dataset_path, 'test')
    
    if not (os.path.exists(train_dir) and os.path.exists(val_dir) and os.path.exists(test_dir)):
        print("Splitting dataset into train/val/test (70/15/15)...")
        train_ratio = 0.7
        val_test_ratio = 0.15
        
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(val_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        classes = ['dyed-lifted-polyps', 'dyed-resection-margins', 'esophagitis', 'normal-cecum',
                   'normal-pylorus', 'normal-z-line', 'polyps', 'ulcerative-colitis']

        for cls in classes:
            class_path = os.path.join(dataset_path, cls)
            images = [f for f in os.listdir(class_path) if f.endswith('.jpg')]
            
            train_imgs, temp_imgs = train_test_split(images, train_size=train_ratio, random_state=42)
            val_imgs, test_imgs = train_test_split(temp_imgs, train_size=0.5, random_state=42)
            
            os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
            os.makedirs(os.path.join(val_dir, cls), exist_ok=True)
            os.makedirs(os.path.join(test_dir, cls), exist_ok=True)
            
            for img in train_imgs:
                src = os.path.join(class_path, img)
                dst = os.path.join(train_dir, cls, img)
                shutil.move(src, dst)
            
            for img in val_imgs:
                src = os.path.join(class_path, img)
                dst = os.path.join(val_dir, cls, img)
                shutil.move(src, dst)
            
            for img in test_imgs:
                src = os.path.join(class_path, img)
                dst = os.path.join(test_dir, cls, img)
                shutil.move(src, dst)

        print("División train/val/test completada (70/15/15).")
    else:
        print("Train, val, and test folders already exist. Skipping data split.")

# Load dataset
data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'
split_dataset(data_dir)
try:
    image_datasets = {
        'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
        'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val']),
        'test': datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['test'])
    }
    dataloaders = {
        'train': DataLoader(image_datasets['train'], batch_size=8, shuffle=True, num_workers=0),
        'val': DataLoader(image_datasets['val'], batch_size=8, shuffle=False, num_workers=0),
        'test': DataLoader(image_datasets['test'], batch_size=8, shuffle=False, num_workers=0)
    }
    class_names = image_datasets['train'].classes
    num_classes = len(class_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Load a lighter model (ResNet-18 instead of ResNet-50)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to calculate metrics for a class
def calculate_class_metrics(tp, tn, fp, fn, class_name, phase):
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    accuracy = (tp + tn) / total if total > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    mcc = ((tp * tn) - (fp * fn)) / ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5 if total > 0 else 0
    
    print(f'\n{phase} - Class: {class_name}')
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"MCC: {mcc:.4f}")
    
    return {"Precision": precision, "Recall": recall, "Specificity": specificity, 
            "Accuracy": accuracy, "F1 Score": f1, "MCC": mcc}

# Training and evaluation loop
def train_and_evaluate(model, criterion, optimizer, num_epochs=3):
    # Store metrics for all epochs and phases
    all_class_metrics = {class_name: {"train": [], "val": [], "test": []} for class_name in class_names}
    macro_metrics = {"train": [], "val": [], "test": []}
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        all_train_preds, all_train_labels = [], []
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())
        epoch_loss = running_loss / len(image_datasets['train'])
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        memory = psutil.virtual_memory()
        print(f"Memory Usage: {memory.percent}%")

        # Validation and Test phases
        for phase in ['val', 'test']:
            model.eval()
            all_preds, all_labels = [], []
            with torch.no_grad():
                for inputs, labels in dataloaders[phase]:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    all_preds.extend(preds.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())
            
            # Compute confusion matrix
            cm = confusion_matrix(all_labels, all_preds)
            
            # Calculate and store per-class metrics
            epoch_class_metrics = {}
            for i, class_name in enumerate(class_names):
                tp = cm[i, i]
                fp = np.sum(cm[:, i]) - tp
                fn = np.sum(cm[i, :]) - tp
                tn = np.sum(cm) - (tp + fp + fn)
                metrics = calculate_class_metrics(tp, tn, fp, fn, class_name, phase)
                epoch_class_metrics[class_name] = metrics
            all_class_metrics[class_name][phase].append(metrics)
            
            # Calculate and store macro-averaged metrics
            precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
            acc = np.sum(np.diag(cm)) / np.sum`(cm)
            specificity = np.mean([cm[i,i] / (cm[i,i] + np.sum(cm[:,i]) - cm[i,i]) for i in range(num_classes)])
            mcc = matthews_corrcoef(all_labels, all_preds)
            macro_metrics[phase].append({"Precision": precision, "Recall": recall, "Specificity": specificity, 
                                       "Accuracy": acc, "MCC": mcc, "F1 Score": f1})
            
            print(f'\nEpoch {epoch+1} {phase.upper()} Metrics (Macro-Averaged):')
            print(f"Precision: {precision:.4f}")
            print(f"Recall: {recall:.4f}")
            print(f"Specificity: {specificity:.4f}")
            print(f"Accuracy: {acc:.4f}")
            print(f"MCC: {mcc:.4f}")
            print(f"F1 Score: {f1:.4f}")

    plt.figure(figsize=(12, 6))
    metrics_to_plot = ["Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]
    bar_width = 0.1
    index = np.arange(len(class_names) + 1)  # +1 for macro-averaged
    
    for i, metric in enumerate(metrics_to_plot):
        values = [all_class_metrics[class_name]["test"][-1][metric] for class_name in class_names]
        values.append(macro_metrics["test"][-1][metric])  # Test macro-averaged
        plt.bar(index + i * bar_width, values, bar_width, label=metric)

    plt.xlabel('Classes and Macro-Averaged (Test Set)')
    plt.ylabel('Metric Value')
    plt.title('Comparison of Metrics Across Classes and Macro-Averaged (Final Epoch, Test Set)')
    plt.xticks(index + bar_width * (len(metrics_to_plot) - 1) / 2, class_names + ['Macro-Averaged'])
    plt.legend()
    plt.tight_layout()
    plt.show()

try:
    train_and_evaluate(model, criterion, optimizer, num_epochs=3)
except Exception as e:
    print(f"Error occurred: {e}")

torch.save(model.state_dict(), 'kvasir_resnet18.pth')

  data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'


Train, val, and test folders already exist. Skipping data split.
Epoch 1/3, Loss: 0.5929
Memory Usage: 66.3%

val - Class: dyed-lifted-polyps
TP: 142, TN: 996, FP: 54, FN: 8
Precision: 0.7245
Recall: 0.9467
Specificity: 0.9486
Accuracy: 0.9483
F1 Score: 0.8208
MCC: 0.8009

val - Class: dyed-resection-margins
TP: 97, TN: 1048, FP: 2, FN: 53
Precision: 0.9798
Recall: 0.6467
Specificity: 0.9981
Accuracy: 0.9542
F1 Score: 0.7791
MCC: 0.7750

val - Class: esophagitis
TP: 72, TN: 1050, FP: 0, FN: 78
Precision: 1.0000
Recall: 0.4800
Specificity: 1.0000
Accuracy: 0.9350
F1 Score: 0.6486
MCC: 0.6684

val - Class: normal-cecum
TP: 141, TN: 1041, FP: 9, FN: 9
Precision: 0.9400
Recall: 0.9400
Specificity: 0.9914
Accuracy: 0.9850
F1 Score: 0.9400
MCC: 0.9314

val - Class: normal-pylorus
TP: 150, TN: 1047, FP: 3, FN: 0
Precision: 0.9804
Recall: 1.0000
Specificity: 0.9971
Accuracy: 0.9975
F1 Score: 0.9901
MCC: 0.9887

val - Class: normal-z-line
TP: 150, TN: 973, FP: 77, FN: 0
Precision: 0.6608
Recall

<Figure size 1200x600 with 0 Axes>

# Fifth attempt

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, matthews_corrcoef, precision_recall_fscore_support
import numpy as np
import os
import psutil  # To monitor memory
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split
import cv2
from PIL import Image

In [15]:
device = torch.device("cpu")

# Equalization function
def histogram_equalization(image):
    # Convert PIL Image to numpy array and ensure RGB
    img_array = np.array(image).astype(np.uint8)
    if len(img_array.shape) == 3:  
        img_yuv = cv2.cvtColor(img_array, cv2.COLOR_RGB2YUV)
        img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])  
        img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
    else:  
        img_output = cv2.equalizeHist(img_array)
    return Image.fromarray(img_output)

# Preprocessing 
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.8, 1.2)),  # Random zoom (scale 0.8x to 1.2x)
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load dataset
data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'
split_dataset(data_dir)
try:
    image_datasets = {
        'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
        'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val']),
        'test': datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['test'])
    }
    dataloaders = {
        'train': DataLoader(image_datasets['train'], batch_size=8, shuffle=True, num_workers=0),
        'val': DataLoader(image_datasets['val'], batch_size=8, shuffle=False, num_workers=0),
        'test': DataLoader(image_datasets['test'], batch_size=8, shuffle=False, num_workers=0)
    }
    class_names = image_datasets['train'].classes
    num_classes = len(class_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Load ResNet-18 
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to calculate metrics for a class
def calculate_class_metrics(tp, tn, fp, fn, class_name, phase):
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    accuracy = (tp + tn) / total if total > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    mcc = ((tp * tn) - (fp * fn)) / ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5 if total > 0 else 0
    
    print(f'\n{phase} - Class: {class_name}')
    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"MCC: {mcc:.4f}")
    
    return {"Precision": precision, "Recall": recall, "Specificity": specificity, 
            "Accuracy": accuracy, "F1 Score": f1, "MCC": mcc}

# Training and evaluation loop
def train_and_evaluate(model, criterion, optimizer, num_epochs=3):
    # Store metrics and confusion matrices for all epochs and phases
    all_class_metrics = {class_name: {"train": [], "val": [], "test": []} for class_name in class_names}
    macro_metrics = {"train": [], "val": [], "test": []}
    all_conf_matrices = {"train": [], "val": [], "test": []}
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        all_train_preds, all_train_labels = [], []
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())
        epoch_loss = running_loss / len(image_datasets['train'])
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        memory = psutil.virtual_memory()
        print(f"Memory Usage: {memory.percent}%")

        # Validation and Test phases
        for phase in ['val', 'test']:
            model.eval()
            all_preds, all_labels = [], []
            with torch.no_grad():
                for inputs, labels in dataloaders[phase]:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    all_preds.extend(preds.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())
            
            # Confusion matrix
            cm = confusion_matrix(all_labels, all_preds)
            all_conf_matrices[phase].append(cm)
            
            # Calculate and store per-class metrics
            epoch_class_metrics = {}
            for i, class_name in enumerate(class_names):
                tp = cm[i, i]
                fp = np.sum(cm[:, i]) - tp
                fn = np.sum(cm[i, :]) - tp
                tn = np.sum(cm) - (tp + fp + fn)
                metrics = calculate_class_metrics(tp, tn, fp, fn, class_name, phase)
                epoch_class_metrics[class_name] = metrics
            all_class_metrics[class_name][phase].append(metrics)
            
            # Calculate and store macro-averaged metrics
            precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
            acc = np.sum(np.diag(cm)) / np.sum(cm)
            specificity = np.mean([cm[i,i] / (cm[i,i] + np.sum(cm[:,i]) - cm[i,i]) for i in range(num_classes)])
            mcc = matthews_corrcoef(all_labels, all_preds)
            macro_metrics[phase].append({"Precision": precision, "Recall": recall, "Specificity": specificity, 
                                       "Accuracy": acc, "MCC": mcc, "F1 Score": f1})
            
            print(f'\nEpoch {epoch+1} {phase.upper()} Metrics (Macro-Averaged):')
            print(f"Precision: {precision:.4f}")
            print(f"Recall: {recall:.4f}")
            print(f"Specificity: {specificity:.4f}")
            print(f"Accuracy: {acc:.4f}")
            print(f"MCC: {mcc:.4f}")
            print(f"F1 Score: {f1:.4f}")

    # Print confusion matrices for the last epoch
    print("\nConfusion Matrices for the Last Epoch:")
    for phase in ['train', 'val', 'test']:
        print(f"\n{phase.upper()} Confusion Matrix:")
        cm_last = all_conf_matrices[phase][-1]  # Last epoch's confusion matrix
        print(cm_last)
        # Print per-class confusion matrix (diagonal elements)
        for i, class_name in enumerate(class_names):
            print(f"\n{class_name} (Class {i}) Confusion Matrix (Row {i}, Col {i}):")
            print(f"TP: {cm_last[i, i]}, FP: {np.sum(cm_last[:, i]) - cm_last[i, i]}, "
                  f"FN: {np.sum(cm_last[i, :]) - cm_last[i, i]}, TN: {np.sum(cm_last) - np.sum(cm_last[i, :]) - np.sum(cm_last[:, i]) + cm_last[i, i]}")

    # Generate graph comparing final epoch metrics across train, val, and test
    plt.figure(figsize=(12, 6))
    metrics_to_plot = ["Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]
    bar_width = 0.1
    index = np.arange(len(class_names) + 1)  # +1 for macro-averaged
    
    for i, metric in enumerate(metrics_to_plot):
        values = [all_class_metrics[class_name]["test"][-1][metric] for class_name in class_names]
        values.append(macro_metrics["test"][-1][metric])  # Test macro-averaged
        plt.bar(index + i * bar_width, values, bar_width, label=metric)

    plt.xlabel('Classes and Macro-Averaged (Test Set)')
    plt.ylabel('Metric Value')
    plt.title('Comparison of Metrics Across Classes and Macro-Averaged (Final Epoch, Test Set)')
    plt.xticks(index + bar_width * (len(metrics_to_plot) - 1) / 2, class_names + ['Macro-Averaged'])
    plt.legend()
    plt.tight_layout()
    plt.show()

# Train and evaluate the model with error handling
try:
    train_and_evaluate(model, criterion, optimizer, num_epochs=3)
except Exception as e:
    print(f"Error occurred: {e}")

# Save the model
torch.save(model.state_dict(), 'kvasir_resnet18.pth')

  data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'


Train, val, and test folders already exist. Skipping data split.
Epoch 1/3, Loss: 0.6569
Memory Usage: 81.8%

val - Class: dyed-lifted-polyps
TP: 115, TN: 1039, FP: 11, FN: 35
Precision: 0.9127
Recall: 0.7667
Specificity: 0.9895
Accuracy: 0.9617
F1 Score: 0.8333
MCC: 0.8158

val - Class: dyed-resection-margins
TP: 138, TN: 1025, FP: 25, FN: 12
Precision: 0.8466
Recall: 0.9200
Specificity: 0.9762
Accuracy: 0.9692
F1 Score: 0.8818
MCC: 0.8651

val - Class: esophagitis
TP: 124, TN: 1019, FP: 31, FN: 26
Precision: 0.8000
Recall: 0.8267
Specificity: 0.9705
Accuracy: 0.9525
F1 Score: 0.8131
MCC: 0.7861

val - Class: normal-cecum
TP: 131, TN: 1049, FP: 1, FN: 19
Precision: 0.9924
Recall: 0.8733
Specificity: 0.9990
Accuracy: 0.9833
F1 Score: 0.9291
MCC: 0.9221

val - Class: normal-pylorus
TP: 149, TN: 1040, FP: 10, FN: 1
Precision: 0.9371
Recall: 0.9933
Specificity: 0.9905
Accuracy: 0.9908
F1 Score: 0.9644
MCC: 0.9597

val - Class: normal-z-line
TP: 117, TN: 1028, FP: 22, FN: 33
Precision: 0.8

# 6th attempt

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from sklearn.metrics import confusion_matrix, matthews_corrcoef, precision_recall_fscore_support
import numpy as np
import os
import psutil  # To monitor memory
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split
import cv2
from PIL import Image

In [2]:
device = torch.device("cpu")

# Equalization function
def histogram_equalization(image):
    img_array = np.array(image).astype(np.uint8)
    if len(img_array.shape) == 3:  
        img_yuv = cv2.cvtColor(img_array, cv2.COLOR_RGB2YUV)
        img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])  
        img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
    else:  
        img_output = cv2.equalizeHist(img_array)
    return Image.fromarray(img_output)

# Preprocessing 
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.8, 1.2)),  # Random zoom (scale 0.8x to 1.2x)
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Lambda(histogram_equalization),  # Histogram equalization
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


# Load dataset
data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'
try:
    image_datasets = {
        'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train']),
        'val': datasets.ImageFolder(os.path.join(data_dir, 'val'), data_transforms['val']),
        'test': datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms['test'])
    }
    print(f"Train dataset size: {len(image_datasets['train'])}, Val dataset size: {len(image_datasets['val'])}, Test dataset size: {len(image_datasets['test'])}")
    dataloaders = {
        'train': DataLoader(image_datasets['train'], batch_size=8, shuffle=True, num_workers=0),
        'val': DataLoader(image_datasets['val'], batch_size=8, shuffle=False, num_workers=0),
        'test': DataLoader(image_datasets['test'], batch_size=8, shuffle=False, num_workers=0)
    }
    class_names = image_datasets['train'].classes
    num_classes = len(class_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Load ResNet-18 
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Function to calculate metrics for a class
def calculate_class_metrics(tp, tn, fp, fn, class_name, phase):
    total = tp + tn + fp + fn
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    accuracy = (tp + tn) / total if total > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    mcc = ((tp * tn) - (fp * fn)) / ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5 if total > 0 else 0
    
    return {"Class": class_name, "TP": tp, "TN": tn, "FP": fp, "FN": fn,
            "Precision": precision, "Recall": recall, "Specificity": specificity,
            "Accuracy": accuracy, "F1 Score": f1, "MCC": mcc}

# Function to print metrics in a table
def print_metrics_table(metrics_dict, phase, epoch):
    headers = ["Class", "TP", "TN", "FP", "FN", "Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]
    print(f"\nEpoch {epoch} {phase.upper()} Metrics Table:")
    print("-" * 100)
    print(" | ".join(f"{h:<12}" for h in headers))
    print("-" * 100)
    
    for class_name, metrics in metrics_dict.items():
        row = [f"{metrics.get(k, 'N/A'):<12.4f}" if isinstance(metrics.get(k), (int, float)) else f"{metrics.get(k):<12}" 
               for k in ["Class", "TP", "TN", "FP", "FN", "Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]]
        print(" | ".join(row))
    
    # Add macro-averaged metrics if available
    if "Macro-Averaged" in metrics_dict:
        macro = metrics_dict["Macro-Averaged"]
        row = [f"{macro.get(k, 'N/A'):<12.4f}" if isinstance(macro.get(k), (int, float)) else f"{macro.get(k):<12}" 
               for k in ["Class", "TP", "TN", "FP", "FN", "Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]]
        print("-" * 100)
        print(" | ".join(row))
    print("-" * 100)

# Training and evaluation loop
def train_and_evaluate(model, criterion, optimizer, num_epochs=4):
    all_class_metrics = {class_name: {"train": [], "val": [], "test": []} for class_name in class_names}
    macro_metrics = {"train": [], "val": [], "test": []}
    all_conf_matrices = {"train": [], "val": [], "test": []}
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_train_preds, all_train_labels = [], []
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())
        epoch_loss = running_loss / len(image_datasets['train'])
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        memory = psutil.virtual_memory()
        print(f"Memory Usage: {memory.percent}%")

        for phase in ['val', 'test']:
            model.eval()
            all_preds, all_labels = [], []
            with torch.no_grad():
                for inputs, labels in dataloaders[phase]:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    all_preds.extend(preds.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())
            
            print(f"{phase} phase: Collected {len(all_labels)} labels and {len(all_preds)} predictions")
            
            if len(all_labels) > 0 and len(all_preds) > 0:
                cm = confusion_matrix(all_labels, all_preds)
                all_conf_matrices[phase].append(cm)
                
                epoch_class_metrics = {}
                for i, class_name in enumerate(class_names):
                    tp = cm[i, i]
                    fp = np.sum(cm[:, i]) - tp
                    fn = np.sum(cm[i, :]) - tp
                    tn = np.sum(cm) - (tp + fp + fn)
                    metrics = calculate_class_metrics(tp, tn, fp, fn, class_name, phase)
                    epoch_class_metrics[class_name] = metrics
                
                precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
                acc = np.sum(np.diag(cm)) / np.sum(cm)
                specificity = np.mean([cm[i,i] / (cm[i,i] + np.sum(cm[:,i]) - cm[i,i]) for i in range(num_classes)])
                mcc = matthews_corrcoef(all_labels, all_preds)
                macro_metrics[phase].append({"Precision": precision, "Recall": recall, "Specificity": specificity, 
                                           "Accuracy": acc, "MCC": mcc, "F1 Score": f1})
                epoch_class_metrics["Macro-Averaged"] = {"Class": "Macro-Averaged", "TP": np.nan, "TN": np.nan, 
                                                        "FP": np.nan, "FN": np.nan, "Precision": precision, 
                                                        "Recall": recall, "Specificity": specificity, 
                                                        "Accuracy": acc, "F1 Score": f1, "MCC": mcc}
                
                all_class_metrics[class_name][phase].append(metrics)
                print_metrics_table(epoch_class_metrics, phase, epoch + 1)
            else:
                print(f"Warning: No data collected for {phase} phase. Skipping metrics.")

    # Print confusion matrices for the last epoch
    print("\nConfusion Matrices for the Last Epoch:")
    for phase in ['train', 'val', 'test']:
        if all_conf_matrices[phase]:  # Check if there are any matrices
            print(f"\n{phase.upper()} Confusion Matrix:")
            cm_last = all_conf_matrices[phase][-1]  # Last epoch's confusion matrix
            print(cm_last)
            for i, class_name in enumerate(class_names):
                print(f"\n{class_name} (Class {i}) Confusion Matrix (Row {i}, Col {i}):")
                print(f"TP: {cm_last[i, i]}, FP: {np.sum(cm_last[:, i]) - cm_last[i, i]}, "
                      f"FN: {np.sum(cm_last[i, :]) - cm_last[i, i]}, TN: {np.sum(cm_last) - np.sum(cm_last[i, :]) - np.sum(cm_last[:, i]) + cm_last[i, i]}")
        else:
            print(f"\n{phase.upper()} Confusion Matrix: No data available.")

    # Generate graph comparing final epoch metrics across train, val, and test
    plt.figure(figsize=(12, 6))
    metrics_to_plot = ["Precision", "Recall", "Specificity", "Accuracy", "F1 Score", "MCC"]
    bar_width = 0.1
    index = np.arange(len(class_names) + 1)  # +1 for macro-averaged
    
    for i, metric in enumerate(metrics_to_plot):
        values = [all_class_metrics[class_name]["test"][-1][metric] for class_name in class_names]
        values.append(macro_metrics["test"][-1][metric])  # Test macro-averaged
        plt.bar(index + i * bar_width, values, bar_width, label=metric)

    plt.xlabel('Classes and Macro-Averaged (Test Set)')
    plt.ylabel('Metric Value')
    plt.title('Comparison of Metrics Across Classes and Macro-Averaged (Final Epoch, Test Set)')
    plt.xticks(index + bar_width * (len(metrics_to_plot) - 1) / 2, class_names + ['Macro-Averaged'])
    plt.legend()
    plt.tight_layout()
    plt.show()

# Train and evaluate the model with error handling
try:
    train_and_evaluate(model, criterion, optimizer, num_epochs=4)
except Exception as e:
    print(f"Error occurred: {e}")

# Save the model
torch.save(model.state_dict(), 'kvasir_resnet18.pth')

  data_dir = 'D:\Intelligent_Agents2\kvasir-dataset-v2'


Train dataset size: 5600, Val dataset size: 1200, Test dataset size: 1200




Epoch 1/4, Loss: 0.6422
Memory Usage: 71.3%
val phase: Collected 1200 labels and 1200 predictions

Epoch 1 VAL Metrics Table:
----------------------------------------------------------------------------------------------------
Class        | TP           | TN           | FP           | FN           | Precision    | Recall       | Specificity  | Accuracy     | F1 Score     | MCC         
----------------------------------------------------------------------------------------------------
dyed-lifted-polyps | 138          | 1007         | 43           | 12           | 0.7624       | 0.9200       | 0.9590       | 0.9542       | 0.8338       | 0.8123      
dyed-resection-margins | 107          | 1040         | 10           | 43           | 0.9145       | 0.7133       | 0.9905       | 0.9558       | 0.8015       | 0.7847      
esophagitis  | 102          | 1030         | 20           | 48           | 0.8361       | 0.6800       | 0.9810       | 0.9433       | 0.7500       | 0.7233      
norm

<Figure size 1200x600 with 0 Axes>