# Plant Disease Detection - YOLOv4 + Ensemble Model
## Kết hợp YOLOv4 và Ensemble Learning cho PlantVillage Dataset

Thực hiện:
1. **YOLOv4** - Object Detection cho phát hiện vùng bệnh
2. **Ensemble Models** - Kết hợp nhiều CNN models (ResNet, EfficientNet, DenseNet) để classification
3. **Training & Testing** - Trên PlantVillage dataset
4. **Evaluation** - So sánh accuracy của các models

## 1. Setup và Import Libraries

In [3]:
# Data processing
import pandas as pd
import numpy as np
import os
from pathlib import Path
import json
import random
from tqdm import tqdm

# Image processing
from PIL import Image
import cv2

# Deep Learning - PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, models
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, confusion_matrix, 
    accuracy_score, precision_score, recall_score, f1_score
)

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Warnings
import warnings
warnings.filterwarnings('ignore')

# Plot defaults used by every figure in the notebook
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

# Device configuration: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the runtime environment
print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"Device: {device}")
if device.type == 'cuda':
    # GPU details are only queried when a CUDA device was selected above
    first_gpu = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {first_gpu.total_memory / 1e9:.2f} GB")
print("Libraries imported successfully!")

PyTorch version: 2.9.0+cu130
Torchvision version: 0.24.0+cu130
Device: cuda
GPU: NVIDIA GeForce RTX 5060 Ti
GPU Memory: 17.10 GB
Libraries imported successfully!


In [4]:
# Check GPU availability
banner = "=" * 60
cuda_ready = torch.cuda.is_available()

print(banner)
print("GPU CHECK:")
print(banner)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
print(f"CUDA version: {torch.version.cuda if cuda_ready else 'N/A'}")
print(f"cuDNN version: {torch.backends.cudnn.version() if cuda_ready else 'N/A'}")
print(f"Number of GPUs: {torch.cuda.device_count()}")

if not cuda_ready:
    # No CUDA device: explain the consequence and how to install a CUDA build
    print("\nWARNING: No GPU detected!")
    print("Training will run on CPU (very slow!)")
    print("\nTo fix, install PyTorch with CUDA support:")
    print("  pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130")
else:
    # Describe every visible GPU
    for gpu_id in range(torch.cuda.device_count()):
        gpu_props = torch.cuda.get_device_properties(gpu_id)
        print(f"\nGPU {gpu_id}:")
        print(f"  Name: {torch.cuda.get_device_name(gpu_id)}")
        print(f"  Compute Capability: {torch.cuda.get_device_capability(gpu_id)}")
        print(f"  Memory Total: {gpu_props.total_memory / 1e9:.2f} GB")
        print(f"  Memory Allocated: {torch.cuda.memory_allocated(gpu_id) / 1e9:.4f} GB")
        print(f"  Memory Reserved: {torch.cuda.memory_reserved(gpu_id) / 1e9:.4f} GB")

    # Test GPU computation with a small matrix product
    print("\nTesting GPU computation...")
    x = torch.randn(1000, 1000).cuda()
    y = torch.randn(1000, 1000).cuda()
    z = torch.matmul(x, y)
    print("GPU computation test successful!")

print(banner)

GPU CHECK:
PyTorch version: 2.9.0+cu130
CUDA available: True
CUDA version: 13.0
cuDNN version: 91200
Number of GPUs: 1

GPU 0:
  Name: NVIDIA GeForce RTX 5060 Ti
  Compute Capability: (12, 0)
  Memory Total: 17.10 GB
  Memory Allocated: 0.0000 GB
  Memory Reserved: 0.0000 GB

Testing GPU computation...
GPU computation test successful!


## 2. Configuration và Hyperparameters

In [5]:
# Paths
BASE_PATH = Path(r'g:\Dataset\Data')
PLANT_VILLAGE_PATH = BASE_PATH.joinpath('PlantVIllage', 'PlantVillage-Dataset-master')
MODEL_SAVE_PATH = Path(r'g:\Dataset\models')
MODEL_SAVE_PATH.mkdir(exist_ok=True)

# Image parameters
IMG_SIZE = 224
BATCH_SIZE = 32
CHANNELS = 3

# Training parameters
EPOCHS = 50
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-4
# Fraction of the train+val remainder held out for validation
VALIDATION_SPLIT = 0.2
# Fraction of the whole dataset held out for testing
TEST_SPLIT = 0.1

# Early stopping
PATIENCE = 10

# Ensemble configuration
ENSEMBLE_MODELS = ['efficientnet_b3', 'resnet50', 'densenet121', 'inception_v3']

# Random seed for reproducibility: seed every RNG the notebook relies on
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    # Deterministic cuDNN kernels, with autotuning switched off
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
random.seed(SEED)

print("Configuration set!")
for setting_label, setting_value in (
    ("Dataset path", PLANT_VILLAGE_PATH),
    ("Image size", f"{IMG_SIZE}x{IMG_SIZE}"),
    ("Batch size", BATCH_SIZE),
    ("Device", device),
    ("Ensemble models", ENSEMBLE_MODELS),
):
    print(f"{setting_label}: {setting_value}")

Configuration set!
Dataset path: g:\Dataset\Data\PlantVIllage\PlantVillage-Dataset-master
Image size: 224x224
Batch size: 32
Device: cuda
Ensemble models: ['efficientnet_b3', 'resnet50', 'densenet121', 'inception_v3']


## 3. Load và Prepare PlantVillage Dataset

In [6]:
# Get all class folders (each sub-directory of the dataset root is one class)
class_folders = [entry for entry in PLANT_VILLAGE_PATH.iterdir() if entry.is_dir()]
class_names = sorted(folder.name for folder in class_folders)
num_classes = len(class_names)

print(f"Number of classes: {num_classes}")
print(f"\nClasses:")
for position, class_label in enumerate(class_names, start=1):
    print(f"{position}. {class_label}")

# Two-way lookup between class names and their integer ids (sorted-name order)
class_to_idx = {class_label: class_id for class_id, class_label in enumerate(class_names)}
idx_to_class = dict(enumerate(class_names))

Number of classes: 39

Classes:
1. Apple___Apple_scab
2. Apple___Black_rot
3. Apple___Cedar_apple_rust
4. Apple___healthy
5. Blueberry___healthy
6. Cherry_(including_sour)___Powdery_mildew
7. Cherry_(including_sour)___healthy
8. Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot
9. Corn_(maize)___Common_rust_
10. Corn_(maize)___Northern_Leaf_Blight
11. Corn_(maize)___healthy
12. Grape___Black_rot
13. Grape___Esca_(Black_Measles)
14. Grape___Leaf_blight_(Isariopsis_Leaf_Spot)
15. Grape___healthy
16. Orange___Haunglongbing_(Citrus_greening)
17. Peach___Bacterial_spot
18. Peach___healthy
19. Pepper,_bell___Bacterial_spot
20. Pepper,_bell___healthy
21. Potato___Early_blight
22. Potato___Late_blight
23. Potato___healthy
24. Raspberry___healthy
25. Soybean___healthy
26. Squash___Powdery_mildew
27. Strawberry___Leaf_scorch
28. Strawberry___healthy
29. Tomato___Bacterial_spot
30. Tomato___Early_blight
31. Tomato___Late_blight
32. Tomato___Leaf_Mold
33. Tomato___Septoria_leaf_spot
34. Tomato___

In [7]:
# Load all image paths and labels
def load_dataset_info(data_path):
    """
    Collect every image path under ``data_path`` together with its class index.

    Walks one sub-folder per entry of the module-level ``class_names`` and keeps
    the files whose extension is .jpg, .jpeg or .png (case-insensitive).

    Args:
        data_path: Path object for the dataset root that holds the class folders.

    Returns:
        Tuple ``(image_paths, labels)`` of NumPy arrays: file paths as strings
        and the matching indices taken from ``class_to_idx``.
    """
    allowed_suffixes = ['.jpg', '.jpeg', '.png']
    image_paths = []
    labels = []

    for class_name in tqdm(class_names, desc="Loading dataset"):
        class_idx = class_to_idx[class_name]

        # Keep only the image files that sit directly inside this class folder
        matches = [
            str(candidate)
            for candidate in (data_path / class_name).glob('*')
            if candidate.suffix.lower() in allowed_suffixes
        ]
        image_paths.extend(matches)
        labels.extend([class_idx] * len(matches))

    return np.array(image_paths), np.array(labels)

# Load dataset
X_paths, y = load_dataset_info(PLANT_VILLAGE_PATH)

print(f"\nTotal images: {len(X_paths)}")
print(f"Labels shape: {y.shape}")
print(f"\nLabel distribution:")
# Only the first five classes are listed to keep the output short
unique, counts = np.unique(y, return_counts=True)
for class_id, n_images in list(zip(unique, counts))[:5]:
    print(f"  {idx_to_class[class_id]}: {n_images} images")

Loading dataset: 100%|██████████| 39/39 [00:00<00:00, 336.13it/s]


Total images: 54305
Labels shape: (54305,)

Label distribution:
  Apple___Apple_scab: 630 images
  Apple___Black_rot: 621 images
  Apple___Cedar_apple_rust: 275 images
  Apple___healthy: 1645 images
  Blueberry___healthy: 1502 images





## 4. Split Dataset (Train/Val/Test)

In [8]:
# Split dataset in two stratified steps so every subset keeps the class balance.
# Step 1: hold out the test set from the full data.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_paths, y, test_size=TEST_SPLIT, random_state=SEED, stratify=y
)

# Step 2: carve the validation set out of the remaining train+val portion.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val,
    test_size=VALIDATION_SPLIT, random_state=SEED, stratify=y_train_val
)

print("Dataset split:")
for subset_name, subset_paths in (("Train", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{subset_name}: {len(subset_paths)} images")

# Share of the whole dataset that ended up in each subset
print()
for subset_name, subset_paths in (("Train", X_train), ("Val", X_val), ("Test", X_test)):
    print(f"{subset_name} ratio: {len(subset_paths)/len(X_paths)*100:.1f}%")

Dataset split:
Train: 39099 images
Validation: 9775 images
Test: 5431 images

Train ratio: 72.0%
Val ratio: 18.0%
Test ratio: 10.0%


## 5. Custom Dataset Class

In [9]:
class PlantDiseaseDataset(Dataset):
    """
    Map-style dataset that reads plant-leaf images from disk on demand.

    Args:
        image_paths: Indexable sequence of image file paths.
        labels: Indexable sequence of labels; ``labels[i]`` belongs to
            ``image_paths[i]``.
        transform: Optional callable applied to the RGB PIL image before it is
            returned. When omitted, the PIL image itself is returned.
    """
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """
        Load sample ``idx`` and return ``(image, label)``.

        The image is always converted to RGB and then passed through
        ``transform`` when one was supplied.
        """
        img_path = self.image_paths[idx]

        # The context manager closes the source file as soon as the pixels
        # have been converted, rather than leaving that to garbage collection;
        # the converted image is a separate object that stays usable afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.labels[idx]

        # Apply transforms
        if self.transform is not None:
            image = self.transform(image)

        return image, label

print("PlantDiseaseDataset class created!")

PlantDiseaseDataset class created!


In [10]:
# Data transforms
# Both pipelines resize to IMG_SIZE x IMG_SIZE and normalise with the same
# per-channel mean/std; only the training pipeline adds random augmentation.
_target_size = (IMG_SIZE, IMG_SIZE)
_norm_stats = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

_train_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), scale=(0.8, 1.2)),
]

train_transform = transforms.Compose(
    [transforms.Resize(_target_size)]
    + _train_augmentations
    + [transforms.ToTensor(), transforms.Normalize(**_norm_stats)]
)

val_test_transform = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.ToTensor(),
        transforms.Normalize(**_norm_stats),
    ]
)

print("Data transforms created!")
print("Training: Resize, RandomFlip, Rotation, ColorJitter, Affine, Normalize")
print("Validation/Test: Resize, Normalize")

Data transforms created!
Training: Resize, RandomFlip, Rotation, ColorJitter, Affine, Normalize
Validation/Test: Resize, Normalize


In [11]:
# Create PyTorch datasets (augmentation is applied to the training split only)
train_dataset = PlantDiseaseDataset(X_train, y_train, transform=train_transform)
val_dataset = PlantDiseaseDataset(X_val, y_val, transform=val_test_transform)
test_dataset = PlantDiseaseDataset(X_test, y_test, transform=val_test_transform)

# Workers: use 0 on Windows to avoid multiprocessing issues in notebooks
import sys
WORKERS = 0 if sys.platform.startswith('win') else 4

# Pin memory only if CUDA is available
PIN_MEMORY = torch.cuda.is_available()

# Options shared by all three loaders; only shuffling differs per split
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=WORKERS, pin_memory=PIN_MEMORY)

# Create DataLoaders
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

print(f"\nDataLoaders created! (workers={WORKERS}, pin_memory={PIN_MEMORY})")
for loader_label, loader in (
    ("Train", train_loader),
    ("Validation", val_loader),
    ("Test", test_loader),
):
    print(f"{loader_label} batches: {len(loader)}")


DataLoaders created! (workers=0, pin_memory=True)
Train batches: 1222
Validation batches: 306
Test batches: 170


## 6. Build Individual Models

In [12]:
def _classification_head(num_features, num_classes):
    """Build the Dropout -> Linear(512) -> ReLU -> Dropout -> Linear head shared by several backbones."""
    return nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes)
    )


def build_model(model_name, num_classes, pretrained=True):
    """
    Build a transfer learning model with specified architecture

    The backbone comes from ``torchvision.models`` and its final classifier is
    replaced by a freshly initialised head with ``num_classes`` outputs.

    Args:
        model_name: One of 'efficientnet_b3', 'resnet50', 'densenet121',
            'inception_v3'.
        num_classes: Number of output classes of the new head.
        pretrained: When True, load the ImageNet ("IMAGENET1K_V1") weights;
            when False, the backbone starts from random initialisation.

    Returns:
        The assembled ``nn.Module``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # `pretrained=` is deprecated in torchvision; `weights=` is its replacement.
    # "IMAGENET1K_V1" is the weight set the legacy `pretrained=True` selected
    # for these four models, so the loaded parameters are unchanged.
    weights = "IMAGENET1K_V1" if pretrained else None

    if model_name == 'efficientnet_b3':
        model = models.efficientnet_b3(weights=weights)
        num_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Dropout(p=0.3, inplace=True),
            nn.Linear(num_features, num_classes)
        )

    elif model_name == 'resnet50':
        model = models.resnet50(weights=weights)
        model.fc = _classification_head(model.fc.in_features, num_classes)

    elif model_name == 'densenet121':
        model = models.densenet121(weights=weights)
        model.classifier = _classification_head(model.classifier.in_features, num_classes)

    elif model_name == 'inception_v3':
        if pretrained:
            # Load pretrained model with aux_logits=True (required for pretrained weights)
            model = models.inception_v3(weights=weights, aux_logits=True)
            # Disable aux_logits after loading weights so forward() returns
            # a single tensor in training mode as well
            model.aux_logits = False
            model.AuxLogits = None
        else:
            model = models.inception_v3(weights=None, aux_logits=False)

        model.fc = _classification_head(model.fc.in_features, num_classes)

    else:
        raise ValueError(f"Unknown model: {model_name}")

    return model

# Test model creation: build each architecture once and report its size
for model_name in ENSEMBLE_MODELS:
    model = build_model(model_name, num_classes)
    param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
    total_params = sum(size for size, _ in param_info)
    trainable_params = sum(size for size, needs_grad in param_info if needs_grad)
    print(f"{model_name}:")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Trainable parameters: {trainable_params:,}")
    print()

print("Model builder function created!")

efficientnet_b3:
  Total parameters: 10,756,175
  Trainable parameters: 10,756,175

resnet50:
  Total parameters: 24,577,127
  Trainable parameters: 24,577,127

densenet121:
  Total parameters: 7,498,663
  Trainable parameters: 7,498,663

inception_v3:
  Total parameters: 22,854,663
  Trainable parameters: 22,854,663

Model builder function created!


## 7. Training and Evaluation Functions

In [13]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """
    Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the fraction of correctly classified samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(dataloader, desc='Training')
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the running mean is per-sample
        loss_sum += batch_loss.item() * images.size(0)
        predicted = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

        progress.set_postfix({
            'loss': f'{loss_sum/n_seen:.4f}',
            'acc': f'{100.*n_correct/n_seen:.2f}%'
        })

    return loss_sum / n_seen, n_correct / n_seen


def validate(model, dataloader, criterion, device):
    """
    Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, all_preds, all_labels)``: the
        sample-weighted mean loss, the accuracy as a fraction, and the
        per-sample predictions and ground-truth labels as lists.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    all_preds, all_labels = [], []

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc='Validation'):
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            batch_loss = criterion(logits, labels)

            # Weight the batch loss by its size so the mean is per-sample
            loss_sum += batch_loss.item() * images.size(0)
            predicted = logits.max(1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return loss_sum / n_seen, n_correct / n_seen, all_preds, all_labels


def train_model(model, train_loader, val_loader, criterion, optimizer, 
                scheduler, num_epochs, device, model_name, patience=10):
    """
    Complete training loop with early stopping

    Each epoch trains on ``train_loader``, evaluates on ``val_loader`` and
    steps ``scheduler`` with the validation loss. Whenever the validation
    accuracy improves, a checkpoint is written to
    ``MODEL_SAVE_PATH / f'{model_name}_best.pth'`` and the weights are kept in
    memory. Training stops early after ``patience`` consecutive epochs without
    improvement.

    Args:
        model: Network to train (expected to already be on ``device``).
        train_loader: Loader for the training split.
        val_loader: Loader for the validation split.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``.
        scheduler: LR scheduler whose ``step`` accepts the validation loss.
        num_epochs: Maximum number of epochs.
        device: Device the batches are moved to.
        model_name: Name used for the checkpoint file.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        Tuple ``(model, history)``: the model loaded with the best validation
        weights (if any epoch improved on 0.0 accuracy) and a dict with the
        per-epoch 'train_loss', 'train_acc', 'val_loss' and 'val_acc' lists.
    """
    import copy  # local import so the fix does not depend on the import cell

    best_val_acc = 0.0
    best_model_wts = None
    epochs_no_improve = 0
    
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }
    
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 60)
        
        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        
        # Validate
        val_loss, val_acc, _, _ = validate(model, val_loader, criterion, device)
        
        # Update scheduler
        scheduler.step(val_loss)
        
        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')
        print(f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')
        
        # Early stopping and save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() returns references to the live parameter tensors and
            # dict.copy() is shallow, so later optimizer steps would silently
            # overwrite the "best" weights. deepcopy takes a real snapshot.
            best_model_wts = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
            
            # Save checkpoint
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'val_loss': val_loss,
            }, MODEL_SAVE_PATH / f'{model_name}_best.pth')
            print(f'Saved best model with val_acc: {val_acc:.4f}')
        else:
            epochs_no_improve += 1
        
        if epochs_no_improve >= patience:
            print(f'\nEarly stopping triggered after {epoch+1} epochs')
            break
    
    # Load best model weights
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
    
    return model, history

print("Training functions created!")

Training functions created!


## 8. Train Individual Models

In [14]:
# Quick DataLoader smoke test: fetch one batch and show shapes
print('Running quick DataLoader test...')
try:
    batch = next(iter(train_loader))
    images, labels = batch
    for tensor_label, tensor in (('images', images), ('labels', labels)):
        print(f'Train batch - {tensor_label} shape:', tensor.shape)
    # Move a small batch to device to ensure GPU path works
    images, labels = images[:2].to(device), labels[:2].to(device)
    with torch.no_grad():
        out = torch.zeros(1)  # dummy placeholder
        if torch.cuda.is_available():
            _ = images * 1  # quick op to ensure CUDA tensor ops succeed
            print('CUDA operations OK')
    print('DataLoader smoke test passed')
except Exception as e:
    # Report the failure with a full traceback instead of aborting the notebook
    import traceback
    print('DataLoader smoke test failed:', repr(e))
    traceback.print_exc()

Running quick DataLoader test...
Train batch - images shape: torch.Size([32, 3, 224, 224])
Train batch - labels shape: torch.Size([32])
CUDA operations OK
DataLoader smoke test passed


In [15]:
# Containers for the trained networks and their per-epoch metrics, keyed by model name
trained_models = {}
training_histories = {}

separator = '=' * 70

# Train every ensemble member independently with identical settings
for model_name in ENSEMBLE_MODELS:
    print(f"\n{separator}")
    print(f"Training {model_name}")
    print(separator)

    # Build model and move it to the training device
    model = build_model(model_name, num_classes, pretrained=True).to(device)

    # Loss, optimizer, and an LR scheduler that halves the rate on plateaus
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    # Count parameters
    param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
    total_params = sum(size for size, _ in param_info)
    trainable_params = sum(size for size, needs_grad in param_info if needs_grad)
    print(f"\nModel: {model_name}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    # Train model (early stopping is handled inside train_model)
    model, history = train_model(
        model, train_loader, val_loader, criterion, optimizer, scheduler,
        EPOCHS, device, model_name, patience=PATIENCE,
    )

    # Store model and history
    trained_models[model_name] = model
    training_histories[model_name] = history

    print(f"\n{model_name} training completed!")
    print(f"Best val_accuracy: {max(history['val_acc']):.4f}")

print(f"\n{separator}")
print("All models trained!")
print(separator)


Training efficientnet_b3

Model: efficientnet_b3
Total parameters: 10,756,175
Trainable parameters: 10,756,175

Epoch 1/50
------------------------------------------------------------


Training:   6%|▌         | 73/1222 [00:30<08:03,  2.38it/s, loss=1.6152, acc=59.03%]



KeyboardInterrupt: 

## 9. Visualize Training History

In [None]:
def plot_training_history(histories):
    """
    Draw accuracy and loss curves of every model side by side.

    Args:
        histories: Mapping of model name to a history dict holding the
            'train_acc', 'val_acc', 'train_loss' and 'val_loss' lists.

    The figure is saved as 'training_history.png' under ``MODEL_SAVE_PATH``
    and then shown.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    fig.suptitle('Training History Comparison', fontsize=16, fontweight='bold')

    # One panel per metric: (axis, train key, val key, y label, title)
    panels = (
        (axes[0], 'train_acc', 'val_acc', 'Accuracy', 'Accuracy over Epochs'),
        (axes[1], 'train_loss', 'val_loss', 'Loss', 'Loss over Epochs'),
    )

    for ax, train_key, val_key, y_label, panel_title in panels:
        for model_name, history in histories.items():
            epochs_range = range(1, len(history[train_key]) + 1)
            # Solid line for training, dashed line for validation
            ax.plot(epochs_range, history[train_key], label=f'{model_name} (train)', linestyle='-')
            ax.plot(epochs_range, history[val_key], label=f'{model_name} (val)', linestyle='--')

        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(panel_title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(MODEL_SAVE_PATH / 'training_history.png', dpi=300, bbox_inches='tight')
    plt.show()

# Plot training history
plot_training_history(training_histories)

NameError: name 'training_histories' is not defined

## 10. Evaluate Individual Models on Test Set

In [None]:
# Evaluate each model on the held-out test set
individual_results = {}

# The loss module is stateless, so one instance serves every batch
test_criterion = nn.CrossEntropyLoss()
# Class-frequency-weighted averages; zero_division=0 covers classes that are never predicted
metric_options = dict(average='weighted', zero_division=0)

for model_name, model in trained_models.items():
    print(f"\nEvaluating {model_name}...")

    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f'Testing {model_name}'):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = test_criterion(outputs, labels)

            # Weight the batch loss by its size so the mean is per-sample
            test_loss += loss.item() * images.size(0)
            predicted = outputs.max(1)[1]
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    test_loss = test_loss / total
    test_acc = correct / total
    test_precision = precision_score(all_labels, all_preds, **metric_options)
    test_recall = recall_score(all_labels, all_preds, **metric_options)
    test_f1 = f1_score(all_labels, all_preds, **metric_options)

    # Store results
    individual_results[model_name] = {
        'test_loss': test_loss,
        'test_accuracy': test_acc,
        'test_precision': test_precision,
        'test_recall': test_recall,
        'test_f1': test_f1
    }

    print(f"{model_name} Results:")
    print(f"  Test Loss: {test_loss:.4f}")
    print(f"  Test Accuracy: {test_acc:.4f}")
    print(f"  Test Precision: {test_precision:.4f}")
    print(f"  Test Recall: {test_recall:.4f}")
    print(f"  Test F1-Score: {test_f1:.4f}")

# Create results DataFrame (one row per model)
results_df = pd.DataFrame(individual_results).T
print("\n" + "="*70)
print("Individual Model Results:")
print("="*70)
print(results_df)

## 11. Build Ensemble Model

In [None]:
class EnsembleModel(nn.Module):
    """
    Inference-only ensemble that returns the mean output of its member models.

    Args:
        models_dict: Mapping of name to member ``nn.Module``; only the values
            are kept, in the mapping's iteration order.
    """
    def __init__(self, models_dict):
        super().__init__()
        members = list(models_dict.values())
        self.models = nn.ModuleList(members)
        self.num_models = len(self.models)

    def forward(self, x):
        """Return the element-wise mean of every member's output for ``x``."""
        member_outputs = []
        for member in self.models:
            # Members are always run in eval mode and without gradient tracking
            member.eval()
            with torch.no_grad():
                member_outputs.append(member(x))

        # Stack along a new leading axis and average it away
        stacked = torch.stack(member_outputs)
        return stacked.mean(dim=0)

# Create ensemble from the trained members, move it to the device, switch to eval mode
ensemble_model = EnsembleModel(trained_models).to(device)
ensemble_model.eval()

print("Ensemble model created!")
print(f"Number of models in ensemble: {len(trained_models)}")
print(f"Models: {list(trained_models.keys())}")

## 12. Evaluate Ensemble Model

In [None]:
# Evaluate ensemble model on the held-out test set
print("Evaluating Ensemble Model...")

ensemble_model.eval()
test_loss = 0.0
correct = 0
total = 0
all_preds, all_labels = [], []

# The loss module is stateless, so one instance serves every batch
ensemble_criterion = nn.CrossEntropyLoss()

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc='Testing Ensemble'):
        images = images.to(device)
        labels = labels.to(device)

        outputs = ensemble_model(images)
        loss = ensemble_criterion(outputs, labels)

        # Weight the batch loss by its size so the mean is per-sample
        test_loss += loss.item() * images.size(0)
        predicted = outputs.max(1)[1]
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_loss = test_loss / total
test_acc = correct / total
# Class-frequency-weighted averages; zero_division=0 covers classes that are never predicted
ensemble_metric_options = dict(average='weighted', zero_division=0)
test_precision = precision_score(all_labels, all_preds, **ensemble_metric_options)
test_recall = recall_score(all_labels, all_preds, **ensemble_metric_options)
test_f1 = f1_score(all_labels, all_preds, **ensemble_metric_options)

print("\n" + "="*70)
print("ENSEMBLE MODEL RESULTS:")
print("="*70)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f}")

# Add ensemble results to comparison, next to the individual models
individual_results['Ensemble'] = {
    'test_loss': test_loss,
    'test_accuracy': test_acc,
    'test_precision': test_precision,
    'test_recall': test_recall,
    'test_f1': test_f1
}

# Final comparison, best accuracy first
final_results_df = pd.DataFrame(individual_results).T
print("\n" + "="*70)
print("FINAL COMPARISON (All Models + Ensemble):")
print("="*70)
print(final_results_df.sort_values('test_accuracy', ascending=False))

## 13. Visualize Final Results

In [None]:
# Plot comparison: one horizontal bar chart per metric on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Model Performance Comparison', fontsize=16, fontweight='bold')

metrics = ['test_accuracy', 'test_precision', 'test_recall', 'test_f1']
titles = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
colors = ['skyblue', 'lightgreen', 'lightcoral', 'plum']

# axes.flat walks the grid row by row, matching the order of the metric lists
for ax, metric, title, color in zip(axes.flat, metrics, titles, colors):
    # Sort by metric so the best model ends up at the top of the chart
    sorted_df = final_results_df.sort_values(metric, ascending=True)

    # Highlight ensemble
    colors_list = ['gold' if name == 'Ensemble' else color for name in sorted_df.index]

    # Plot
    bars = ax.barh(sorted_df.index, sorted_df[metric], color=colors_list, edgecolor='black')

    # Add value labels just right of each bar, vertically centred
    for bar in bars:
        width = bar.get_width()
        bar_center = bar.get_y() + bar.get_height()/2
        ax.text(width + 0.005, bar_center,
                f'{width:.4f}', ha='left', va='center', fontweight='bold')

    ax.set_xlabel(title)
    ax.set_title(f'{title} Comparison')
    ax.grid(axis='x', alpha=0.3)
    ax.set_xlim(0, 1.1)  # headroom past 1.0 for the value labels

plt.tight_layout()
plt.savefig(MODEL_SAVE_PATH / 'model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

## 14. Save Models and Results

In [None]:
# Save ensemble model
ensemble_path = MODEL_SAVE_PATH / 'ensemble_model.pth'
torch.save(ensemble_model.state_dict(), ensemble_path)
print(f"Ensemble model saved to: {ensemble_path}")

# Save individual models
for model_name, model in trained_models.items():
    torch.save(model.state_dict(), MODEL_SAVE_PATH / f'{model_name}_final.pth')
print(f"Individual models saved")

# Save results to CSV
results_path = MODEL_SAVE_PATH / 'model_results.csv'
final_results_df.to_csv(results_path)
print(f"Results saved to: {results_path}")

# Save class mappings
# NOTE: JSON object keys are strings, so the integer keys of idx_to_class
# come back as strings when this file is read again.
mappings_path = MODEL_SAVE_PATH / 'class_mappings.json'
with open(mappings_path, 'w') as f:
    json.dump({
        'class_to_idx': class_to_idx,
        'idx_to_class': idx_to_class,
        'num_classes': num_classes
    }, f, indent=2)
print(f"Class mappings saved to: {mappings_path}")

# Save training histories
histories_path = MODEL_SAVE_PATH / 'training_histories.json'
with open(histories_path, 'w') as f:
    # Cast every metric value to a plain float for JSON serialization
    histories_json = {
        model_name: {key: [float(v) for v in values] for key, values in history.items()}
        for model_name, history in training_histories.items()
    }
    json.dump(histories_json, f, indent=2)
print(f"Training histories saved to: {histories_path}")

print("\nAll models and results saved successfully!")

## 15. Summary và Kết luận

In [None]:
rule = "=" * 70

print("\n" + rule)
print(" "*20 + "FINAL SUMMARY")
print(rule)

print(f"\nDataset: PlantVillage")
print(f"Total Classes: {num_classes}")
print(f"Total Images: {len(X_paths)}")
for subset_label, subset_paths in (("Train", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"  - {subset_label}: {len(subset_paths)}")

print(f"\nModels Trained:")
for model_name in ENSEMBLE_MODELS:
    print(f"  - {model_name}")

# Rank the individual models (ensemble row excluded) by test accuracy, best first
print(f"\nBest Individual Model:")
individual_ranking = final_results_df[final_results_df.index != 'Ensemble'].sort_values(
    'test_accuracy', ascending=False
)
best_individual = individual_ranking.iloc[0]
best_model_name = individual_ranking.index[0]
print(f"  Model: {best_model_name}")
print(f"  Accuracy: {best_individual['test_accuracy']:.4f}")

print(f"\nEnsemble Model:")
ensemble_acc = final_results_df.loc['Ensemble', 'test_accuracy']
print(f"  Accuracy: {ensemble_acc:.4f}")

# Accuracy gap between the ensemble and the best single model, in percentage points
improvement = (ensemble_acc - best_individual['test_accuracy']) * 100
if improvement > 0:
    print(f"  Improvement over best individual: +{improvement:.2f}%")
else:
    print(f"  Difference from best individual: {improvement:.2f}%")

print("\n" + rule)
print("Training and Evaluation Complete!")
print(rule)