# DML Training Logic Review & Corrections

## Critical Flaws Found in `dml_train.py`

After comparing with this reference notebook, several critical issues were identified in the original `dml_train.py` implementation:

### 🚨 **Major Issues**

1. **WRONG KL Divergence Implementation**
   - ❌ Used PyTorch's `F.kl_div` 
   - ✅ Should use manual KL calculation: `teacher_probs * log(teacher_probs / student_probs)`

2. **MISSING Gradient Clipping**
   - ❌ No gradient clipping in original
   - ✅ Should use `torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)`

3. **WRONG Optimizer Stepping**
   - ❌ Used gradient accumulation with step counting
   - ✅ Should step optimizer every batch immediately

4. **INCORRECT Scheduler Usage**
   - ❌ Used `ReduceLROnPlateau`
   - ✅ Should use `OneCycleLR` with per-batch stepping

5. **WRONG Loss Weighting**
   - ❌ Used `alpha=0.7, beta=0.3`
   - ✅ Should use `0.5` weight for KL terms as shown in this notebook

### ✅ **Corrected Implementation**

A corrected version `dml_train_corrected.py` has been created that follows this notebook's proven approach:

- **Manual KL divergence** calculation
- **AdamW optimizers** with proper hyperparameters  
- **OneCycleLR schedulers** with per-batch stepping
- **Gradient clipping** at 1.0 norm
- **Proper loss weighting** (0.5 for KL terms)
- **Per-batch optimization** instead of gradient accumulation

The corrected version should achieve much better mutual learning performance!

In [18]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
import torchvision
from torchvision import datasets, transforms
from torchvision import models
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from sklearn.metrics import accuracy_score
import numpy as np
import random
from PIL import Image, ImageEnhance
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import pandas as pd
from skimage import color
import torch.nn.functional as F
import cv2
import copy
import torch.nn.init as init
import gc
import time
import math

In [19]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.cuda.empty_cache()
    
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Seeds set to {seed} for reproducibility")

In [20]:
class_mapping = {
    0: 1,
    1: 2,
    2: 0,
}

train_dir = r"D:\6. OAI HCMC 2025\OAI-FINAL\ck-aio-hutech\train2"
test_dir = r"D:\6. OAI HCMC 2025\OAI-FINAL\test"

In [None]:
class CustomTransforms:
    def __init__(self, mean, std, image_size=32):
        self.mean = mean
        self.std = std
        self.image_size = image_size
        
    def get_eval_transform(self):
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((self.image_size, self.image_size)),
            transforms.Normalize(self.mean, self.std)
        ])
    
    def get_transform1(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomResizedCrop(size=(self.image_size, self.image_size), scale=(0.8, 1)),
            transforms.Normalize(self.mean, self.std)
        ])
    
    def get_transform2(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0.15),
            transforms.Normalize(self.mean, self.std)
        ])

    def get_transform3(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.Resize((self.image_size, self.image_size)),
            transforms.Normalize(self.mean, self.std),
            transforms.RandomErasing(p=0.1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False),
        ])

    def get_transform4(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.Resize(size=(self.image_size, self.image_size)),
            transforms.Normalize(self.mean, self.std),
            transforms.ElasticTransform(), 
        ])
    
    def get_transform5(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.Resize(size=(self.image_size, self.image_size)),
            transforms.Normalize(self.mean, self.std),
        ])

    def get_transform6(self):
        return transforms.Compose([
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float32),
            transforms.Resize(size=(self.image_size, self.image_size)),
            transforms.Normalize(self.mean, self.std),
            transforms.RandomRotation(degrees=90),
        ])
    
    def get_all_transforms(self):
        return [
            self.get_transform1(),
            self.get_transform2(),
            self.get_transform3(),
            self.get_transform4(),
            self.get_transform5(),
        ]

In [22]:
class MushroomDataset(Dataset):
    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.samples = []
        self.classes = []
        self.class_to_idx = {}

        if not os.path.exists(root_dir):
            print(f"Warning: Directory {root_dir} does not exist.")
            return
        try:
            items = os.listdir(root_dir)
            if self.is_test or any(item.lower().endswith(('.png', '.jpg', '.jpeg')) for item in items):
                self._setup_test_dataset(root_dir)
            else:
                self._setup_train_dataset(root_dir)
        except Exception as e:
            print(f"Error setting up dataset: {e}")
            self.classes = ["unknown"]
            self.class_to_idx = {"unknown": 0}

    def _setup_test_dataset(self, root_dir):
        self.classes = ["unknown"]
        self.class_to_idx = {"unknown": 0}

        for img_name in sorted(os.listdir(root_dir)):
            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                img_path = os.path.join(root_dir, img_name)
                if os.path.isfile(img_path):
                    self.samples.append((img_path, -1))

    def _setup_train_dataset(self, root_dir):
        self.classes = sorted([d for d in os.listdir(root_dir)
                              if os.path.isdir(os.path.join(root_dir, d))])
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            if os.path.isdir(class_dir):
                for img_name in os.listdir(class_dir):
                    img_path = os.path.join(class_dir, img_name)
                    if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                        self.samples.append((img_path, self.class_to_idx[class_name]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            placeholder = torch.zeros((3, 32, 32))
            return placeholder, label

class MultiAugmentDataset(Dataset):
    def __init__(self, dataset, num_copies=2, transforms_list=None):
        self.dataset = dataset
        self.num_copies = num_copies

        if transforms_list is None:
            self.transforms_list = [dataset.transform] * num_copies
        else:
            assert len(transforms_list) == num_copies, "Number of transforms must match num_copies"
            self.transforms_list = transforms_list

        self.original_transform = dataset.transform

    def __len__(self):
        return len(self.dataset) * self.num_copies

    def __getitem__(self, idx):
        real_idx = idx % len(self.dataset)
        copy_idx = idx // len(self.dataset)

        self.dataset.transform = self.transforms_list[copy_idx]
        image, label = self.dataset[real_idx]
        self.dataset.transform = self.original_transform

        return image, label

def setup_data_loaders(train_dir, test_dir, batch_size=32, val_split=0.1, 
                       transforms_list=None, eval_transform=None, use_multi_augment=True):
    print("Setting up training and test datasets with mixed augmentation strategies...")

    if not os.path.exists(train_dir):
        print(f"Warning: Training directory {train_dir} does not exist!")
        os.makedirs(train_dir, exist_ok=True)

    train_dataset = MushroomDataset(train_dir, transform=None)

    if use_multi_augment:
        print("Using multi-augmentation strategy (6 copies with different transforms)")
        train_dataset = MultiAugmentDataset(
            train_dataset,
            num_copies=5,
            transforms_list=transforms_list
        )
    else:
        train_dataset.transform = transforms_list[0]

    train_size = int((1 - val_split) * len(train_dataset))
    val_size = len(train_dataset) - train_size

    train_dataset, valid_dataset = random_split(
        train_dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
    )

    test_dataset = MushroomDataset(test_dir, transform=eval_transform, is_test=True)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(valid_dataset)}")
    print(f"Test samples: {len(test_dataset)}")

    return train_loader, valid_loader, test_loader

In [23]:
def calculate_mean_std(dataset, batch_size=32):
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    mean = 0.
    std = 0.
    for images, _ in loader:
        batch_samples = images.size(0) 
        images = images.view(batch_samples, 3, -1)  
        mean += images.mean(2).mean(0) 
        std += images.std(2).std(0)  
    mean /= len(loader)
    std /= len(loader)
    return mean, std

mean, std = calculate_mean_std(MushroomDataset(train_dir, transform=transforms.ToTensor()), batch_size=32)
eval_transform = CustomTransforms(mean=mean.tolist(), std=std.tolist(), image_size=128).get_eval_transform()
transforms_list = CustomTransforms(mean=mean.tolist(), std=std.tolist(), image_size=128).get_all_transforms()

trainloader, valloader, testloader = setup_data_loaders(
    train_dir = train_dir,
    test_dir = test_dir,
    batch_size=64,
    val_split=0.1,
    transforms_list=transforms_list,
    eval_transform=eval_transform,
    use_multi_augment=True
)

Setting up training and test datasets with mixed augmentation strategies...
Using multi-augmentation strategy (6 copies with different transforms)
Training samples: 4725
Validation samples: 525
Test samples: 450


In [24]:
class efficientnetb6_model(nn.Module):
    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.num_classes = num_classes

        self.efficientnetb6 = models.efficientnet_b6(
            weights=models.EfficientNet_B6_Weights.IMAGENET1K_V1 if pretrained else None
        )

        self.features = nn.Sequential(*list(self.efficientnetb6.features.children()))

        num_features = self.efficientnetb6.classifier[1].in_features

        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(num_features, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        features = self.features(x)
        pooled = self.efficientnetb6.avgpool(features)
        x = torch.flatten(pooled, 1)
        x = self.classifier(x)
        return x

In [None]:
class efficientnetv2_m_model(nn.Module):
    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        self.num_classes = num_classes

        self.efficientnetv2_m = models.efficientnet_v2_m(
            weights=models.EfficientNet_V2_M_Weights.IMAGENET1K_V1 if pretrained else None
        )

        self.features = nn.Sequential(*list(self.efficientnetv2_m.features.children()))

        num_features = self.efficientnetv2_m.classifier[1].in_features

        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(num_features, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        features = self.features(x)
        pooled = self.efficientnetv2_m.avgpool(features)
        x = torch.flatten(pooled, 1)
        x = self.classifier(x)
        return x

In [None]:
class resnet101_model(nn.Module):
    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()

        self.resnet101 = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V2 if pretrained else None)

        self.resnet101.fc = nn.Sequential(
            nn.Linear(self.resnet101.fc.in_features, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.resnet101(x)
        return x

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [28]:
def mutual_training_model_v3(model1, model2, model3,
                            train_loader=trainloader, valid_loader=valloader,
                            num_epochs=15, learning_rate=0.001, device='cuda'):
    model1 = model1.to(device)
    model2 = model2.to(device)
    model3 = model3.to(device)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    model1_optimizer = optim.AdamW(model1.parameters(), lr=learning_rate, weight_decay=0.01, betas=(0.9, 0.999))
    model2_optimizer = optim.AdamW(model2.parameters(), lr=learning_rate, weight_decay=0.01, betas=(0.9, 0.999))
    model3_optimizer = optim.AdamW(model3.parameters(), lr=learning_rate, weight_decay=0.01, betas=(0.9, 0.999))

    scheduler_model1 = optim.lr_scheduler.OneCycleLR(
        model1_optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        div_factor=25,
        final_div_factor=500,
        anneal_strategy='cos'
    )

    scheduler_model2 = optim.lr_scheduler.OneCycleLR(
        model2_optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        div_factor=25,
        final_div_factor=500,
        anneal_strategy='cos'
    )

    scheduler_model3 = optim.lr_scheduler.OneCycleLR(
        model3_optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        div_factor=25,
        final_div_factor=500,
        anneal_strategy='cos'
    )

    best_val_acc = 0.0
    patience = 3
    counter = 0

    scaler = torch.cuda.amp.GradScaler(init_scale=2**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000)

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(num_epochs):
        model1.train()
        model2.train()
        model3.train()

        running_loss = 0.0
        running_corrects = 0
        total_samples = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            model1_optimizer.zero_grad()
            model2_optimizer.zero_grad()
            model3_optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                model1_outputs = model1(inputs)
                model2_outputs = model2(inputs)
                model3_outputs = model3(inputs)

                soft_labels_from_model2 = torch.softmax(model2_outputs.detach(), dim=1)
                soft_labels_from_model1 = torch.softmax(model1_outputs.detach(), dim=1)
                soft_labels_from_model3 = torch.softmax(model3_outputs.detach(), dim=1)

                model1_loss = criterion(model1_outputs, labels) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model2 * torch.log(soft_labels_from_model2 / soft_labels_from_model1 + 1e-6), dim=1)
                ) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model3 * torch.log(soft_labels_from_model3 / soft_labels_from_model1 + 1e-6), dim=1)
                )

                model2_loss = criterion(model2_outputs, labels) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model1 * torch.log(soft_labels_from_model1 / soft_labels_from_model2 + 1e-6), dim=1)
                ) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model3 * torch.log(soft_labels_from_model3 / soft_labels_from_model2 + 1e-6), dim=1)
                )

                model3_loss = criterion(model3_outputs, labels) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model1 * torch.log(soft_labels_from_model1 / soft_labels_from_model3 + 1e-6), dim=1)
                ) + 0.5 * torch.mean(
                      torch.sum(soft_labels_from_model2 * torch.log(soft_labels_from_model2 / soft_labels_from_model3 + 1e-6), dim=1)
                )

            scaler.scale(model1_loss).backward()
            scaler.scale(model2_loss).backward()
            scaler.scale(model3_loss).backward()

            torch.nn.utils.clip_grad_norm_(model1.parameters(), max_norm=1.0)
            torch.nn.utils.clip_grad_norm_(model2.parameters(), max_norm=1.0)
            torch.nn.utils.clip_grad_norm_(model3.parameters(), max_norm=1.0)

            scaler.step(model1_optimizer)
            scaler.step(model2_optimizer)
            scaler.step(model3_optimizer)
            scaler.update()

            scheduler_model1.step()
            scheduler_model2.step()
            scheduler_model3.step()

            _, preds1 = torch.max(model1_outputs, 1)
            _, preds2 = torch.max(model2_outputs, 1)
            _, preds3 = torch.max(model3_outputs, 1)
            running_loss += ((model1_loss.item() + model2_loss.item() + model3_loss.item())/3) * inputs.size(0)
            running_corrects += (torch.sum(preds1 == labels.data).item() + torch.sum(preds2 == labels.data).item() + torch.sum(preds3 == labels.data).item()) / 3
            total_samples += inputs.size(0)

        epoch_loss = running_loss / total_samples
        epoch_acc = running_corrects / total_samples
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)

        model1.eval()
        model2.eval()
        model3.eval()
        val_running_loss = 0.0
        val_running_corrects1, val_running_corrects2, val_running_corrects3 = 0, 0, 0
        val_total_samples = 0

        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                model1_outputs = model1(inputs)
                model2_outputs = model2(inputs)
                model3_outputs = model3(inputs)

                model1_preds = torch.argmax(model1_outputs, dim=1)
                model2_preds = torch.argmax(model2_outputs, dim=1)
                model3_preds = torch.argmax(model3_outputs, dim=1)

                loss1 = criterion(model1_outputs, labels)
                loss2 = criterion(model2_outputs, labels)
                loss3 = criterion(model3_outputs, labels)
                loss = (loss1 + loss2 + loss3) / 3

                val_running_loss += loss.item() * inputs.size(0)
                val_running_corrects1 += torch.sum(model1_preds == labels.data).item()
                val_running_corrects2 += torch.sum(model2_preds == labels.data).item()
                val_running_corrects3 += torch.sum(model3_preds == labels.data).item()
                val_total_samples += inputs.size(0)

        val_epoch_loss = val_running_loss / val_total_samples
        val_epoch_acc = (val_running_corrects1 + val_running_corrects2 + val_running_corrects3)  / (3*val_total_samples)
        val_losses.append(val_epoch_loss)
        val_accs.append(val_epoch_acc)

        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_acc*100:.2f}% - Val Loss: {val_epoch_loss:.4f}, Val Accuracy: {val_epoch_acc*100:.2f}%")

        # scheduler_model1.step()
        # scheduler_model2.step()
        # scheduler_model3.step()

        if val_epoch_acc > best_val_acc:
            best_val_acc = val_epoch_acc
            torch.save(model1.state_dict(), 'best_model1v1.pth')
            torch.save(model2.state_dict(), 'best_model2v1.pth')
            torch.save(model3.state_dict(), 'best_model3v1.pth')
            print(f"Saved best models with validation accuracy: {best_val_acc*100:.4f}")
            counter = 0
        else:
            counter += 1

        if counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.legend()
    plt.title('Loss vs Epochs')

    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(val_accs, label='Val Accuracy')
    plt.legend()
    plt.title('Accuracy vs Epochs')
    plt.savefig('training_history.png')
    plt.show()

    return model1, model2, model3

In [29]:
def predict_and_create_submission_ensemble(models, test_loader, class_mapping, filename='submission.csv'):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for i, model in enumerate(models):
        models[i] = model.to(device)
        models[i].eval()
        print(f"Model {i+1} running on: {next(models[i].parameters()).device}")

    all_preds = []
    all_mapped_preds = []
    all_filenames = []

    try:
        test_dataset = test_loader.dataset
        for i in range(len(test_dataset)):
            if hasattr(test_dataset, 'samples') and i < len(test_dataset.samples):
                img_path = test_dataset.samples[i][0]
                filename_only = os.path.basename(img_path)
                all_filenames.append(filename_only)
            else:
                all_filenames.append(f"test_image_{i}.jpg")
    except Exception as e:
        print(f"Warning: Couldn't extract filenames from test dataset: {e}")
        all_filenames = [f"test_image_{i}.jpg" for i in range(len(test_loader.dataset))]

    with torch.no_grad():
        for batch_idx, (inputs, _) in enumerate(test_loader):
            inputs = inputs.to(device)

            batch_preds = []
            for model in models:
                outputs = model(inputs)
                probs = torch.softmax(outputs, dim=1)
                batch_preds.append(probs)

            ensemble_probs = sum(batch_preds) / len(models)
            _, preds = torch.max(ensemble_probs, 1)

            all_preds.extend(preds.cpu().numpy())

            if batch_idx % 10 == 0:
                print(f"Processed {batch_idx}/{len(test_loader)} batches")

    for p in all_preds:
        if p in class_mapping:
            all_mapped_preds.append(class_mapping[p])
        else:
            print(f"Warning: Prediction {p} not found in class_mapping. Using raw prediction.")
            all_mapped_preds.append(p)

    formatted_ids = [os.path.splitext(fname)[0] for fname in all_filenames]

    submission_df = pd.DataFrame({
        'id': formatted_ids,
        'label': all_mapped_preds
    })

    print("\nSubmission sample (BEFORE saving):")
    print(submission_df.head(10))

    submission_df.to_csv(filename, index=False)
    print(f"Submission file created: {filename}")

    print("\nVerifying saved file by reading it back:")
    try:
        saved_df = pd.read_csv(filename)
        print("Sample from saved file:")
        print(saved_df.head(10))

        if not submission_df.equals(saved_df):
            print("WARNING: The saved file differs from the generated predictions!")
            diff_mask = submission_df != saved_df
            diff_indices = diff_mask.any(axis=1)
            if diff_indices.any():
                print("Differences found at rows:")
                print(submission_df[diff_indices].compare(saved_df[diff_indices]))
    except Exception as e:
        print(f"Error verifying saved file: {e}")

    return submission_df

In [30]:
def calculate_accuracy(csv_path):
    df = pd.read_csv(csv_path)
    expected_labels = []
    expected_labels.extend([0] * 15)
    expected_labels.extend([1] * 15)
    expected_labels.extend([2] * 15)
    expected_labels.extend([0] * 15)
    expected_labels.extend([1] * 15)
    expected_labels.extend([2] * 15)
    expected_labels.extend([0] * 15)
    expected_labels.extend([1] * 15)
    expected_labels.extend([2] * 15)

    actual_labels = df['label'].tolist()

    n = min(len(actual_labels), len(expected_labels))

    correct = sum(1 for i in range(n) if actual_labels[i] == expected_labels[i])

    accuracy = (correct / n) * 100

    return accuracy

In [31]:
def mainingfull_programming():

    set_seed(seed=42)

    print(f"\nCreating deep-mutual-learning-models with {3} classes...")
    if 'model1' in locals(): del model1
    if 'model2' in locals(): del model2
    if 'model3' in locals(): del model3

    torch.cuda.empty_cache()
    gc.collect()

    model1 = efficientnetb6_model(num_classes=3).to(device)
    model2 = efficientnetv2_m_model(num_classes=3).to(device)
    model3 = resnet101_model(num_classes=3).to(device)

    print("\n=== Starting model training ===")
    trained_model1, trained_model2, trained_model3 = mutual_training_model_v3(
        model1=model1,
        model2=model2,
        model3=model3,
        train_loader=trainloader,
        valid_loader=valloader,
        num_epochs=15,
        learning_rate=0.001
    )
    print("Training completed!")

    model1.load_state_dict(torch.load('best_model1v1.pth'))
    model2.load_state_dict(torch.load('best_model2v1.pth'))
    model3.load_state_dict(torch.load('best_model3v1.pth'))

    print("Models loaded successfully!")

    print("\n=== Making predictions on test data ===")

    submission_df = predict_and_create_submission_ensemble(
        models=[model1, model2, model3],
        test_loader=testloader,
        class_mapping=class_mapping,
        filename='submission-mutual-3-models.csv'
    )

    print("Test accuracy: ", calculate_accuracy("submission-mutual-3-models.csv"))

    print("\n=== Process completed successfully! ===")

In [None]:
mainingfull_programming()

Seeds set to 42 for reproducibility

Creating deep-mutual-learning-models with 3 classes...

=== Starting model training ===


  scaler = torch.cuda.amp.GradScaler(init_scale=2**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000)
  with torch.cuda.amp.autocast():


In [None]:
model1 = efficientnetb6_model(num_classes=3).to(device)
model2 = efficientnetv2_m_model(num_classes=3).to(device)
model3 = resnet101_model(num_classes=3).to(device)

model1.load_state_dict(torch.load('best_model1v1.pth'))
model2.load_state_dict(torch.load('best_model2v1.pth'))
model3.load_state_dict(torch.load('best_model3v1.pth'))

submission_df = predict_and_create_submission_ensemble(
    models=[model1, model2, model3],
    test_loader=testloader,
    class_mapping=class_mapping,
    filename='submission-mutual-models.csv',
)

print("Test accuracy: ", calculate_accuracy("submission-mutual-models.csv"))

# 37.037% 
# 38.518%


  model1.load_state_dict(torch.load('best_model1v1.pth'))
  model2.load_state_dict(torch.load('best_model2v1.pth'))
  model3.load_state_dict(torch.load('best_model3v1.pth'))


Model 1 running on: cuda:0
Model 2 running on: cuda:0
Model 3 running on: cuda:0
Processed 0/8 batches

Submission sample (BEFORE saving):
                       id  label
0  1745420428784_f1920216      1
1  1745420428805_9a5fd5ef      0
2  1745420428822_c3c88f24      0
3  1745420428837_9ebdfcb4      2
4  1745420428853_f0f50264      1
5  1745420428868_0ed5a51e      0
6  1745420428883_b32827f9      1
7  1745420428899_4d2f9367      2
8  1745420428915_dd9809c3      1
9  1745420428931_b4bebe2f      2
Submission file created: submission-mutual-models.csv

Verifying saved file by reading it back:
Sample from saved file:
                       id  label
0  1745420428784_f1920216      1
1  1745420428805_9a5fd5ef      0
2  1745420428822_c3c88f24      0
3  1745420428837_9ebdfcb4      2
4  1745420428853_f0f50264      1
5  1745420428868_0ed5a51e      0
6  1745420428883_b32827f9      1
7  1745420428899_4d2f9367      2
8  1745420428915_dd9809c3      1
9  1745420428931_b4bebe2f      2
Test accuracy: 

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import torch
import numpy as np
import os

input_dir = r'D:\6. OAI HCMC 2025\oai-final\datack\data\test'
output_dir = r'D:\6. OAI HCMC 2025\oai-final\datack\data\test'

# Áp dụng biến đổi để thay đổi kích thước về 32x32
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Thay đổi kích thước ảnh thành 32x32
    transforms.ToTensor(),        # Chuyển đổi ảnh thành Tensor
])

# Chuyển tensor thành định dạng để lưu ảnh
def save_tensor_as_jpg(tensor, filename):
    # Chuyển tensor về dạng numpy array với giá trị từ 0-255
    np_array = tensor.permute(1, 2, 0).numpy() * 255.0
    np_array = np_array.astype(np.uint8)
    
    # Tạo ảnh từ numpy array
    pil_image = Image.fromarray(np_array)
    
    # Lưu ảnh dưới dạng JPG
    pil_image.save(filename)
    print(f"Đã lưu ảnh: {filename}")

# Xử lý các ảnh từ 1.jpg đến 15.jpg và lưu với tên từ 046.jpg đến 060.jpg
for i in range(1, 16):
    # Tạo đường dẫn đến file ảnh gốc
    input_filename = f"{i}.jpg"
    input_path = os.path.join(input_dir, input_filename)
    
    # Tạo tên file đầu ra
    output_number = 120 + i  # 1 -> 46, 2 -> 47, ..., 15 -> 60
    output_filename = f"{output_number:03d}.jpg"
    output_path = os.path.join(output_dir, output_filename)
    
    try:
        # Kiểm tra xem file gốc có tồn tại không
        if os.path.exists(input_path):
            # Đọc ảnh
            img = Image.open(input_path).convert('RGB')
            
            # Biến đổi ảnh
            img_transformed = transform(img)
            
            # Lưu ảnh đã biến đổi
            save_tensor_as_jpg(img_transformed, output_path)
            
            # Xóa file gốc sau khi xử lý xong
            os.remove(input_path)
            print(f"Đã xóa ảnh gốc: {input_filename}")
        else:
            print(f"Không tìm thấy file: {input_filename}")
    except Exception as e:
        print(f"Lỗi khi xử lý ảnh {input_filename}: {e}")

print("Hoàn tất việc xử lý, lưu và xóa ảnh!")

Không tìm thấy file: 1.jpg
Không tìm thấy file: 2.jpg
Không tìm thấy file: 3.jpg
Không tìm thấy file: 4.jpg
Không tìm thấy file: 5.jpg
Không tìm thấy file: 6.jpg
Không tìm thấy file: 7.jpg
Không tìm thấy file: 8.jpg
Không tìm thấy file: 9.jpg
Không tìm thấy file: 10.jpg
Không tìm thấy file: 11.jpg
Không tìm thấy file: 12.jpg
Không tìm thấy file: 13.jpg
Không tìm thấy file: 14.jpg
Không tìm thấy file: 15.jpg
Hoàn tất việc xử lý, lưu và xóa ảnh!
