### Dùng EfficientNet cho dự án phân loại ảnh 16+

In [1]:
pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch

# Confirm that PyTorch can see a CUDA device before any training starts.
gpu_available = torch.cuda.is_available()
print(gpu_available)

True


In [3]:
! pip install numpy matplotlib Pillow scikit-learn



Chuẩn bị dữ liệu

In [4]:
# Project folder for this training run (presumably on the Colab Google Drive
# mount - confirm the drive is mounted before running). The sample image used
# by the prediction example further down lives in this folder.
HOME = "/content/drive/MyDrive/ImageGuard/training_tuanthanh"
# Root of the classification dataset; later cells read its train/, val/ and
# test/ sub-folders.
DATA = "/content/drive/MyDrive/ImageGuard/dataset_classification"

In [5]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing settings shared by the training and validation pipelines:
# the network input resolution and the ImageNet per-channel statistics.
IMAGE_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentation, then tensor conversion and
# normalisation.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),  # random rotation within +/-20 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # colour perturbation
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # shift by up to 10% along each axis
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation pipeline: deterministic, no augmentation.
val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Load each split from its own sub-folder of DATA; ImageFolder derives the
# labels from the class directory names.
train_dataset = datasets.ImageFolder(f'{DATA}/train', transform=train_transforms)
val_dataset = datasets.ImageFolder(f'{DATA}/val', transform=val_transforms)

# Label indices are assigned per split from its folder names, so validation
# accuracy is only meaningful when both splits expose the same class list.
assert val_dataset.classes == train_dataset.classes, (
    f"train/val class mismatch: {train_dataset.classes} vs {val_dataset.classes}"
)

# Shuffle only the training split; keep validation order fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Report the classes that were discovered on disk.
print(f"Số lớp: {len(train_dataset.classes)}")
print(f"Nhãn: {train_dataset.classes}")

Số lớp: 3
Nhãn: ['Adult', 'Normal', 'Violent']


Tải và tinh chỉnh mô hình EfficientNet

In [6]:
from torchvision.models import efficientnet_b0
import torch.nn as nn

# Load EfficientNet-B0 with its ImageNet weights. The boolean `pretrained`
# flag is deprecated in torchvision in favour of `weights`; "IMAGENET1K_V1"
# selects the same checkpoint the old `pretrained=True` resolved to.
model = efficientnet_b0(weights="IMAGENET1K_V1")

# Replace the final linear layer so the head emits one logit per dataset class.
# The class count comes from the dataset so it cannot drift from the data.
num_classes = len(train_dataset.classes)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

# Move the model to the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 163MB/s]


Định nghĩa hàm loss và optimizer

In [19]:
import torch.optim as optim

# Multi-class classification loss.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, with a small weight-decay (L2) term.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

Hàm huấn luyện

In [21]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, patience=5, save_path="best_model.pth"):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Each epoch runs one optimisation pass over ``train_loader`` and then measures
    accuracy on ``val_loader``. Whenever the validation accuracy beats the best
    value seen so far, the model's ``state_dict`` is written to ``save_path``.
    Training stops early once ``patience`` consecutive epochs bring no improvement.

    Args:
        model: ``torch.nn.Module`` to train; batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive non-improving epochs tolerated before stopping.
        save_path: File that receives the best ``state_dict``.

    Returns:
        None. On return the model holds the weights of the last epoch that ran;
        the best-scoring weights are the ones stored at ``save_path``.

    Raises:
        ValueError: If ``train_loader`` yields no batches or ``val_loader`` yields
            no samples (the averages would otherwise divide by zero).
    """
    # Resolve the device from the model itself so the function does not depend
    # on a notebook-level `device` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    best_acc = 0.0  # best validation accuracy seen so far (percent)
    counter = 0  # consecutive epochs without improvement

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()  # clear gradients from the previous step
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader produced no batches")

        # Mean of the per-batch losses for this epoch.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/num_batches:.4f}")

        # ---- validation pass ----
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("val_loader produced no samples")

        accuracy = 100 * correct / total
        print(f"Validation Accuracy: {accuracy:.2f}%")

        # Checkpoint only on a strict improvement; otherwise count towards patience.
        if accuracy > best_acc:
            best_acc = accuracy
            torch.save(model.state_dict(), save_path)
            print(f"Saved best model with Validation Accuracy: {best_acc:.2f}%")
            counter = 0
        else:
            counter += 1
            print(f"No improvement in {counter}/{patience} epochs")

        # Early stopping
        if counter >= patience:
            print("Early stopping triggered")
            break

In [23]:
# Run training and keep the best checkpoint on disk.
# NOTE(review): patience equals num_epochs, so early stopping cannot cut this
# run short; lower patience if early stopping is actually wanted.
train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=15,
    patience=15,
    save_path="best_efficientnet_model.pth",
)

Epoch [1/15], Loss: 0.0717
Validation Accuracy: 92.22%
Saved best model with Validation Accuracy: 92.22%
Epoch [2/15], Loss: 0.0538
Validation Accuracy: 94.07%
Saved best model with Validation Accuracy: 94.07%
Epoch [3/15], Loss: 0.1081
Validation Accuracy: 94.81%
Saved best model with Validation Accuracy: 94.81%
Epoch [4/15], Loss: 0.0841
Validation Accuracy: 94.07%
No improvement in 1/15 epochs
Epoch [5/15], Loss: 0.0389
Validation Accuracy: 94.44%
No improvement in 2/15 epochs
Epoch [6/15], Loss: 0.0457
Validation Accuracy: 93.33%
No improvement in 3/15 epochs
Epoch [7/15], Loss: 0.0366
Validation Accuracy: 91.85%
No improvement in 4/15 epochs
Epoch [8/15], Loss: 0.0690
Validation Accuracy: 94.07%
No improvement in 5/15 epochs
Epoch [9/15], Loss: 0.0506
Validation Accuracy: 94.81%
No improvement in 6/15 epochs
Epoch [10/15], Loss: 0.0343
Validation Accuracy: 94.07%
No improvement in 7/15 epochs
Epoch [11/15], Loss: 0.0427
Validation Accuracy: 95.56%
Saved best model with Validation 

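Lưu mô hình đã huấn luyện

Mô hình tốt nhất đã được lưu tự động bên trong `train_model` (tệp `best_efficientnet_model.pth`) mỗi khi độ chính xác trên tập validation được cải thiện, nên không cần thêm bước lưu riêng.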

Tải và dự đoán trên ảnh mới

In [28]:
from PIL import Image

# Rebuild the architecture without pretrained weights; `weights=None` replaces
# the deprecated `pretrained=False`.
model = efficientnet_b0(weights=None)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("best_efficientnet_model.pth", map_location=device))
model = model.to(device)
model.eval()

def predict_image(image_path, class_names=("Adult", "Normal", "Violent")):
    """Classify a single image file and return the predicted class name.

    Uses the notebook-level ``model``, ``val_transforms`` and ``device``.

    Args:
        image_path: Path of the image file to classify.
        class_names: Class names indexed by the model's output position. The
            default matches the class order printed when the dataset was loaded.

    Returns:
        The entry of ``class_names`` at the index of the highest-scoring output.
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns a separate, fully loaded RGB image.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert("RGB")

    # Same preprocessing as validation, plus a leading batch dimension.
    batch = val_transforms(rgb_image).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(batch)
    predicted_index = logits.argmax(dim=1).item()
    return class_names[predicted_index]

# Example: classify the sample image stored in the project folder. The path is
# built from HOME instead of repeating the absolute Drive path.
result = predict_image(f"{HOME}/test_image.png")
print(f"Dự đoán: {result}")

Dự đoán: Normal


Hàm test

In [29]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
import numpy as np

# The test split must be preprocessed exactly like the validation split, so
# reuse that pipeline instead of keeping a third hand-written copy in sync.
test_transforms = val_transforms

# Load the test split.
test_dataset = datasets.ImageFolder(f'{DATA}/test', transform=test_transforms)

# Label indices come from the folder names, so the metrics are only meaningful
# when the test split exposes the same class list the model was trained on.
assert test_dataset.classes == train_dataset.classes, (
    f"train/test class mismatch: {train_dataset.classes} vs {test_dataset.classes}"
)

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def test_model(model, test_loader, device, model_path="best_efficientnet_model.pth", class_names=None):
    """Load a checkpoint into ``model`` and print its test-set metrics.

    Prints overall accuracy, per-class precision / recall / F1 and a confusion
    matrix whose rows are true classes and whose columns are predicted classes.

    Args:
        model: ``torch.nn.Module`` whose architecture matches the checkpoint.
        test_loader: Iterable of ``(images, labels)`` batches.
        device: Device used for loading the checkpoint and running inference.
        model_path: Path of the ``state_dict`` file to load.
        class_names: Class names indexed by label id. When omitted they are read
            from ``test_loader.dataset.classes``.

    Returns:
        None; all results are printed.

    Raises:
        ValueError: If the class names cannot be determined or ``test_loader``
            yields no samples.
    """
    # Take the class names from the loader that was passed in rather than from
    # a notebook-level dataset variable.
    if class_names is None:
        class_names = getattr(getattr(test_loader, "dataset", None), "classes", None)
    if class_names is None:
        raise ValueError("class_names not given and test_loader.dataset has no 'classes'")
    class_names = list(class_names)
    label_ids = list(range(len(class_names)))

    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # written on a GPU still loads on a CPU-only runtime.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    # Collect predictions and ground-truth labels as plain Python ints.
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            predicted = model(images.to(device)).argmax(dim=1)
            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.tolist())

    total = len(all_labels)
    if total == 0:
        raise ValueError("test_loader produced no samples")

    # Accuracy
    correct = sum(1 for pred, truth in zip(all_preds, all_labels) if pred == truth)
    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")

    # Precision, recall and F1 per class. Passing `labels` fixes the length and
    # order of the result arrays even when a class never occurs in this split.
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average=None
    )

    print("\nPer-class metrics:")
    for i, class_name in enumerate(class_names):
        print(f"Class: {class_name}")
        print(f"  Precision: {precision[i]:.4f}")
        print(f"  Recall: {recall[i]:.4f}")
        print(f"  F1-score: {f1[i]:.4f}")

    # Confusion matrix. Header and columns are generated from class_names so
    # they always match the row labels and work for any number of classes.
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    col_width = max([8] + [len(str(name)) for name in class_names])
    row_label_width = len("True ") + col_width
    header_cells = " ".join(f"{str(name):>{col_width}}" for name in class_names)

    print("\nConfusion Matrix:")
    print(f"{'':>{row_label_width}} {'Predicted':>20}")
    print(f"{'':>{row_label_width}} {header_cells}")
    for i, class_name in enumerate(class_names):
        row_cells = " ".join(f"{cm[i, j]:>{col_width}}" for j in label_ids)
        print(f"True {str(class_name):>{col_width}} {row_cells}")

Test trên data test

In [31]:
# Evaluate on the GPU when one is available, otherwise on the CPU.
# test_model reloads the checkpoint from model_path before evaluating.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
test_model(model, test_loader, device, model_path="best_efficientnet_model.pth")

Test Accuracy: 97.04%

Per-class metrics:
Class: Adult
  Precision: 1.0000
  Recall: 1.0000
  F1-score: 1.0000
Class: Normal
  Precision: 0.9528
  Recall: 0.9712
  F1-score: 0.9619
Class: Violent
  Precision: 0.9595
  Recall: 0.9342
  F1-score: 0.9467

Confusion Matrix:
                        Predicted
                adult   normal violence
True    Adult       90        0        0
True   Normal        0      101        3
True  Violent        0        5       71
