In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Check if GPU is available, and if not, use the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
import torch.nn.functional as F
from torchvision.datasets import VOCDetection
from torchvision import transforms
import torch 
from torch.utils.data import DataLoader

In [4]:
student_model = torch.hub.load("ultralytics/yolov5", "yolov5s")

Using cache found in /home/minhh/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-8-30 Python-3.8.10 torch-2.4.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3090, 24245MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [10]:
!pip install dill

  from pkg_resources import load_entry_point
Collecting dill
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[K     |████████████████████████████████| 116 kB 1.3 MB/s eta 0:00:01
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.8


In [11]:
teacher_model = torch.hub.load("ultralytics/yolov5","custom",path = "runs/train/exp5/weights/best.pt", force_reload=True)
teacher_model.eval()

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /home/minhh/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-8-31 Python-3.8.10 torch-2.4.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3090, 24245MiB)

Fusing layers... 
Model summary: 416 layers, 140730220 parameters, 0 gradients, 209.6 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 80, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1

In [14]:
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
])

In [15]:
# Load the VOC2012 dataset
train_dataset = VOCDetection(root='./data', year='2012', image_set='train', download=True, transform=transform)
val_dataset = VOCDetection(root='./data', year='2012', image_set='val', download=True, transform=transform)

Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data


In [34]:
def custom_collate_fn(batch):
    # Loại bỏ các phần tử None
    batch = [b for b in batch if b is not None]
    
    # Tạo một danh sách chứa các ảnh và nhãn riêng biệt
    images = torch.stack([item[0] for item in batch], dim=0)
    targets = [item[1] for item in batch]
    
    return images, targets


In [57]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, collate_fn=custom_collate_fn)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, collate_fn=custom_collate_fn)


In [50]:
import numpy as np

In [71]:
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn

def train(model, train_loader, epochs, learning_rate, device):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0

        for batch in train_loader:
            images, targets = batch
            images = images.to(device)

            if images.size(0) == 0:
                continue  # Skip empty batch

            # YOLOv5 returns a list of outputs
            outputs = model(images)[0]  # Select the first output

            # Debugging: Print type and content of targets
            print(f"Type of targets: {type(targets)}")
            print(f"Content of targets: {targets}")

            # Extract numeric data from targets
            if isinstance(targets, dict):
                # Assuming the dictionary contains tensors or lists of tensors
                target_values = []
                for key, value in targets.items():
                    if isinstance(value, (list, np.ndarray, torch.Tensor)):
                        # Convert to numpy array if it's a tensor
                        if isinstance(value, torch.Tensor):
                            value = value.numpy()
                        target_values.append(value)
                    else:
                        print(f"Unexpected type in targets dictionary for key {key}: {type(value)}")
                
                if len(target_values) == 0:
                    print("No valid numeric data found in targets.")
                    continue
                
                # Flatten the list of arrays/tensors if necessary
                target_values = np.concatenate([np.array(v) for v in target_values], axis=0)
                targets_tensor = torch.tensor(target_values, dtype=torch.float).to(device)
            else:
                # Handle if targets is not a dictionary
                if isinstance(targets, (list, np.ndarray, torch.Tensor)):
                    # Convert to tensor directly
                    if isinstance(targets, torch.Tensor):
                        targets_tensor = targets.to(device)
                    else:
                        targets_tensor = 
                        # targets_tensor = torch.tensor(targets, dtype=torch.float).to(device)
                elif isinstance(targets, dict):
                    print(f"Unexpected dictionary type for targets: {targets}")
                    # Handle the dictionary case, e.g., extract relevant values
                    # targets_tensor = handle_dict(targets)
                else:
                    print(f"Unexpected type for targets: {type(targets)}")
                    continue


            # Compute the loss
            loss_fn = nn.MSELoss()  # Example loss function
            loss = loss_fn(outputs, targets_tensor)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader)}")


In [59]:
epochs = 5
learning_rate = 0.001

In [72]:
train(teacher_model, train_loader, epochs, learning_rate, device)

Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005855.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '283', 'xmin': '97', 'ymax': '315', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_001970.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '280', 'xmin': '72', 'ymax': '488', 'ymin': '323'}, 'difficult': '0', 'occluded': '1', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_001031.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xma

  with amp.autocast(autocast):


TypeError: must be real number, not dict

In [None]:
def val(model, val_loader, device):
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy