# Bài 1

In [None]:
!ls / /

/:
bin			    dev     lib32   NGC-DL-CONTAINER-LICENSE  root  tmp
boot			    etc     lib64   opt			      run   tools
content			    home    libx32  proc		      sbin  usr
cuda-keyring_1.0-1_all.deb  kaggle  media   python-apt		      srv   var
datalab			    lib     mnt     python-apt.tar.xz	      sys

/:
bin			    dev     lib32   NGC-DL-CONTAINER-LICENSE  root  tmp
boot			    etc     lib64   opt			      run   tools
content			    home    libx32  proc		      sbin  usr
cuda-keyring_1.0-1_all.deb  kaggle  media   python-apt		      srv   var
datalab			    lib     mnt     python-apt.tar.xz	      sys


In [None]:
import kagglehub

# Download the dataset through kagglehub and keep the local directory
# it returns; later cells build the annotation/image paths from it.
data_dir = kagglehub.dataset_download(
    "andrewmvd/dog-and-cat-detection"
)
print("Path to dataset files:", data_dir)

Downloading from https://www.kaggle.com/api/v1/datasets/download/andrewmvd/dog-and-cat-detection?dataset_version_number=1...


100%|██████████| 1.03G/1.03G [00:13<00:00, 81.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/andrewmvd/dog-and-cat-detection/versions/1


In [None]:
import os
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torchvision.models.resnet import ResNet18_Weights

In [None]:
class ImageDataset(Dataset):
  """Image-classification dataset pairing each image with an XML annotation.

  For an image ``<stem>.<ext>`` found in ``image_dir`` the annotation is looked
  up as ``<stem>.xml`` in ``annotations_dir``. Images whose annotation lists
  more than one ``<object>`` element are excluded, and so are images that have
  no annotation file (they could not be labelled in ``__getitem__``).

  Each item is ``(image, label)`` where ``label`` is 0 for "cat", 1 for "dog"
  and 2 for anything else (including an annotation without any object).

  Args:
    annotations_dir: Directory containing the ``.xml`` annotation files.
    image_dir: Directory containing the image files.
    transform: Optional callable applied to the loaded RGB PIL image.
  """
  def __init__(self,annotations_dir,image_dir,transform = None):
    self.annotations_dir = annotations_dir
    self.image_dir = image_dir
    self.transform = transform
    # The directory scan happens once, at construction time.
    self.image_files = self.filter_images_with_multiple_objects()

  def _annotation_path(self,img_name):
    """Return the path of the XML annotation sharing the image's base name."""
    annotation_name = os.path.splitext(img_name)[0] + ".xml"
    return os.path.join(self.annotations_dir,annotation_name)

  def filter_images_with_multiple_objects(self):
    """Return the image file names that can be used for classification.

    An image is kept when its annotation file exists and contains at most one
    ``<object>`` element; every excluded image is reported on stdout.
    """
    valid_image_files = []
    for img_name in os.listdir(self.image_dir):
      # Ignore sub-directories and other non-file entries.
      if not os.path.isfile(os.path.join(self.image_dir,img_name)):
        continue
      annotation_path = self._annotation_path(img_name)
      # Without an annotation file parse_annotation() would raise
      # FileNotFoundError later, in the middle of an epoch.
      if not os.path.isfile(annotation_path):
        print(f"Image {img_name} has no annotation file and will be excluded from the dataset")
        continue
      if self.count_objects_in_annotation(annotation_path) <=1:
        valid_image_files.append(img_name)
      else:
        print(f"Image {img_name} has multiple objects and will be excluded from the dataset")
    return valid_image_files

  def count_objects_in_annotation(self,annotation_path):
    """Return the number of ``<object>`` elements directly under the XML root.

    A missing annotation file counts as 0 objects.
    """
    try:
      root = ET.parse(annotation_path).getroot()
    except FileNotFoundError:
      return 0
    return len(root.findall("object"))

  def __len__(self):
    """Return the number of images kept by the filter."""
    return len(self.image_files)

  def __getitem__(self,idx):
    """Load the ``idx``-th image as RGB and return ``(image, label)``."""
    img_name = self.image_files[idx]
    img_path = os.path.join(self.image_dir,img_name)

    image = Image.open(img_path).convert("RGB")
    label = self.parse_annotation(self._annotation_path(img_name))

    if self.transform:
      image = self.transform(image)
    return image,label

  def parse_annotation(self,annotation_path):
    """Return the integer class of the first object in the annotation.

    Returns:
      0 for "cat", 1 for "dog", 2 for any other name or when the annotation
      contains no ``<object>`` element.

    Raises:
      FileNotFoundError: If ``annotation_path`` does not exist.
    """
    root = ET.parse(annotation_path).getroot()

    # Only the first <object> decides the label.
    first_object = root.find("object")
    label = first_object.find("name").text if first_object is not None else None

    # NOTE: the comparison string used to be " cat" (leading space), which
    # never matched and silently sent every cat to class 2.
    if label == "cat":
      return 0
    if label == "dog":
      return 1
    return 2


In [None]:
# Locate the annotation and image folders inside the downloaded dataset.
annotations_dir = os.path.join(data_dir,"annotations")
image_dir = os.path.join(data_dir,"images")

# List the folder once, keep regular files only, and sort the names:
# os.listdir() returns entries in arbitrary order, so without sorting the
# seeded split below would not be reproducible across machines.
image_files = sorted(f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir,f)))

# Show a count and a short sample instead of dumping every file name.
print(f"Found {len(image_files)} image files, e.g. {image_files[:5]}")

df = pd.DataFrame({"image_name":image_files})

# 80/20 train/validation split with a fixed seed.
train_df,val_df = train_test_split(df,test_size=0.2,random_state=42)

['Cats_Test2285.png', 'Cats_Test1306.png', 'Cats_Test1098.png', 'Cats_Test1490.png', 'Cats_Test137.png', 'Cats_Test3025.png', 'Cats_Test1893.png', 'Cats_Test1445.png', 'Cats_Test1199.png', 'Cats_Test2548.png', 'Cats_Test3351.png', 'Cats_Test2959.png', 'Cats_Test2314.png', 'Cats_Test1558.png', 'Cats_Test2457.png', 'Cats_Test3650.png', 'Cats_Test3629.png', 'Cats_Test2280.png', 'Cats_Test2054.png', 'Cats_Test3240.png', 'Cats_Test3623.png', 'Cats_Test3021.png', 'Cats_Test1296.png', 'Cats_Test1519.png', 'Cats_Test52.png', 'Cats_Test1408.png', 'Cats_Test1156.png', 'Cats_Test1170.png', 'Cats_Test2728.png', 'Cats_Test1530.png', 'Cats_Test2311.png', 'Cats_Test2113.png', 'Cats_Test2568.png', 'Cats_Test95.png', 'Cats_Test1981.png', 'Cats_Test2808.png', 'Cats_Test1103.png', 'Cats_Test2488.png', 'Cats_Test2336.png', 'Cats_Test1808.png', 'Cats_Test3561.png', 'Cats_Test1778.png', 'Cats_Test813.png', 'Cats_Test1318.png', 'Cats_Test3626.png', 'Cats_Test2623.png', 'Cats_Test2546.png', 'Cats_Test3580.png

In [None]:
# Resize to the 224x224 input used here, convert to a tensor, and normalise
# each channel with the mean/std values listed below.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])
])

# Both datasets start from the full, filtered image list ...
train_dataset = ImageDataset(annotations_dir,image_dir,transform=transform)
val_dataset = ImageDataset(annotations_dir,image_dir,transform=transform)

# ... and are then restricted to their side of the split. Sets give O(1)
# membership tests; testing against the NumPy array scanned it for every file.
train_names = set(train_df['image_name'])
val_names = set(val_df['image_name'])
train_dataset.image_files = [f for f in train_dataset.image_files if f in train_names]
val_dataset.image_files = [f for f in val_dataset.image_files if f in val_names]

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True)
val_loader = DataLoader(val_dataset,batch_size=32,shuffle=False)

Image Cats_Test736.png has multiple objects and will be excluded from the dataset
Image Cats_Test736.png has multiple objects and will be excluded from the dataset


In [None]:
# Start from ResNet-18 with its default pretrained weights and replace the
# final fully-connected layer with a 3-output classification head.
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=3)

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
model.to(device)

# Cross-entropy loss over the class logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

print(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 122MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Train for a fixed number of epochs and report validation accuracy after each.
num_epochs = 10
for epoch in range(num_epochs):
  # ---- training pass ----
  model.train()
  for batch_idx,(data,targets) in enumerate(train_loader):
    data, targets = data.to(device), targets.to(device)

    scores = model(data)
    loss = criterion(scores,targets)

    # Clear the previous step's gradients, back-propagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # ---- validation pass (no gradient tracking) ----
  model.eval()
  correct, total = 0, 0
  with torch.no_grad():
    for data,targets in val_loader:
      data, targets = data.to(device), targets.to(device)
      scores = model(data)
      # The index of the largest logit along dim 1 is the predicted class.
      predictions = scores.max(dim=1).indices
      correct += (predictions == targets).sum()
      total += targets.size(0)
  print(f"Epoch {epoch+1}/{num_epochs}, Validation Accuracy: {float(correct)/float(total)*100:.2f}%")

Epoch 1/10, Validation Accuracy: 88.60%
Epoch 2/10, Validation Accuracy: 89.96%
Epoch 3/10, Validation Accuracy: 92.13%
Epoch 4/10, Validation Accuracy: 83.45%
Epoch 5/10, Validation Accuracy: 93.22%
Epoch 6/10, Validation Accuracy: 94.84%
Epoch 7/10, Validation Accuracy: 94.84%
Epoch 8/10, Validation Accuracy: 94.44%
Epoch 9/10, Validation Accuracy: 94.98%
Epoch 10/10, Validation Accuracy: 94.30%


# Bài 2

In [None]:
import kagglehub

# Download the dataset through kagglehub and keep the local directory
# it returns; later cells build the annotation/image paths from it.
data_dir = kagglehub.dataset_download(
    "andrewmvd/dog-and-cat-detection"
)
print("Path to dataset files:", data_dir)

Downloading from https://www.kaggle.com/api/v1/datasets/download/andrewmvd/dog-and-cat-detection?dataset_version_number=1...


100%|██████████| 1.03G/1.03G [00:48<00:00, 22.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/andrewmvd/dog-and-cat-detection/versions/1


In [None]:
# List the entries of the root directory of the runtime.
%ls /

[0m[01;36mbin[0m@                        [01;34mdev[0m/     [01;36mlib32[0m@   NGC-DL-CONTAINER-LICENSE  [01;34mroot[0m/  [30;42mtmp[0m/
[01;34mboot[0m/                       [01;34metc[0m/     [01;36mlib64[0m@   [01;34mopt[0m/                      [01;34mrun[0m/   [01;34mtools[0m/
[01;34mcontent[0m/                    [01;34mhome[0m/    [01;36mlibx32[0m@  [01;34mproc[0m/                     [01;36msbin[0m@  [01;34musr[0m/
cuda-keyring_1.0-1_all.deb  [01;34mkaggle[0m/  [01;34mmedia[0m/   [01;34mpython-apt[0m/               [01;34msrv[0m/   [01;34mvar[0m/
[01;34mdatalab[0m/                    [01;36mlib[0m@     [01;34mmnt[0m/     [01;32mpython-apt.tar.xz[0m*        [01;34msys[0m/


In [None]:
import pandas

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torchvision.models.resnet import ResNet18_Weights

In [None]:
class ImageDataset(Dataset):
  """Dataset yielding an image, its class label and its normalised bounding box.

  For an image ``<stem>.<ext>`` found in ``image_dir`` the annotation is looked
  up as ``<stem>.xml`` in ``annotations_dir``. Only images whose annotation
  contains exactly one ``<object>`` element are kept.

  Each item is ``(image, label, bbox)`` where ``label`` is 0 for "cat", 1 for
  "dog" and 2 for anything else, and ``bbox`` is a float32 tensor
  ``[xmin, ymin, xmax, ymax]`` divided by the image width/height recorded in
  the annotation.

  Args:
    annotations_dir: Directory containing the ``.xml`` annotation files.
    image_dir: Directory containing the image files.
    transform: Optional callable applied to the loaded RGB PIL image.
  """
  def __init__(self,annotations_dir,image_dir,transform = None):
    self.annotations_dir = annotations_dir
    self.image_dir = image_dir
    self.transform = transform
    # The directory scan happens once, at construction time.
    self.image_files = self.filter_images_with_multiple_objects()

  def _annotation_path(self,img_name):
    """Return the path of the XML annotation sharing the image's base name."""
    annotation_name = os.path.splitext(img_name)[0] + ".xml"
    return os.path.join(self.annotations_dir,annotation_name)

  def filter_images_with_multiple_objects(self):
    """Return the image file names whose annotation has exactly one object.

    Images with a missing annotation file count as having 0 objects and are
    therefore dropped as well.
    """
    valid_image_files = []
    for img_name in os.listdir(self.image_dir):
      # Ignore sub-directories and other non-file entries.
      if not os.path.isfile(os.path.join(self.image_dir,img_name)):
        continue
      if self.count_objects_in_annotation(self._annotation_path(img_name)) ==1:
        valid_image_files.append(img_name)
    return valid_image_files

  def count_objects_in_annotation(self,annotation_path):
    """Return the number of ``<object>`` elements directly under the XML root.

    A missing annotation file counts as 0 objects.
    """
    try:
      root = ET.parse(annotation_path).getroot()
    except FileNotFoundError:
      return 0
    return len(root.findall("object"))

  def __len__(self):
    """Return the number of images kept by the filter."""
    return len(self.image_files)

  def __getitem__(self,idx):
    """Load the ``idx``-th image as RGB and return ``(image, label, bbox)``."""
    img_name = self.image_files[idx]
    img_path = os.path.join(self.image_dir,img_name)

    image = Image.open(img_path).convert("RGB")
    label,bbox = self.parse_annotation(self._annotation_path(img_name))

    if self.transform:
      image = self.transform(image)
    return image,label,bbox

  def parse_annotation(self,annotation_path):
    """Return ``(label_num, bbox)`` for the first object in the annotation.

    Returns:
      label_num: 0 for "cat", 1 for "dog", 2 for any other name.
      bbox: float32 tensor ``[xmin/width, ymin/height, xmax/width, ymax/height]``
        using the ``size/width`` and ``size/height`` values of the annotation.

    Raises:
      FileNotFoundError: If ``annotation_path`` does not exist.
      ValueError: If the annotation contains no ``<object>`` element.
    """
    root = ET.parse(annotation_path).getroot()
    image_width = int(root.find("size/width").text)
    image_height = int(root.find("size/height").text)

    # Only the first <object> is used for both the label and the box.
    first_object = root.find("object")
    if first_object is None:
      # Fail with a clear message instead of torch.tensor(None) blowing up.
      raise ValueError(f"No <object> element found in annotation {annotation_path}")

    label = first_object.find("name").text
    xmin = int(first_object.find("bndbox/xmin").text)
    ymin = int(first_object.find("bndbox/ymin").text)
    xmax = int(first_object.find("bndbox/xmax").text)
    ymax = int(first_object.find("bndbox/ymax").text)
    # Scale pixel coordinates by the image size recorded in the annotation.
    bbox = [xmin/image_width,ymin/image_height,xmax/image_width,ymax/image_height]

    # NOTE: the comparison string used to be " cat" (leading space), which
    # never matched and silently sent every cat to class 2.
    if label == "cat":
      label_num = 0
    elif label == "dog":
      label_num = 1
    else:
      label_num = 2
    return label_num,torch.tensor(bbox,dtype=torch.float32)


In [None]:
# Locate the annotation and image folders inside the downloaded dataset.
annotations_dir = os.path.join(data_dir,"annotations")
image_dir = os.path.join(data_dir,"images")

# List the folder once, keep regular files only, and sort the names:
# os.listdir() returns entries in arbitrary order, so without sorting the
# seeded split below would not be reproducible across machines.
image_files = sorted(f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir,f)))

# Show a count and a short sample instead of dumping every file name.
print(f"Found {len(image_files)} image files, e.g. {image_files[:5]}")

df = pd.DataFrame({"image_name":image_files})

# 80/20 train/validation split with a fixed seed.
train_df,val_df = train_test_split(df,test_size=0.2,random_state=42)

['Cats_Test2285.png', 'Cats_Test1306.png', 'Cats_Test1098.png', 'Cats_Test1490.png', 'Cats_Test137.png', 'Cats_Test3025.png', 'Cats_Test1893.png', 'Cats_Test1445.png', 'Cats_Test1199.png', 'Cats_Test2548.png', 'Cats_Test3351.png', 'Cats_Test2959.png', 'Cats_Test2314.png', 'Cats_Test1558.png', 'Cats_Test2457.png', 'Cats_Test3650.png', 'Cats_Test3629.png', 'Cats_Test2280.png', 'Cats_Test2054.png', 'Cats_Test3240.png', 'Cats_Test3623.png', 'Cats_Test3021.png', 'Cats_Test1296.png', 'Cats_Test1519.png', 'Cats_Test52.png', 'Cats_Test1408.png', 'Cats_Test1156.png', 'Cats_Test1170.png', 'Cats_Test2728.png', 'Cats_Test1530.png', 'Cats_Test2311.png', 'Cats_Test2113.png', 'Cats_Test2568.png', 'Cats_Test95.png', 'Cats_Test1981.png', 'Cats_Test2808.png', 'Cats_Test1103.png', 'Cats_Test2488.png', 'Cats_Test2336.png', 'Cats_Test1808.png', 'Cats_Test3561.png', 'Cats_Test1778.png', 'Cats_Test813.png', 'Cats_Test1318.png', 'Cats_Test3626.png', 'Cats_Test2623.png', 'Cats_Test2546.png', 'Cats_Test3580.png

In [None]:
# Resize to the 224x224 input used here, convert to a tensor, and normalise
# each channel with the mean/std values listed below. The bounding boxes are
# stored as fractions of the image size, so resizing does not invalidate them.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])
])

# Both datasets start from the full, filtered image list ...
train_dataset = ImageDataset(annotations_dir,image_dir,transform=transform)
val_dataset = ImageDataset(annotations_dir,image_dir,transform=transform)

# ... and are then restricted to their side of the split. Sets give O(1)
# membership tests; testing against the NumPy array scanned it for every file.
train_names = set(train_df['image_name'])
val_names = set(val_df['image_name'])
train_dataset.image_files = [f for f in train_dataset.image_files if f in train_names]
val_dataset.image_files = [f for f in val_dataset.image_files if f in val_names]

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True)
val_loader = DataLoader(val_dataset,batch_size=32,shuffle=False)

In [None]:
class TwoHeadedModel(nn.Module):
  """ResNet-18 feature extractor feeding two linear heads.

  The backbone's own ``fc`` layer is replaced by an identity so it outputs the
  pooled feature vector; one head produces ``num_classes`` class logits and
  the other produces 4 box values squashed into (0, 1) by a sigmoid.
  """
  def __init__(self,num_classes=3):
    super().__init__()
    backbone = models.resnet18(weights=ResNet18_Weights.DEFAULT)
    feature_dim = backbone.fc.in_features
    # Drop the original classification layer; keep only the features.
    backbone.fc = nn.Identity()

    self.base_model = backbone
    self.num_ftrs = feature_dim
    self.classifier = nn.Linear(feature_dim,num_classes)
    self.regressor = nn.Linear(feature_dim,4)

  def forward(self,x):
    """Return ``(class_logits, bbox_coords)`` for a batch of images ``x``."""
    features = self.base_model(x)
    class_logits = self.classifier(features)
    # Sigmoid keeps every predicted coordinate inside (0, 1).
    bbox_coords = self.regressor(features).sigmoid()
    return class_logits,bbox_coords

In [None]:
# Build the two-headed network and move it to the GPU when one is available.
model = TwoHeadedModel()
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
model.to(device)

# One loss per head: cross-entropy for the class logits, mean squared error
# for the box coordinates. A single Adam optimiser updates all parameters.
criterion_class = nn.CrossEntropyLoss()
criterion_bbox = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Train both heads jointly; after each epoch report validation accuracy and
# the sample-weighted mean box loss.
num_epochs = 10
for epoch in range(num_epochs):
  # ---- training pass ----
  model.train()
  for batch_idx,(data,targets,bbox) in enumerate(train_loader):
    data, targets, bbox = data.to(device), targets.to(device), bbox.to(device)

    scores,pred_bboxes = model(data)
    loss_class = criterion_class(scores,targets)
    loss_bbox = criterion_bbox(pred_bboxes,bbox)
    # The two objectives are summed with equal weight.
    loss = loss_class + loss_bbox

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # ---- validation pass (no gradient tracking) ----
  model.eval()
  correct, total_loss_bbox, total_samples = 0, 0, 0
  with torch.no_grad():
    for data,targets,bbox in val_loader:
      data, targets, bbox = data.to(device), targets.to(device), bbox.to(device)
      scores,pred_bboxes = model(data)
      # The index of the largest logit along dim 1 is the predicted class.
      predictions = scores.max(dim=1).indices
      correct += (predictions == targets).sum()
      # Weight each batch's mean loss by its size so the final average is
      # per sample even when the last batch is smaller.
      total_loss_bbox += criterion_bbox(pred_bboxes,bbox).item() * data.size(0)
      total_samples += targets.size(0)
  avg_loss_bbox = total_loss_bbox/total_samples
  print(f"Epoch {epoch+1}/{num_epochs}, Validation Accuracy: {float(correct)/float(total_samples)*100:.2f}%, Avg. Bbox Loss: {avg_loss_bbox:.4f}")

Epoch 1/10, Validation Accuracy: 90.91%, Avg. Bbox Loss: 0.0118
Epoch 2/10, Validation Accuracy: 91.45%, Avg. Bbox Loss: 0.0130
Epoch 3/10, Validation Accuracy: 89.69%, Avg. Bbox Loss: 0.0096
Epoch 4/10, Validation Accuracy: 93.76%, Avg. Bbox Loss: 0.0099
Epoch 5/10, Validation Accuracy: 93.76%, Avg. Bbox Loss: 0.0094
Epoch 6/10, Validation Accuracy: 92.27%, Avg. Bbox Loss: 0.0132
Epoch 7/10, Validation Accuracy: 92.67%, Avg. Bbox Loss: 0.0095
Epoch 8/10, Validation Accuracy: 91.32%, Avg. Bbox Loss: 0.0109
Epoch 9/10, Validation Accuracy: 95.39%, Avg. Bbox Loss: 0.0073
Epoch 10/10, Validation Accuracy: 95.93%, Avg. Bbox Loss: 0.0081
