<a href="https://colab.research.google.com/github/podo47/DL_HW4/blob/main/DL_HW4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Mount to drive

In [1]:
from google.colab import drive
drive.mount('/content/drive') #連接到自己的雲端

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Unzip data

In [None]:
!tar -xvf /content/drive/MyDrive/DL/HW4/VOC_2007/VOCtrainval_06-Nov-2007.tar -C /content/drive/MyDrive/DL/HW4/My_VOC2007

## Import library

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
from google.colab.patches import cv2_imshow

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
from torchvision.transforms import transforms

from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Voc 2007

## Import data

In [4]:
train = pd.read_csv('/content/drive/MyDrive/DL/HW4/My_VOC2007/train.txt', sep=" ",header=None, dtype={0: str})
train_dir = np.array(train[0])

valid = pd.read_csv('/content/drive/MyDrive/DL/HW4/My_VOC2007/val.txt', sep=" ",header=None, dtype={0: str})
valid_dir = np.array(valid[0])

test = pd.read_csv('/content/drive/MyDrive/DL/HW4/My_VOC2007/test.txt', sep=" ",header=None, dtype={0: str})
test_dir = np.array(test[0])

### Read image into array

In [14]:
def read_images_to_array(data_dir):
  X = []
  for imgname in data_dir:
      folder_path = '/content/drive/MyDrive/DL/HW4/My_VOC2007/VOCdevkit/VOC2007/JPEGImages'
      file_name = imgname + '.jpg'
      file_path = os.path.join(folder_path, file_name)
      if os.path.isfile(file_path):
        img = cv2.imread(file_path)
        img_ar = np.array(img)
        X.append(img_ar)
  X = np.array(X)
  return X

In [24]:
# To save time, you can just skip this step, as the output has already been stored

X_train = read_images_to_array(train_dir)
X_valid = read_images_to_array(valid_dir)
X_test = read_images_to_array(test_dir)

In [25]:
# save them to a file
np.savez("/content/drive/MyDrive/DL/HW4/dataset.npz", traindata=X_train, validdata=X_valid, testdata=X_test)

In [None]:
with np.load("/content/drive/MyDrive/DL/HW4/dataset.npz") as data:
    X_train = data["traindata"]
    X_valid = data["validdata"]
    X_test = data["testdata"]

### Read label

In [30]:
import xml.etree.ElementTree as ET

In [45]:
def parse_voc_annotation(annotation_file):
    tree = ET.parse(annotation_file)
    root = tree.getroot()

    objects = []
    for object_element in root.findall('object'):
        # 獲取物件的類別名稱
        class_name = object_element.find('name').text

        # 獲取物件的邊界框座標
        bbox_element = object_element.find('bndbox')
        xmin = int(bbox_element.find('xmin').text)
        ymin = int(bbox_element.find('ymin').text)
        xmax = int(bbox_element.find('xmax').text)
        ymax = int(bbox_element.find('ymax').text)

        # 將物件資訊添加到列表中
        object_info = {
            'class_name': class_name,
            'bbox': [xmin, ymin, xmax, ymax]
        }
        objects.append(object_info)
    objects = np.array(objects)

    return objects

In [46]:
def voc_label(data_dir):
  label = []
  for imgname in data_dir:
        folder_path = '/content/drive/MyDrive/DL/HW4/My_VOC2007/VOCdevkit/VOC2007/Annotations'
        file_name = imgname + '.xml'
        file_path = os.path.join(folder_path, file_name)
        if os.path.isfile(file_path):
          obj = parse_voc_annotation(file_path)
          label.append(obj)
  label = np.array(label)
  return label


In [47]:
train_y = voc_label(train_dir)

In [48]:
valid_y = voc_label(valid_dir)
test_y = voc_label(test_dir)

# ADE20K

## Import data

In [4]:
train_a = pd.read_csv('/content/drive/MyDrive/DL/HW4/ADE20K/ADE20K_DL_course/train.txt', sep=" ",header=None, dtype={0: str})
train_dir_a = np.array(train_a[0])

valid_a = pd.read_csv('/content/drive/MyDrive/DL/HW4/ADE20K/ADE20K_DL_course/val.txt', sep=" ",header=None, dtype={0: str})
valid_dir_a = np.array(valid_a[0])

test_a = pd.read_csv('/content/drive/MyDrive/DL/HW4/ADE20K/ADE20K_DL_course/test.txt', sep=" ",header=None, dtype={0: str})
test_dir_a = np.array(test_a[0])

### Read image into array

In [5]:
def read_images_to_array_a(data_dir):
  X = []
  for imgname in data_dir:
      folder_path = '/content/drive/MyDrive/DL/HW4/ADE20K/ADE20K_DL_course/imgs'
      file_name = 'ADE_val_'+imgname + '.jpg'
      file_path = os.path.join(folder_path, file_name)
      if os.path.isfile(file_path):
        img = cv2.imread(file_path)
        img_ar = np.array(img)
        X.append(img_ar)
  X = np.array(X)
  return X

In [6]:
# To save time, you can just skip this step, as the output has already been stored

X_train_a = read_images_to_array_a(train_dir_a)
X_valid_a = read_images_to_array_a(valid_dir_a)
X_test_a = read_images_to_array_a(test_dir_a)

In [None]:
# save them to a file
np.savez("/content/drive/MyDrive/DL/HW4/dataset_ade.npz", traindata=X_train_a, validdata=X_valid_a, testdata=X_test_a)

In [None]:
with np.load("/content/drive/MyDrive/DL/HW4/dataset_ade.npz") as data:
    X_train_a = data["traindata"]
    X_valid_a = data["validdata"]
    X_test_a = data["testdata"]

In [None]:
# 初始化物件檢測模型 Faster R-CNN
model_detection = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model_detection.eval()

# 初始化語義分割模型 DeepLabV3+
model_segmentation = models.segmentation.deeplabv3_resnet50(pretrained=True)
model_segmentation.eval()

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:08<00:00, 18.9MB/s]
Downloading: "https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth" to /root/.cache/torch/hub/checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth
100%|██████████| 161M/161M [00:02<00:00, 64.1MB/s]


DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se