# Implement YOLO with DIOR dataset

## Import libraries

In [20]:
import os
import shutil
import gdown
import random
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from collections import Counter
import matplotlib.pyplot as plt
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from xml.etree import ElementTree


## Import Data

In [3]:
# Directory that holds the downloaded DIOR zip archives
raw_data_path = "raw_data"

In [13]:
# Download the DIOR archives from Google Drive into `raw_data_path`.
#
# Every archive is checked individually: the mere existence of the directory
# does not prove that all downloads completed, so an interrupted run would
# otherwise be skipped forever on re-execution.
# Each entry is (file name, Google Drive URL, fuzzy flag passed to gdown).
archive_sources = [
    ('Annotations.zip',
     'https://drive.google.com/uc?id=1KoQzqR20qvIXDf1qsXCHGxD003IPmXMw',
     False),
    ('JPEGImages-test.zip',
     'https://drive.google.com/file/d/1wq0FQCBsbrnf-sJfloi7IR9tBH87GGGS/view?usp=sharing',
     True),
    ('JPEGImages-trainval.zip',
     'https://drive.google.com/file/d/1NVRSBm3RfpGGtZvgLJG5e_XD9uP4DZmI/view?usp=sharing',
     True),
]

if not os.path.exists(raw_data_path):
    os.makedirs(raw_data_path)
    print('[INFO] Raw data directory is been created.')

# Only the archives that are not on disk yet need to be fetched.
missing_archives = [
    (name, url, fuzzy)
    for name, url, fuzzy in archive_sources
    if not os.path.isfile(os.path.join(raw_data_path, name))
]

if not missing_archives:
    print('[INFO] Raw data directory exists, skiping download.')
else:
    print('[INFO] Downloading data...\n')
    for name, url, fuzzy in missing_archives:
        target = os.path.join(raw_data_path, name)
        # Some gdown versions signal a failed download by returning None
        # instead of raising; fail loudly so later cells never run on
        # incomplete data.
        if gdown.download(url, output=target, fuzzy=fuzzy) is None:
            raise RuntimeError(f'Download of "{name}" from "{url}" failed.')
    print('[INFO] Data is been downloaded.')

[INFO] Raw data directory is been created.
[INFO] Downloading data...



Downloading...
From (original): https://drive.google.com/uc?id=1KoQzqR20qvIXDf1qsXCHGxD003IPmXMw
From (redirected): https://drive.google.com/uc?id=1KoQzqR20qvIXDf1qsXCHGxD003IPmXMw&confirm=t&uuid=24e299d3-37dd-46fe-a16a-bc8094995417
To: /home/ec2-user/ObjDct_Master_Thesis/raw_data/Annotations.zip
100%|██████████| 32.1M/32.1M [00:00<00:00, 36.0MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1wq0FQCBsbrnf-sJfloi7IR9tBH87GGGS
From (redirected): https://drive.google.com/uc?id=1wq0FQCBsbrnf-sJfloi7IR9tBH87GGGS&confirm=t&uuid=7e307aa4-40ad-4ab1-95a0-3aed29655028
To: /home/ec2-user/ObjDct_Master_Thesis/raw_data/JPEGImages-test.zip
100%|██████████| 3.52G/3.52G [00:47<00:00, 73.6MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1NVRSBm3RfpGGtZvgLJG5e_XD9uP4DZmI
From (redirected): https://drive.google.com/uc?id=1NVRSBm3RfpGGtZvgLJG5e_XD9uP4DZmI&confirm=t&uuid=939c6988-38ef-4680-936d-ee3f8f64e001
To: /home/ec2-user/ObjDct_Master_Thesis/raw_data/JPEGImages-t

[INFO] Data is been downloaded.





In [9]:
# Directory into which the downloaded archives are extracted
dior_data_path = "dior_data"

In [14]:
# Extracting the zip data files from `raw_data_path` into `dior_data_path`
if os.path.exists(dior_data_path):
    print('[INFO] DIOR data directory exists, skiping extraction.')
else:
    # Restrict to real zip archives and process them in a stable order:
    # os.listdir returns every entry (temporary download files, checkpoint
    # folders, ...) in arbitrary order, and unpack_archive raises on anything
    # it does not recognise. Listing happens before the target directory is
    # created so a missing source directory does not leave an empty target.
    archive_names = sorted(i for i in os.listdir(raw_data_path) if i.endswith('.zip'))
    os.makedirs(dior_data_path)
    try:
        for i in archive_names:
            filename = os.path.join(raw_data_path, i)
            shutil.unpack_archive(filename=filename, extract_dir=dior_data_path)
            print(f'[INFO] File "{filename}" is been extracted to "{dior_data_path}".')
    except BaseException:
        # Also covers a kernel interrupt: remove the half-filled directory so
        # the existence check above does not skip extraction on the next run.
        shutil.rmtree(dior_data_path, ignore_errors=True)
        raise

[INFO] File "raw_data/Annotations.zip" is been extracted to "dior_data".
[INFO] File "raw_data/JPEGImages-test.zip" is been extracted to "dior_data".
[INFO] File "raw_data/JPEGImages-trainval.zip" is been extracted to "dior_data".


In [15]:
# Paths for all the data, derived from `dior_data_path` so that changing the
# extraction directory in one place keeps every path consistent.
annot_data_path = os.path.join(dior_data_path, 'Annotations', 'Horizontal Bounding Boxes')
trainval_data_path = os.path.join(dior_data_path, 'JPEGImages-trainval')
test_data_path = os.path.join(dior_data_path, 'JPEGImages-test')

In [16]:
# Creating a sorted list of annotation file paths.
# Match on the file extension (suffix) rather than a substring so that names
# such as "00001.xml.bak" are not picked up by accident.
annot_file_list = sorted(
    os.path.join(annot_data_path, name)
    for name in os.listdir(annot_data_path)
    if name.endswith('.xml')
)
# Preview: first five paths, last five paths, total count
annot_file_list[:5], annot_file_list[-5:], len(annot_file_list)

(['dior_data/Annotations/Horizontal Bounding Boxes/00001.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/00002.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/00003.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/00004.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/00005.xml'],
 ['dior_data/Annotations/Horizontal Bounding Boxes/23459.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/23460.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/23461.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/23462.xml',
  'dior_data/Annotations/Horizontal Bounding Boxes/23463.xml'],
 23463)

In [17]:
# Creating a sorted list of training and validation image paths.
# Match on the file extension (suffix) rather than a substring so that names
# such as "00001.jpg.tmp" are not picked up by accident.
trainval_file_list = sorted(
    os.path.join(trainval_data_path, name)
    for name in os.listdir(trainval_data_path)
    if name.endswith('.jpg')
)
# Preview: first five paths, last five paths, total count
trainval_file_list[:5], trainval_file_list[-5:], len(trainval_file_list)

(['dior_data/JPEGImages-trainval/00001.jpg',
  'dior_data/JPEGImages-trainval/00002.jpg',
  'dior_data/JPEGImages-trainval/00003.jpg',
  'dior_data/JPEGImages-trainval/00004.jpg',
  'dior_data/JPEGImages-trainval/00005.jpg'],
 ['dior_data/JPEGImages-trainval/11721.jpg',
  'dior_data/JPEGImages-trainval/11722.jpg',
  'dior_data/JPEGImages-trainval/11723.jpg',
  'dior_data/JPEGImages-trainval/11724.jpg',
  'dior_data/JPEGImages-trainval/11725.jpg'],
 11725)

In [18]:
# Creating a sorted list of testing image paths.
# Match on the file extension (suffix) rather than a substring so that names
# such as "11726.jpg.tmp" are not picked up by accident.
test_file_list = sorted(
    os.path.join(test_data_path, name)
    for name in os.listdir(test_data_path)
    if name.endswith('.jpg')
)
# Preview: first five paths, last five paths, total count
test_file_list[:5], test_file_list[-5:], len(test_file_list)

(['dior_data/JPEGImages-test/11726.jpg',
  'dior_data/JPEGImages-test/11727.jpg',
  'dior_data/JPEGImages-test/11728.jpg',
  'dior_data/JPEGImages-test/11729.jpg',
  'dior_data/JPEGImages-test/11730.jpg'],
 ['dior_data/JPEGImages-test/23459.jpg',
  'dior_data/JPEGImages-test/23460.jpg',
  'dior_data/JPEGImages-test/23461.jpg',
  'dior_data/JPEGImages-test/23462.jpg',
  'dior_data/JPEGImages-test/23463.jpg'],
 11738)

In [19]:
# Merge the train/val and test image paths into a single NumPy array
# (train/val entries first, test entries after them)
image_data_file_list = np.concatenate([trainval_file_list, test_file_list])
# Preview: first five paths, last five paths, total count
(
    image_data_file_list[:5],
    image_data_file_list[-5:],
    len(image_data_file_list),
)

(array(['dior_data/JPEGImages-trainval/00001.jpg',
        'dior_data/JPEGImages-trainval/00002.jpg',
        'dior_data/JPEGImages-trainval/00003.jpg',
        'dior_data/JPEGImages-trainval/00004.jpg',
        'dior_data/JPEGImages-trainval/00005.jpg'], dtype='<U39'),
 array(['dior_data/JPEGImages-test/23459.jpg',
        'dior_data/JPEGImages-test/23460.jpg',
        'dior_data/JPEGImages-test/23461.jpg',
        'dior_data/JPEGImages-test/23462.jpg',
        'dior_data/JPEGImages-test/23463.jpg'], dtype='<U39'),
 23463)