# Create Train and Test Data Frames

In [1]:
import torch
import os
import xml.etree.ElementTree as ET
import pandas as pd
from sklearn.model_selection import train_test_split

In [11]:
BASE_PATH="/home/jpchagas/Downloads/20211025_Custom_Object_Detection_using_PyTorch_Faster_RCNN/wsl/test/"

In [12]:
files = os.listdir(BASE_PATH)

In [13]:
xml_files = [x for x in files if x.endswith(".xml")]

In [14]:
data = []

In [15]:
# Convert every annotation file into rows of
# [filename, width, height, label, xmin, ymin, xmax, ymax] and collect them in `data`.
# Elements are looked up by tag name rather than by child position, so the
# parsing does not depend on the exact ordering of the XML children, and
# every <object> in a file yields its own row.
for file in xml_files:
    try:
        # the image is assumed to share the annotation's base name, with a .png extension
        filename = os.path.splitext(file)[0] + ".png"
        root = ET.parse(os.path.join(BASE_PATH, file)).getroot()
        size = root.find("size")
        width = size.find("width").text
        height = size.find("height").text
        objects = root.findall("object")
        if not objects:
            raise ValueError("annotation has no <object> element")
        rows = []
        for xml_object in objects:
            label = xml_object.find("name").text
            coordinates = xml_object.find("bndbox")
            rows.append([
                filename,
                width,
                height,
                label,
                coordinates.find("xmin").text,
                coordinates.find("ymin").text,
                coordinates.find("xmax").text,
                coordinates.find("ymax").text,
            ])
    except (ET.ParseError, OSError, AttributeError, ValueError) as err:
        # AttributeError: an expected tag is missing (find() returned None).
        # Report the offending file together with the reason and keep going.
        print(f"{file}: {err}")
    else:
        # only add rows once the whole file parsed cleanly
        data.extend(rows)

In [16]:
# Column names, in the same order as the values of each row collected in `data`.
label_columns = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
df = pd.DataFrame(data, columns=label_columns)

In [17]:
df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,aus_margaret_WebbWright_scene11971.png,1280,720,surfer,560,305,634,491
1,aus_margaret_WebbWright_scene11061.png,1280,720,surfer,716,371,917,525
2,aus_margaret_WebbGilmore_scene08331.png,1280,720,surfer,714,236,821,410
3,aus_margaret_WebbGilmore_scene10641.png,1280,720,surfer,718,332,877,470
4,aus_margaret_WebbWright_scene11691.png,1280,720,surfer,680,249,777,402
...,...,...,...,...,...,...,...,...
189,aus_margaret_WebbWright_scene18201.png,1280,720,surfer,818,419,1016,568
190,aus_margaret_WebbWright_scene07071.png,1280,720,surfer,624,404,808,545
191,aus_margaret_WebbWright_scene07001.png,1280,720,surfer,849,215,923,300
192,aus_margaret_WebbWright_scene02241.png,1280,720,surfer,717,255,826,386


In [18]:
df.to_csv('test_labels.csv', index=False)

In [None]:
# Name of the target column. The DataFrame stores the object label under
# 'class'; there is no 'Label' column, so selecting it would raise KeyError.
Y_col = 'class'
# Every remaining column is used as a feature.
X_cols = df.columns.drop(Y_col)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
features, target = df[X_cols], df[Y_col]
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [None]:
X_train

# Configuration

In [None]:
BATCH_SIZE = 4 # increase / decrease according to GPU memory
RESIZE_TO = 512 # resize the image for training and transforms
NUM_EPOCHS = 100 # number of epochs to train for
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# training images and XML files directory
# NOTE(review): both directories still point at "Microcontroller Detection" — confirm for the surfer data
TRAIN_DIR = '../Microcontroller Detection/train'
# validation images and XML files directory
VALID_DIR = '../Microcontroller Detection/test'
# classes: 0 index is reserved for background
CLASS = 'surfer'
# full class list; the position of a name in this list is its label index,
# so 'background' must stay first and 'surfer' maps to 1
CLASSES = ['background', CLASS]
# number of classes, counting the background slot
NUM_CLASSES = len(CLASSES)
# whether to visualize images after creating the data loaders
VISUALIZE_TRANSFORMED_IMAGES = False
# location to save model and plots
OUT_DIR = '../outputs'
SAVE_PLOTS_EPOCH = 2 # save loss plots after these many epochs
SAVE_MODEL_EPOCH = 2 # save model after these many epochs

# Create the dataset class

In [None]:
import torch
import cv2
import numpy as np
import os
import glob as glob
from xml.etree import ElementTree as et
from config import CLASSES, RESIZE_TO, TRAIN_DIR, VALID_DIR, BATCH_SIZE
from torch.utils.data import Dataset, DataLoader
from utils import collate_fn, get_train_transform, get_valid_transform

# the dataset class
class SurferDataset(Dataset):
    """Object-detection dataset built from a directory of images and XML annotations.

    Each image in ``dir_path`` is expected to have an annotation file with the
    same base name and an ``.xml`` extension in the same directory. The
    annotation's ``<object>`` elements provide a class ``<name>`` and a
    ``<bndbox>`` with ``xmin``/``ymin``/``xmax``/``ymax`` children.

    Args:
        dir_path: Directory holding both the images and the XML files.
        width: Width the images (and boxes) are resized to.
        height: Height the images (and boxes) are resized to.
        classes: List of class names; a name's position is its label index.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with ``'image'`` and ``'bboxes'`` entries.
    """

    # Image extensions collected from `dir_path` (matching is case-sensitive
    # wherever the file system is).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, dir_path, width, height, classes, transforms=None):
        self.transforms = transforms
        self.dir_path = dir_path
        self.height = height
        self.width = width
        self.classes = classes

        # get all the image paths in sorted order; the directory part is
        # escaped so glob metacharacters in `dir_path` are taken literally
        escaped_dir = glob.escape(self.dir_path)
        self.image_paths = []
        for extension in self.IMAGE_EXTENSIONS:
            pattern = os.path.join(escaped_dir, f"*{extension}")
            self.image_paths.extend(glob.glob(pattern))
        self.image_paths = sorted(self.image_paths)
        self.all_images = sorted(
            os.path.basename(image_path) for image_path in self.image_paths
        )

    def _parse_annotations(self, annot_file_path, image_width, image_height):
        """Read one XML file and return ``(boxes, labels)`` tensors.

        Boxes are rescaled from the original image size to
        ``(self.width, self.height)``. ``boxes`` is float32 with shape
        ``(N, 4)`` even when the file has no objects, and ``labels`` is int64
        with shape ``(N,)``.

        Raises:
            ValueError: If an object's name is not in ``self.classes``.
        """
        boxes = []
        labels = []
        root = et.parse(annot_file_path).getroot()

        for member in root.findall('object'):
            # map the current object name to `classes` list to get the label index
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            # xmin / xmax = left / right edge x-coordinates
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            # ymin / ymax = top / bottom edge y-coordinates
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            # resize the bounding boxes according to the desired `width`, `height`
            xmin_final = (xmin / image_width) * self.width
            xmax_final = (xmax / image_width) * self.width
            ymin_final = (ymin / image_height) * self.height
            ymax_final = (ymax / image_height) * self.height

            boxes.append([xmin_final, ymin_final, xmax_final, ymax_final])

        # reshape keeps the tensor two-dimensional when there are no objects,
        # so column indexing such as boxes[:, 3] still works
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image_resized, target)`` for the image at position ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``labels``, ``area``,
        ``iscrowd`` and ``image_id``.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # capture the image name and the full image path
        image_name = self.all_images[idx]
        image_path = os.path.join(self.dir_path, image_name)
        # read the image; cv2.imread signals failure by returning None
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # convert BGR to RGB color format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image_resized = cv2.resize(image, (self.width, self.height))
        image_resized /= 255.0

        # capture the corresponding XML file for getting the annotations
        annot_filename = os.path.splitext(image_name)[0] + '.xml'
        annot_file_path = os.path.join(self.dir_path, annot_filename)

        # box coordinates are scaled from the original image size
        # (shape[1] is the width, shape[0] is the height)
        boxes, labels = self._parse_annotations(
            annot_file_path, image.shape[1], image.shape[0]
        )

        # area of the bounding boxes
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # no crowd instances
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        # prepare the final `target` dictionary
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        image_id = torch.tensor([idx])
        target["image_id"] = image_id
        # apply the image transforms
        if self.transforms:
            sample = self.transforms(image = image_resized,
                                     bboxes = target['boxes'],
                                     labels = labels)
            image_resized = sample['image']
            transformed_boxes = torch.Tensor(sample['bboxes'])
            if transformed_boxes.numel() == 0:
                # keep the (0, 4) layout when no box is returned
                transformed_boxes = transformed_boxes.reshape(0, 4)
            target['boxes'] = transformed_boxes

        return image_resized, target

    def __len__(self):
        """Return the number of images found in ``dir_path``."""
        return len(self.all_images)

# Create Model