In [64]:
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
import ast
import torch
import os

# Data Preprocessing

In [42]:
# Load the annotation table; the path is relative to the notebook's working directory.
annots = pd.read_csv('images/archive/annotations.csv')

In [43]:
# Preview the first rows of the raw annotation table.
annots.head()

Unnamed: 0,id,image_id,geometry,class
0,1,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(135, 522), (245, 522), (245, 600), (135, 600...",Airplane
1,2,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1025, 284), (1125, 284), (1125, 384), (1025,...",Airplane
2,3,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1058, 1503), (1130, 1503), (1130, 1568), (10...",Airplane
3,4,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(813, 1518), (885, 1518), (885, 1604), (813, ...",Airplane
4,5,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(594, 938), (657, 938), (657, 1012), (594, 10...",Airplane


In [50]:
def tuples_to_bbox(tuples):
    """Collapse a sequence of (x, y) vertices into an axis-aligned bounding box.

    Args:
        tuples: Sequence of points; element 0 of each point is read as x and
            element 1 as y.

    Returns:
        tuple: ``(x_min, y_min, x_max, y_max)``.
    """
    # Gather all x values first, then all y values (two passes over the input).
    xs, ys = ([vertex[axis] for vertex in tuples] for axis in (0, 1))
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    return x_min, y_min, x_max, y_max

In [47]:
# The geometry column is read from the CSV as text such as "[(135, 522), (245, 522), ...]";
# parse it into a list of (x, y) tuples. Values that are no longer strings are left
# untouched, so re-running this cell after the column has been parsed does not raise.
annots['geometry'] = annots['geometry'].apply(
    lambda geom: ast.literal_eval(geom) if isinstance(geom, str) else geom
)

In [51]:
# Derive an axis-aligned (x_min, y_min, x_max, y_max) box from each parsed polygon.
annots['bbox'] = annots['geometry'].apply(tuples_to_bbox)

In [53]:
# Check that the new bbox column was added alongside the original geometry.
annots.head()

Unnamed: 0,id,image_id,geometry,class,bbox
0,1,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(135, 522), (245, 522), (245, 600), (135, 600...",Airplane,"(135, 522, 245, 600)"
1,2,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1025, 284), (1125, 284), (1125, 384), (1025,...",Airplane,"(1025, 284, 1125, 384)"
2,3,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1058, 1503), (1130, 1503), (1130, 1568), (10...",Airplane,"(1058, 1503, 1130, 1568)"
3,4,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(813, 1518), (885, 1518), (885, 1604), (813, ...",Airplane,"(813, 1518, 885, 1604)"
4,5,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(594, 938), (657, 938), (657, 1012), (594, 10...",Airplane,"(594, 938, 657, 1012)"


In [72]:
# List the distinct class names before encoding them as integers.
annots['class'].unique()

array(['Airplane', 'Truncated_airplane'], dtype=object)

In [75]:
# Encode class as an integer label: 'Airplane' -> 0, any other class name -> 1.
# Labels that are already 0/1 pass through unchanged, so re-running this cell no
# longer turns every row into 1 (0 != 'Airplane' used to fall into the else branch).
# NOTE(review): torchvision detection models treat label 0 as background — confirm
# this encoding fits the model these labels are fed to.
annots['class'] = annots['class'].apply(
    lambda x: x if x in (0, 1) else (0 if x == 'Airplane' else 1)
)

In [76]:
# Check that the class column now holds integer labels.
annots.head()

Unnamed: 0,id,image_id,geometry,class,bbox
0,1,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(135, 522), (245, 522), (245, 600), (135, 600...",0,"(135, 522, 245, 600)"
1,2,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1025, 284), (1125, 284), (1125, 384), (1025,...",0,"(1025, 284, 1125, 384)"
2,3,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(1058, 1503), (1130, 1503), (1130, 1568), (10...",0,"(1058, 1503, 1130, 1568)"
3,4,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(813, 1518), (885, 1518), (885, 1604), (813, ...",0,"(813, 1518, 885, 1604)"
4,5,4f833867-273e-4d73-8bc3-cb2d9ceb54ef.jpg,"[(594, 938), (657, 938), (657, 1012), (594, 10...",0,"(594, 938, 657, 1012)"


# Initializing dataset

In [85]:
class AircraftDetectionDataset(Dataset):
    """Detection dataset yielding one ``(image, target)`` pair per annotation row.

    Three columns of ``dataframe`` are read for each sample: ``image_id`` (file
    name relative to ``root_dir``), ``bbox`` (four coordinates, either a
    sequence or its string form) and ``class`` (integer label).

    NOTE(review): a sample is one annotation row, not one image, so an image
    with several annotated objects is returned several times with a single box
    each. Confirm this is what the downstream model expects.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pandas.DataFrame): DataFrame containing annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.dataframe)

    @staticmethod
    def _parse_bbox(bbox):
        """Return ``bbox`` as a sequence, parsing it first if it is a string."""
        if isinstance(bbox, str):
            # literal_eval accepts only Python literals; eval() would execute
            # whatever expression happens to be stored in the column.
            return ast.literal_eval(bbox)
        return bbox

    @staticmethod
    def _image_size(image):
        """Return ``(width, height)`` of a tensor or PIL-like image, or None if unknown."""
        if torch.is_tensor(image):
            if image.dim() < 2:
                return None
            height, width = image.shape[-2:]
            return width, height
        # PIL images expose .size as a (width, height) tuple.
        size = getattr(image, "size", None)
        if isinstance(size, tuple) and len(size) == 2:
            return size
        return None

    @staticmethod
    def _rescale_boxes(boxes, orig_size, new_size):
        """Scale ``(x_min, y_min, x_max, y_max)`` boxes from ``orig_size`` to ``new_size``.

        Both sizes are ``(width, height)``. Boxes are returned unchanged when
        the two sizes are equal.
        """
        orig_w, orig_h = orig_size
        new_w, new_h = new_size
        if (orig_w, orig_h) == (new_w, new_h):
            return boxes
        scale_x = new_w / orig_w
        scale_y = new_h / orig_h
        scale = torch.tensor([scale_x, scale_y, scale_x, scale_y], dtype=boxes.dtype)
        return boxes * scale

    def __getitem__(self, idx):
        """Load the image and build the target dict for annotation row ``idx``.

        Returns:
            tuple: ``(image, target)`` where ``target["boxes"]`` is a float32
            tensor of shape (1, 4) and ``target["labels"]`` an int64 tensor of
            shape (1,). When the transform changes the image size, the box is
            scaled by the same factors so it stays aligned with the returned
            image. This is only correct for transforms that purely resize;
            crops or flips need box-aware transforms.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Look the row up once instead of once per column.
        row = self.dataframe.iloc[idx]

        img_name = os.path.join(self.root_dir, row['image_id'])
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert("RGB")
        orig_size = self._image_size(image)

        bbox = self._parse_bbox(row['bbox'])
        boxes = torch.tensor(bbox, dtype=torch.float32).unsqueeze(0)
        labels = torch.tensor([row['class']], dtype=torch.int64)

        if self.transform:
            image = self.transform(image)  # Ensure this transforms the PIL Image to a tensor.
            new_size = self._image_size(image)
            if orig_size is not None and new_size is not None:
                boxes = self._rescale_boxes(boxes, orig_size, new_size)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        return image, target


In [93]:
# NOTE(review): `transform` is defined in a cell that appears further down in this
# notebook (the transforms.Compose cell); on a fresh Restart & Run All this line
# raises NameError. Move that cell above this one.
dataset = AircraftDetectionDataset(annots, 'images/img', transform=transform)

In [94]:
# Fetch one sample to check that image loading and target construction work.
dataset[0]

(tensor([[[0.7216, 0.7255, 0.7294,  ..., 0.6824, 0.6941, 0.6902],
          [0.7216, 0.7255, 0.7412,  ..., 0.7059, 0.7098, 0.7059],
          [0.7255, 0.7333, 0.7451,  ..., 0.7216, 0.7216, 0.7216],
          ...,
          [0.7647, 0.7647, 0.7686,  ..., 0.7608, 0.7647, 0.7725],
          [0.7569, 0.7529, 0.7569,  ..., 0.7137, 0.7176, 0.7255],
          [0.7608, 0.7608, 0.7647,  ..., 0.7294, 0.7333, 0.7412]],
 
         [[0.6902, 0.6941, 0.6980,  ..., 0.6471, 0.6588, 0.6510],
          [0.6902, 0.6941, 0.7098,  ..., 0.6588, 0.6667, 0.6667],
          [0.6941, 0.7020, 0.7176,  ..., 0.6706, 0.6706, 0.6667],
          ...,
          [0.7490, 0.7490, 0.7529,  ..., 0.7451, 0.7451, 0.7451],
          [0.7412, 0.7412, 0.7451,  ..., 0.6980, 0.6980, 0.6941],
          [0.7451, 0.7490, 0.7529,  ..., 0.7098, 0.7137, 0.7098]],
 
         [[0.6471, 0.6510, 0.6549,  ..., 0.6039, 0.6118, 0.6039],
          [0.6471, 0.6510, 0.6667,  ..., 0.6039, 0.6118, 0.6118],
          [0.6510, 0.6588, 0.6745,  ...,

In [92]:
from torchvision import transforms
from torch.utils.data import DataLoader
# Image-only preprocessing: resize every image to a fixed 224x224 (the aspect ratio
# is not preserved), then convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [97]:
# NOTE(review): with num_workers > 0 the dataset is handed to worker processes; on
# platforms that start workers with "spawn" a class defined in a notebook cell may
# fail to load in the workers — try num_workers=0 if this cell hangs or errors.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)

# Example: Iterate over small batch of data to verify that the dataset is working as expected.
# Only the first batch is inspected. `targets` is indexed by key below, i.e. the
# per-sample target dicts arrive collated into one dict of batched tensors.

for i, (images, targets) in enumerate(dataloader):
    print(f"Batch {i}")
    print(f"Image batch shape: {images.shape}")
    print(f"Target batch shape: {targets['boxes'].shape}")
    break