# Dataset analysis

### Imports

In [1]:
# render matplotlib figures inline, as static images in the cell output
%matplotlib inline
# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import json
import fiftyone as fo

### Custom plotting

In [4]:
from PIL import Image
from matplotlib.patches import Rectangle

In [5]:
def plot_boxes(ax, bboxes):
    """Draw one red, unfilled rectangle on ``ax``.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib axes in this notebook).
        bboxes: A single box given as an indexable ``[x, y, width, height]``;
            ``(x, y)`` is the anchor point handed to ``Rectangle``.
    """
    anchor = (bboxes[0], bboxes[1])
    box_width, box_height = bboxes[2], bboxes[3]
    outline = Rectangle(
        anchor,
        box_width,
        box_height,
        alpha=1,
        facecolor='none',  # outline only, no fill
        edgecolor='red',
        linewidth=1,
    )
    ax.add_patch(outline)

In [7]:
# Example SODA-A image, loaded as RGB.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
img_path = '/Users/andriiliubonko/workspace/data/soda_a/Images/00001.jpg'
img = Image.open(img_path).convert('RGB')
# One box as [x, y, width, height], i.e. the order in which plot_boxes reads it.
bboxes = [235.0841064453125, 2529.2451171875, 21.561431884765625, 27.17333984375]

In [9]:
# fig_size = 10
# _ = plt.figure(figsize=(fig_size, fig_size))
# ax = plt.subplot()
# ax.set_aspect('equal')
# plot_boxes(ax, bboxes)
# plt.imshow(img)

### First Dataset

In [4]:
# Create an empty dataset named "test-dataset" (no samples are added to it here)
dataset = fo.Dataset("test-dataset")

In [5]:
# Print the dataset summary: name, media type, sample count and field schema.
print(dataset)

Name:        test-dataset
Media type:  None
Num samples: 0
Persistent:  False
Tags:        []
Sample fields:
    id:       fiftyone.core.fields.ObjectIdField
    filepath: fiftyone.core.fields.StringField
    tags:     fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)


### COCO

https://colab.research.google.com/github/voxel51/fiftyone-examples/blob/master/examples/quickstart.ipynb#scrollTo=sDRyBGZgzAz4

**pycocotools** should be installed:
```
> pip install pycocotools
```

In [62]:
# COCO val2017 locations: image folder and the instances annotation JSON.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
ds_name = 'coco-val'
ds_imgs_dir = '/Users/andriiliubonko/workspace/data/coco/val2017/'
ds_labels = '/Users/andriiliubonko/workspace/data/coco/annotations/instances_val2017.json'
# Dataset type handed to fo.Dataset.from_dir as dataset_type.
ds_type = fo.types.COCODetectionDataset

In [66]:
# Import the images and their COCO annotations into a new FiftyOne dataset.
# NOTE(review): ds_name is not passed here; the summary printed afterwards
# shows a timestamp-like dataset name instead of 'coco-val'.
dataset = fo.Dataset.from_dir(
    dataset_type=ds_type,
    data_path=ds_imgs_dir,
    labels_path=ds_labels,
)



 100% |███████████████| 5000/5000 [1.2m elapsed, 0s remaining, 65.5 samples/s]      


In [67]:
# Print the summary of the imported COCO dataset (sample count and field schema).
print(dataset)

Name:        2023.08.29.22.34.13
Media type:  image
Num samples: 5000
Persistent:  False
Tags:        []
Sample fields:
    id:            fiftyone.core.fields.ObjectIdField
    filepath:      fiftyone.core.fields.StringField
    tags:          fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    detections:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    segmentations: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [4]:
#session = fo.launch_app(dataset)

### SODA-A

**A more robust approach is to write a custom dataset importer:**
https://docs.voxel51.com/recipes/custom_importer.html?highlight=metadata%20sample


In [4]:
def poly2hbb(polys, hw=None):
    """Convert polygons to horizontal (axis-aligned) bounding boxes.

    Args:
        polys (np.array): Polygon vertices flattened as
            ``(x1, y1, x2, y2, ...)`` along the last axis, e.g. shape ``(8,)``
            for one quadrilateral or ``(N, 8)`` for a batch.
        hw (tuple, optional): Image ``(height, width)``. When given, x values
            are divided by ``width`` and y values by ``height``, so the boxes
            are expressed relative to the image size.

    Returns:
        np.array: Boxes as ``(x_min, y_min, width, height)`` along the last
            axis, with the same leading shape as ``polys``.

    Source, with small modifications:
        https://github.com/shaunyuan22/SODA-mmrotate/blob/main/tools/data/sodaa/sodaa_split.py
    """
    polys = np.asarray(polys)
    shape = polys.shape
    # (..., 2K) -> (..., K, 2): one (x, y) pair per vertex.
    points = polys.reshape(*shape[:-1], shape[-1] // 2, 2)
    lt_point = np.min(points, axis=-2)
    rb_point = np.max(points, axis=-2)

    if hw is not None:
        h, w = hw
        # Broadcast over the last (x, y) axis so batched input is scaled per
        # coordinate rather than per row; out-of-place division also keeps
        # integer-typed polygons from raising on in-place true division.
        scale = np.array([w, h], dtype=float)
        lt_point = lt_point / scale
        rb_point = rb_point / scale

    return np.concatenate([lt_point, rb_point - lt_point], axis=-1)

In [5]:
# SODA-A locations: image folder and annotation folder.
# NOTE(review): 'sota-a' looks like a typo for 'soda-a' (cf. the soda_a paths) -- confirm before renaming.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
ds_name = 'sota-a'
ds_imgs_dir = '/Users/andriiliubonko/workspace/data/soda_a/Images/'
ds_anns_dir = '/Users/andriiliubonko/workspace/data/soda_a/Annotations/'

In [6]:
# Read the training annotation file 00001.json.
ann_path = os.path.join(ds_anns_dir, 'train', '00001.json')
with open(ann_path, 'r') as file:
    ann_data = json.load(file)

# Image size as recorded in the annotation file.
image_info = ann_data['images']
height = image_info['height']
width = image_info['width']
image_metadata = fo.ImageMetadata(width=width, height=height)

# Lookup table: category id -> category name.
category_map = dict((cat['id'], cat['name']) for cat in ann_data['categories'])

In [7]:
# Top-level keys of the loaded annotation file.
ann_data.keys()

dict_keys(['type', 'images', 'annotations', 'categories'])

In [8]:
# Show every top-level section (first annotation only), each followed by a blank line.
for key in ('type', 'images', 'annotations', 'categories'):
    section = ann_data[key]
    if key == 'annotations':
        section = section[0]
    print(section, '\n')

instance 

{'file_name': '00001.jpg', 'height': 2744, 'width': 4800, 'id': 1} 

{'poly': [235.0841064453125, 2551.818359375, 249.3841552734375, 2556.41845703125, 256.6455383300781, 2533.84521484375, 242.34548950195312, 2529.2451171875], 'area': 356.2015366703272, 'category_id': 0, 'image_id': 1, 'id': 1} 

[{'id': 0, 'name': 'airplane'}, {'id': 1, 'name': 'helicopter'}, {'id': 2, 'name': 'small-vehicle'}, {'id': 3, 'name': 'large-vehicle'}, {'id': 4, 'name': 'ship'}, {'id': 5, 'name': 'container'}, {'id': 6, 'name': 'storage-tank'}, {'id': 7, 'name': 'swimming-pool'}, {'id': 8, 'name': 'windmill'}, {'id': 9, 'name': 'ignore'}] 



In [24]:
# Start a fresh sample list and wrap the image (path + size metadata) in a sample.
samples = []
image_path = os.path.join(ds_imgs_dir, ann_data['images']['file_name'])
sample = fo.Sample(filepath=image_path, metadata=image_metadata)

In [25]:
#print(sample)

In [26]:
# Build one FiftyOne detection per annotated polygon of the image.
detections = []
for ann in ann_data['annotations']:
    label = category_map[ann['category_id']]
    # [x_min, y_min, width, height] scaled by the image size (hw=...), converted
    # to a plain list of Python floats: fo.Detection documents bounding_box as
    # a list, not a NumPy array.
    bounding_box = poly2hbb(np.array(ann['poly']), hw=(height, width)).tolist()

    detections.append(
        fo.Detection(label=label, bounding_box=bounding_box, area=ann['area'])
    )
sample["ground_truth"] = fo.Detections(detections=detections)

In [27]:
# Collect the annotated sample for insertion into the dataset.
# NOTE: re-running only this cell appends the same sample again.
samples.append(sample)

In [28]:
# Create the dataset and add the collected samples; the cell output is the
# list of ids returned by add_samples.
# NOTE(review): the name repeats the literal stored in ds_name instead of reusing it.
dataset = fo.Dataset("sota-a")
dataset.add_samples(samples)



 100% |█████████████████████| 1/1 [138.7ms elapsed, 0s remaining, 7.3 samples/s] 


['64efb0dd3905162940d5e54a']

In [29]:
# Print the dataset summary; the schema now includes the ground_truth field.
print(dataset)

Name:        sota-a
Media type:  image
Num samples: 1
Persistent:  False
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [31]:
#session = fo.launch_app(dataset)