In [None]:
import os, json
from collections import Counter
import matplotlib.pyplot as plt

# Directory holding one '<name>.json' annotation file per sample.
ANNOTATIONS_DIR = '../dataset/synthetic/annotations'
# Text file listing the sample names of the training split, one per line.
SPLIT_PATH = '../dataset/synthetic/splits/train.txt'
# Destination directory for the PNG figures written by the cells below.
OUT_DIR = '../docs/figs'
# Create the output directory up front; exist_ok makes re-running this cell safe.
os.makedirs(OUT_DIR, exist_ok=True)

def load_json(p):
    """Read the UTF-8 encoded file at path ``p`` and return its parsed JSON content."""
    with open(p, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed

# Read the split file: one sample name per line, surrounding whitespace
# removed and blank lines dropped.
with open(SPLIT_PATH, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    names = [entry for entry in stripped_lines if entry]
# Display the number of samples in the split as the cell output.
len(names)


In [None]:
# Tally how often each primitive type occurs across all annotations of the split.
# Primitives without a 'type' key are counted under '?'.
ctr = Counter()
for n in names:
    data = load_json(os.path.join(ANNOTATIONS_DIR, f'{n}.json'))
    ctr.update(prim.get('type', '?') for prim in data.get('primitives', []))

# Bars appear in first-seen order of the types.
labels = list(ctr)
vals = list(ctr.values())

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(labels, vals, color='#4C78A8')
ax.set_title('Primitive Types Distribution')
ax.set_ylabel('Count')
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, 'primitive_types.png'), dpi=200)
plt.show()


In [None]:
# Number of primitives in each annotation file of the split
# (an annotation without a 'primitives' key counts as 0).
counts = [
    len(load_json(os.path.join(ANNOTATIONS_DIR, f'{n}.json')).get('primitives', []))
    for n in names
]

# One unit-wide bin per integer count from 0 up to the observed maximum;
# fall back to a single [0, 1] bin when the split is empty.
if counts:
    bins = list(range(max(counts) + 2))
else:
    bins = [0, 1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(counts, bins=bins, color='#72B7B2', edgecolor='black')
ax.set_title('Primitives per Image')
ax.set_xlabel('# primitives')
ax.set_ylabel('# images')
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, 'primitives_per_image.png'), dpi=200)
plt.show()


In [None]:
# Distribution of bounding-box area expressed as a fraction of the image area.
# NOTE(review): assumes each 'bbox' is [x, y, w, h] in the same units as the
# annotation's 'width'/'height' -- confirm against the annotation writer.
areas = []
skipped_images = 0  # annotations without a usable positive width/height
skipped_boxes = 0   # primitives without a 4-element bbox
for n in names:
    data = load_json(os.path.join(ANNOTATIONS_DIR, f'{n}.json'))
    try:
        W = int(data.get('width') or 0)
        H = int(data.get('height') or 0)
    except (TypeError, ValueError, OverflowError):
        W = H = 0
    if W <= 0 or H <= 0:
        # Without real image dimensions the ratio cannot be computed; falling
        # back to an area of 1 would mix raw pixel areas into a histogram
        # that is labelled as normalized.
        skipped_images += 1
        continue
    img_area = float(W*H)
    for p in data.get('primitives', []):
        bbox = p.get('bbox')
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            # A missing or malformed box is left out instead of being
            # recorded as a fabricated zero-area sample.
            skipped_boxes += 1
            continue
        w, h = bbox[2], bbox[3]
        # Clamp at 0 so a negative extent cannot yield a negative area.
        areas.append(max(0.0, float(w*h)/img_area))

# Make dropped data visible rather than silently changing the sample.
if skipped_images or skipped_boxes:
    print(f'bbox area histogram: skipped {skipped_images} image(s) without valid '
          f'width/height and {skipped_boxes} primitive(s) without a 4-element bbox')

plt.figure(figsize=(6,4))
# With no samples, draw an empty histogram over [0, 1] instead of plotting a
# fake 0.0 value that would show up as a bar of height 1.
plt.hist(areas, bins=20, range=None if areas else (0.0, 1.0),
         color='#E45756', edgecolor='black')
plt.title('BBox Area Distribution (normalized)')
plt.xlabel('bbox_area / image_area')
plt.ylabel('count')
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'bbox_area_hist.png'), dpi=200)
plt.show()
