In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import math
import numpy as np
import re
from shapely.geometry import Polygon, LineString, Point
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
from torch.optim import SGD, Adam, Adadelta
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
from torch.utils.data._utils.collate import default_collate
import torchvision
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
from math import radians, cos, sin
import ast
import shutil

# 1. Unzip & Prepare Directory

In [None]:
import tarfile

# Location of the downloaded archive and the folder it is unpacked into.
tar_path = './ds2_dense.tar.gz'
extract_path = './ds2_dense/'

with tarfile.open(tar_path, "r:gz") as archive:
    archive.extractall(path=extract_path)

# If unpacking produced an extra 'ds2_dense' level inside extract_path,
# lift each of its entries up one level and remove the leftover folder.
nested = os.path.join(extract_path, 'ds2_dense')
if os.path.exists(nested):
    for entry in os.listdir(nested):
        entry_path = os.path.join(nested, entry)
        shutil.move(entry_path, extract_path)
    shutil.rmtree(nested)

# 2. Load JSON files

In [None]:
# Load the annotation JSON of the train and test splits
with open('./ds2_dense/deepscores_train.json') as train_file:
    data1 = json.load(train_file)
with open('./ds2_dense/deepscores_test.json') as test_file:
    data2 = json.load(test_file)

# Extract the image and annotation tables of each split.
# The annotation frames are transposed after construction
# (presumably 'annotations' is a mapping keyed by annotation id, so the
# transpose yields one row per annotation -- TODO confirm).
train_images, test_images = (
    pd.DataFrame(split['images']) for split in (data1, data2)
)
train_annots, test_annots = (
    pd.DataFrame(split['annotations']).T for split in (data1, data2)
)

# 3. Splitting the Images into Train and Test sets

In [None]:
# Folder the images are read from, plus one sub-folder per split.
image_dir = './ds2_dense/images'
train_dir = './ds2_dense/images/train'
test_dir = './ds2_dense/images/test'

# Create both split folders; exist_ok=True means existing folders are fine.
for split_dir in (train_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# Move the train images into the train directory.
# A file that is no longer in image_dir but already present in train_dir is
# skipped, so re-running this cell after a successful move does not raise
# FileNotFoundError. A file missing from both places still raises.
for image_filename in train_images['filename']:
    src_path = os.path.join(image_dir, image_filename)
    dest_path = os.path.join(train_dir, image_filename)
    if not os.path.exists(src_path) and os.path.exists(dest_path):
        continue  # already moved by an earlier run
    shutil.move(src_path, dest_path)

In [None]:
# Move the test images into the test directory.
# A file that is no longer in image_dir but already present in test_dir is
# skipped, so re-running this cell after a successful move does not raise
# FileNotFoundError. A file missing from both places still raises.
for image_filename in test_images['filename']:
    src_path = os.path.join(image_dir, image_filename)
    dest_path = os.path.join(test_dir, image_filename)
    if not os.path.exists(src_path) and os.path.exists(dest_path):
        continue  # already moved by an earlier run
    shutil.move(src_path, dest_path)

# 4. Generate Label Mapping (deepscores only)
The YOLO model expects zero-based class labels, so the selected categories are renumbered consecutively starting from 0.


In [None]:
# Category table of the train JSON. Look it up by key instead of by position
# so the cell does not depend on the order of the top-level JSON keys; the
# original positional lookup (third top-level value) is kept as a fallback.
# NOTE(review): 'categories' is the key name used by the DeepScores/obb_anns
# layout -- confirm against the actual file.
label_dict = data1['categories'] if 'categories' in data1 else list(data1.values())[2]

# First keep only the entries whose annotation_set is 'deepscores'
filtered_items = [(k, v) for k, v in label_dict.items() if v['annotation_set'] == 'deepscores']

# Number the remaining categories consecutively (0, 1, 2, ...) with enumerate
cat_dict = {
    k: {
        'old_id': int(k),   # original category id, as an integer
        'name': v['name'],
        'label': i          # new zero-based label
    } for i, (k, v) in enumerate(filtered_items)
}

# Convert to a DataFrame with one row per category
df_labels = pd.DataFrame.from_dict(cat_dict, orient='index').reset_index(drop=True)

df_labels


# 5. Attach Label to each obbox

In [None]:
# Lookup table: original category id (as a string) -> new consecutive label.
class_mapping = {
    str(old_id): label
    for old_id, label in zip(df_labels['old_id'], df_labels['label'])
}

def map_label(cat_ids):
    """Return the largest mapped label among ``cat_ids``.

    Each id is converted to ``str`` and looked up in the module-level
    ``class_mapping``; ids that have no entry are ignored. ``None`` is
    returned when none of the ids has an entry (including empty input).
    """
    labels = []
    for cid in cat_ids:
        key = str(cid)
        if key in class_mapping:
            labels.append(class_mapping[key])
    return max(labels, default=None)

# Attach the new label to every annotation of both splits.
# The annotation frames built when the JSON was loaded are train_annots /
# test_annots; the names train_obboxs / test_obboxs are not defined by any
# preceding cell and raise NameError on a fresh kernel.
# NOTE(review): assumes both frames carry a 'cat_id' column holding an
# iterable of category ids -- confirm.
train_annots['label'] = train_annots['cat_id'].apply(map_label)
test_annots['label'] = test_annots['cat_id'].apply(map_label)

train_annots.head(10)

# 6. Adjust bounding box for 0 valued w/h

In [None]:
def adjust_bbox(bbox):
    """Give a degenerate box a non-zero extent along each collapsed axis.

    ``bbox`` is unpacked as ``(x_min, y_min, x_max, y_max)``. When the two x
    coordinates are equal, the minimum is lowered by 1 and the maximum raised
    by 1; the y coordinates are treated the same way. All other values are
    passed through unchanged.

    Returns a new four-element list; the input sequence is not modified.
    """
    def widen(low, high):
        # Only a collapsed interval (low == high) is stretched by one unit
        # on each side.
        return (low - 1, high + 1) if low == high else (low, high)

    x_min, y_min, x_max, y_max = bbox
    x_min, x_max = widen(x_min, x_max)
    y_min, y_max = widen(y_min, y_max)
    return [x_min, y_min, x_max, y_max]

# Store the adjusted boxes in a new 'bbox' column on each split's frame.
# The train result is written to train_annots, the frame it is computed from,
# matching the test line; `train_s` is not defined by any preceding cell and
# raised NameError.
train_annots['bbox'] = train_annots['a_bbox'].apply(adjust_bbox)
test_annots['bbox'] = test_annots['a_bbox'].apply(adjust_bbox)
