In [15]:
import json
from pathlib import Path

def get_annotation_by_uid(uid: str, annotations_dir=None):
    """
    Search for an annotation by UID across all relation annotation files.

    The train, val and test files are checked in that order and the first
    file containing the UID wins. Files that do not exist are skipped
    silently; files that cannot be read, are not valid JSON, or do not have
    the expected ``{"annotations": {...}}`` layout are reported with a
    printed message and then skipped.

    Args:
        uid: The take UID to search for
        annotations_dir: Directory containing the ``relations_*.json`` files.
            When omitted, "annotations/relation_annotations" (relative to the
            current working directory) is used.

    Returns:
        tuple: (annotation_dict, file_name) if found, (None, None) if not found
    """
    if annotations_dir is None:
        annotations_dir = "annotations/relation_annotations"
    relation_annotations_dir = Path(annotations_dir)
    json_files = (
        "relations_train.json",
        "relations_val.json",
        "relations_test.json",
    )

    for json_file in json_files:
        file_path = relation_annotations_dir / json_file
        if not file_path.exists():
            continue

        try:
            # Explicit UTF-8 so the result does not depend on the OS locale.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {json_file}: {e}")
            continue

        # Guard against payloads that parse but do not have the expected
        # mapping layout, instead of relying on a catch-all exception.
        annotations = data.get('annotations', {}) if isinstance(data, dict) else None
        if not isinstance(annotations, dict):
            print(f"Error reading {json_file}: unexpected JSON structure")
            continue

        if uid in annotations:
            return annotations[uid], json_file

    return None, None


In [16]:
uid = "f8bed5fe-3e09-4885-9539-edb4d5b2279a"

annotation, source_file = get_annotation_by_uid(uid)

# The lookup signals "not found" with None; an annotation that exists but is
# empty is falsy and must still be reported as found, so compare to None.
if annotation is not None:
    # Fetch once and reuse for both the export and the count below.
    object_masks = annotation.get("object_masks", {})

    print(f"Found in: {source_file}")
    print(f"Scenario: {annotation.get('scenario', 'N/A')}")
    print(f"Take name: {annotation.get('take_name', 'N/A')}")
    # Dump the masks to a file in the current working directory for inspection.
    with open("object_masks_output.json", "w") as f:
        json.dump({"object_masks": object_masks}, f, indent=2)
    print(f"Object masks count: {len(object_masks)}")
else:
    print("UID not found in any annotation file")

Found in: relations_train.json
Scenario: Covid-19 Rapid Antigen Test
Take name: sfu_covid_009_7
Object masks count: 1


In [12]:
import json

def parse_uid_file(filepath):
    """Read a UID list file and group its entries by split.

    The file is scanned line by line after stripping surrounding whitespace.
    A line that is exactly "# TRAIN", "# VAL" or "# TEST" selects the split
    that subsequent entries belong to. Blank lines and any other line
    starting with "#" are ignored, as are entries that appear before the
    first split marker.

    Args:
        filepath: Path of the text file to read.

    Returns:
        dict: {"train": [...], "val": [...], "test": [...]} with the entries
        in file order.
    """
    marker_to_split = {"# TRAIN": "train", "# VAL": "val", "# TEST": "test"}
    grouped = {name: [] for name in marker_to_split.values()}
    active = None

    with open(filepath, 'r') as handle:
        for raw_line in handle:
            entry = raw_line.strip()

            # A marker line switches the destination split.
            if entry in marker_to_split:
                active = marker_to_split[entry]
                continue

            # Skip blanks, comments, and anything before the first marker.
            if not entry or entry.startswith("#") or not active:
                continue

            grouped[active].append(entry)

    return grouped

# Load the per-split UID lists for the two domains
cooking_splits = parse_uid_file('annotations/relation_annotations/target_uids_cooking.txt')
health_splits = parse_uid_file('annotations/relation_annotations/target_uids_health.txt')

# Write each mapping out as indented JSON alongside the source lists
with open('annotations/relation_annotations/cooking_split.json', 'w') as f:
    f.write(json.dumps(cooking_splits, indent=2))

with open('annotations/relation_annotations/health_split.json', 'w') as f:
    f.write(json.dumps(health_splits, indent=2))

# Report how many UIDs landed in each split, one line per entry
print(
    "Cooking splits:",
    f"  Train: {len(cooking_splits['train'])} UIDs",
    f"  Val: {len(cooking_splits['val'])} UIDs",
    f"  Test: {len(cooking_splits['test'])} UIDs",
    sep="\n",
)

print(
    "\nHealth splits:",
    f"  Train: {len(health_splits['train'])} UIDs",
    f"  Val: {len(health_splits['val'])} UIDs",
    f"  Test: {len(health_splits['test'])} UIDs",
    sep="\n",
)

Cooking splits:
  Train: 288 UIDs
  Val: 82 UIDs
  Test: 110 UIDs

Health splits:
  Train: 55 UIDs
  Val: 12 UIDs
  Test: 23 UIDs
