# Notebook for testing and visualizing the SoccerNet dataset

## Load Data

In [None]:
import os
from dotenv import load_dotenv
# Populate the process environment from a .env file (if one is found),
# so that DATASET_PATH can be read with os.getenv in the next cell.
load_dotenv()

In [None]:
# Dataset root taken from the environment; this is None when DATASET_PATH is unset.
dataset_path = os.environ.get("DATASET_PATH")
print(f"dataset_path: {dataset_path}")

## Read Dataset

In [None]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

In [None]:
# Scan the SoccerNetGS directory tree and load each sequence's
# Labels-GameState.json. After the loop, `labels`, `image_id_to_file` and
# `annotations` hold the data of the last sequence that had a labels file.

# Define the base directory
base_dir = f"{dataset_path}/SoccerNetGS/"

# Define the subsets
subsets = ['train', 'valid', 'test', 'challenge']

# The [1:2] slice restricts the scan to the 'valid' subset.
for subset in subsets[1:2]:
    subset_dir = os.path.join(base_dir, subset)
    if not os.path.exists(subset_dir):
        print(f"Directory {subset_dir} does not exist.")
        continue

    # os.listdir returns entries in arbitrary order; sort them so that the
    # sequence left in `labels` after the loop is the same on every run.
    sequences = sorted(os.listdir(subset_dir))
    for seq in sequences:
        seq_dir = os.path.join(subset_dir, seq)
        # Only directories are sequences; skip stray files in the subset folder.
        if not os.path.isdir(seq_dir):
            continue

        images_dir = os.path.join(seq_dir, 'img1')

        # Path to the labels JSON file
        labels_file = os.path.join(seq_dir, 'Labels-GameState.json')
        if not os.path.exists(labels_file):
            print(f"No labels file found for sequence {seq} in {subset}")
            continue

        # Read the JSON file; decode as UTF-8 explicitly because the default
        # text encoding of open() depends on the platform locale.
        with open(labels_file, 'r', encoding='utf-8') as f:
            labels = json.load(f)

        # Create a mapping from image_id to file_name
        image_id_to_file = {
            image_info['image_id']: image_info['file_name']
            for image_info in labels.get('images', [])
        }

        # Process annotations. The per-annotation values are extracted but not
        # consumed anywhere in this cell; the loop still fails loudly (KeyError)
        # on an annotation that lacks 'image_id' or 'category_id'.
        annotations = labels.get('annotations', [])
        for ann in annotations:
            image_id = ann['image_id']
            file_name = image_id_to_file.get(image_id, 'Unknown')
            category_id = ann['category_id']


In [None]:
# Turn each top-level section of the loaded labels into its own DataFrame
images_df = pd.DataFrame(data=labels['images'])
annotations_df = pd.DataFrame(data=labels['annotations'])
categories_df = pd.DataFrame(data=labels['categories'])

# Preview the first rows of every table, each under its own heading
for heading, frame in (
    ("Images DataFrame:", images_df),
    ("\nAnnotations DataFrame:", annotations_df),
    ("\nCategories DataFrame:", categories_df),
):
    print(heading)
    display(frame.head())

In [None]:
# Summary of the loaded sequence: category groups, row counts, distinct tracks
supercategory_values = categories_df['supercategory'].unique()
print(f"supercategories: {supercategory_values}")
print(f"num images: {images_df.shape[0]}")
print(f"num annotations: {annotations_df.shape[0]}")
distinct_track_ids = annotations_df['track_id'].unique()
print(f"num track_ids: {len(distinct_track_ids)}")