In [None]:
import os
import shutil # Import shutil for file operations

# ============== EDIT THESE ==============
# Root of the mounted Google Drive.
DRIVE_BASE = "/content/drive/MyDrive"
# Project folder under My Drive; the dataset zip and weights paths below are resolved against it.
DRIVE_PROJECT_FOLDER = "jersey-number-pipeline"
# Dataset archive, relative to DRIVE_BASE/DRIVE_PROJECT_FOLDER. Upload the dataset to this location.
DATASET_ZIP = "jersey-number-pipeline/data/SoccerNet/jersey-2023.zip"
# Weights folder, relative to DRIVE_BASE/DRIVE_PROJECT_FOLDER. Please confirm whether this should be 'models' or 'weights'.
WEIGHTS_FOLDER = "models"
# Set to False if Drive was already mounted in a previous run.
MOUNT_DRIVE = True
# True = clone the repos onto Drive; False = clone into /content (faster, lost when the runtime ends).
PERSIST_TO_DRIVE = True

if MOUNT_DRIVE:
    from google.colab import drive
    drive.mount("/content/drive")

# Base directory that holds every project component (main repo, sam2, reid, pose, str).
base_project_dir = (
    os.path.join(DRIVE_BASE, DRIVE_PROJECT_FOLDER) if PERSIST_TO_DRIVE else "/content"
)
os.makedirs(base_project_dir, exist_ok=True)

# Main repository (jersey-number-pipeline): cloned only when its .git directory is absent.
repo_root = os.path.join(base_project_dir, "jersey-number-pipeline")
repo_already_cloned = os.path.isdir(os.path.join(repo_root, ".git"))
if not repo_already_cloned:
    get_ipython().system(f'cd "{base_project_dir}" && git clone https://github.com/superbolt08/jersey-number-pipeline.git')
print("Repo root (main project):", repo_root)

# Work from base_project_dir so the relative "sam2" check and clone below resolve there.
get_ipython().run_line_magic("cd", base_project_dir)

# SAM (required for legibility), cloned as "sam2" next to the main repo.
if not os.path.isdir("sam2"):
    get_ipython().system("git clone --recurse-submodules https://github.com/davda54/sam.git sam2")

# Remaining components, as (parent folder, checkout name, git URL):
#   reid/centroids-reid  - Re-ID
#   pose/ViTPose         - pose estimation
#   str/parseq           - scene text recognition
for component_group, component_name, component_url in (
    ("reid", "centroids-reid", "https://github.com/mikwieczorek/centroids-reid.git"),
    ("pose", "ViTPose", "https://github.com/ViTAE-Transformer/ViTPose.git"),
    ("str", "parseq", "https://github.com/baudm/parseq.git"),
):
    os.makedirs(os.path.join(base_project_dir, component_group), exist_ok=True)
    component_dir = os.path.join(base_project_dir, component_group, component_name)
    if os.path.isdir(component_dir):
        continue  # already checked out
    get_ipython().system(f'cd "{base_project_dir}/{component_group}" && git clone --recurse-submodules {component_url} {component_name}')
    if component_name == "centroids-reid":
        # A fresh centroids-reid checkout also gets a models/ directory for its weights.
        os.makedirs(os.path.join(component_dir, "models"), exist_ok=True)

print("All repositories cloned/checked.")

# Project folder on Drive (e.g. /content/drive/MyDrive/jersey-number-pipeline).
# repo_root, set earlier, is the main project's checkout.
drive_project_path = os.path.join(DRIVE_BASE, DRIVE_PROJECT_FOLDER)

# The dataset archive is looked up relative to the Drive project folder.
drive_zip = os.path.join(DRIVE_BASE, DRIVE_PROJECT_FOLDER, DATASET_ZIP)

# Shown so the user can verify where the archive is expected.
print(f"Expected zip file path: {drive_zip}")

# NOTE: Drive is mounted once at the top of the script; no forced remount is done here.

# The archive is extracted onto the local runtime disk under /content/data/SoccerNet.
data_sn_path_local_base = "/content/data"
data_sn_path_local = os.path.join(data_sn_path_local_base, "SoccerNet")

# This directory existing means the dataset is already unzipped and reorganized.
unzipped_data_check_path = os.path.join(data_sn_path_local_base, "SoccerNet", "jersey-2023")

def _replace_with_symlink(target, link_path, protected_file):
    """Make ``link_path`` a symlink to ``target`` without destroying ``protected_file``.

    An existing symlink (including a dangling one) is replaced. An existing
    regular file or directory is removed and replaced, unless it is, or
    contains, ``protected_file``; in that case nothing is touched.

    Returns True when the symlink was created, False when ``link_path`` was
    left alone to protect ``protected_file``.
    """
    # islink() must come first: exists() reports False for a dangling symlink,
    # which would then make the symlink creation below fail.
    if os.path.islink(link_path):
        os.remove(link_path)
    elif os.path.exists(link_path):
        existing_real = os.path.realpath(link_path)
        protected_real = os.path.realpath(protected_file)
        if os.path.commonpath([existing_real, protected_real]) == existing_real:
            print(f"WARNING: '{link_path}' holds '{protected_file}'; leaving it untouched.")
            return False
        if os.path.isdir(link_path):
            shutil.rmtree(link_path)
        else:
            os.remove(link_path)
    os.symlink(target, link_path)
    return True


# Step 1: make sure the dataset is extracted and organized on the local disk.
if os.path.isdir(unzipped_data_check_path):
    print(f"Dataset already appears to be unzipped and organized at '{unzipped_data_check_path}'. Skipping unzipping and reorganization.")
elif not os.path.exists(drive_zip):
    print(f"ERROR: The dataset zip file '{DATASET_ZIP}' was not found at '{drive_zip}'.")
    print(f"Please ensure you have uploaded '{DATASET_ZIP}' to your Google Drive folder: '{drive_project_path}'.")
    raise FileNotFoundError(f"Dataset zip file missing: {drive_zip}")
else:
    print(f"Found dataset zip file at '{drive_zip}'. Proceeding to unzip to local /content.")

    # Ensure clean slate for local extraction
    if os.path.exists(data_sn_path_local):
        get_ipython().system(f'rm -rf "{data_sn_path_local}"/*')
    os.makedirs(data_sn_path_local, exist_ok=True)

    # Copy the zip to local /content/tmp first, then unzip the local copy.
    temp_unzip_dir = "/content/tmp"
    os.makedirs(temp_unzip_dir, exist_ok=True)
    temp_zip_path = os.path.join(temp_unzip_dir, os.path.basename(drive_zip))

    print(f"Copying '{drive_zip}' to temporary location '{temp_zip_path}' using shutil.copy...")
    shutil.copy(drive_zip, temp_zip_path)

    print(f"Unzipping '{temp_zip_path}' to '{data_sn_path_local}'...")
    get_ipython().system(f'unzip -o "{temp_zip_path}" -d "{data_sn_path_local}"')  # no -q: verbose output

    # Clean up the temporary copy
    print(f"Cleaning up temporary zip file: '{temp_zip_path}'")
    os.remove(temp_zip_path)

    print(f"Dataset extracted to local: {data_sn_path_local}")

    # If the archive unpacked 'train' and 'test' at the top level (and no
    # 'jersey-2023'), move them under 'jersey-2023'.
    local_train_dir = os.path.join(data_sn_path_local, "train")
    local_test_dir = os.path.join(data_sn_path_local, "test")
    local_jersey_dir = os.path.join(data_sn_path_local, "jersey-2023")
    if os.path.isdir(local_train_dir) and os.path.isdir(local_test_dir) and not os.path.isdir(local_jersey_dir):
        print("Reorganizing dataset structure: Moving 'train' and 'test' into 'jersey-2023' in local /content.")
        os.makedirs(local_jersey_dir, exist_ok=True)
        shutil.move(local_train_dir, os.path.join(local_jersey_dir, "train"))
        shutil.move(local_test_dir, os.path.join(local_jersey_dir, "test"))
        print(f"Contents of {data_sn_path_local} after reorganization:")
        get_ipython().system(f'ls -R "{data_sn_path_local}"/')
    elif os.path.isdir(local_jersey_dir):
        print(f"Dataset extracted directly to {data_sn_path_local}/jersey-2023/.")
    else:
        print(f"WARNING: Unexpected dataset structure after unzipping. Please verify local '{data_sn_path_local}' contents.")

# Step 2: expose the local data at the repo's data/SoccerNet path.
# This runs even when extraction was skipped, so the link is (re)created for
# whichever repo_root is currently in use. It runs after reorganization so the
# per-entry links below see the final layout.
repo_data_sn_path = os.path.join(repo_root, "data", "SoccerNet")
os.makedirs(os.path.dirname(repo_data_sn_path), exist_ok=True)  # Ensure parent dir exists

if os.path.isdir(repo_data_sn_path) and not os.path.islink(repo_data_sn_path):
    # A real directory is kept rather than deleted: with the default settings
    # and PERSIST_TO_DRIVE=True, drive_zip itself lives in this directory, so
    # removing it would delete the only Drive copy of the dataset. Each
    # extracted top-level entry is linked inside it instead.
    for extracted_entry in sorted(os.listdir(data_sn_path_local)):
        entry_target = os.path.join(data_sn_path_local, extracted_entry)
        entry_link = os.path.join(repo_data_sn_path, extracted_entry)
        if _replace_with_symlink(entry_target, entry_link, drive_zip):
            print(f"Symlinked '{entry_target}' to '{entry_link}'")
    get_ipython().system(f'ls -l "{repo_data_sn_path}"')  # Verify symlinks
else:
    # Nothing there yet, a (possibly dangling) symlink, or a stray file:
    # point the whole data/SoccerNet path at the local extraction.
    if _replace_with_symlink(data_sn_path_local, repo_data_sn_path, drive_zip):
        print(f"Symlinked '{data_sn_path_local}' to '{repo_data_sn_path}'")
    get_ipython().system(f'ls -l "{os.path.dirname(repo_data_sn_path)}"')  # Verify symlink

drive_weights = os.path.join(drive_project_path, WEIGHTS_FOLDER)

# (source sub-folder under drive_weights, destination directory) for each set of weights.
weight_copy_plan = [
    (os.path.join(drive_weights, "models"), os.path.join(repo_root, "models")),
    (os.path.join(drive_weights, "reid"), os.path.join(base_project_dir, "reid", "centroids-reid", "models")),
    (os.path.join(drive_weights, "pose"), os.path.join(base_project_dir, "pose", "ViTPose", "checkpoints")),
]

# Copying stays best-effort (a missing folder or a failed file never aborts the
# script), but every problem is reported instead of being silently discarded.
total_weights_copied = 0
for weights_src, weights_dst in weight_copy_plan:
    # Ensure the target directory exists before copying weights
    os.makedirs(weights_dst, exist_ok=True)
    if not os.path.isdir(weights_src):
        print(f"WARNING: weights folder not found, nothing copied from '{weights_src}'.")
        continue

    copied_from_src = 0
    for weight_name in sorted(os.listdir(weights_src)):
        weight_path = os.path.join(weights_src, weight_name)
        # Same selection as a plain `cp src/* dst/`: hidden entries are not
        # matched by the glob and sub-directories are not copied.
        if weight_name.startswith(".") or not os.path.isfile(weight_path):
            continue
        try:
            shutil.copy(weight_path, weights_dst)
        except OSError as copy_error:
            print(f"WARNING: could not copy '{weight_path}' to '{weights_dst}': {copy_error}")
            continue
        copied_from_src += 1

    print(f"Copied {copied_from_src} weight file(s) from '{weights_src}' to '{weights_dst}'.")
    total_weights_copied += copied_from_src

if total_weights_copied:
    print("Weights copied.")
else:
    print(f"WARNING: No weight files were copied. Check WEIGHTS_FOLDER ('{WEIGHTS_FOLDER}') and the contents of '{drive_weights}'.")

import subprocess
import sys

# Install through the interpreter that will run main.py, so the packages land
# in that interpreter's environment.
get_ipython().system(f'"{sys.executable}" -m pip install -q torch torchvision opencv-python Pillow numpy pandas scipy tqdm pytorch-lightning yacs')

# Change directory to repo_root for the subprocess call
get_ipython().run_line_magic("cd", repo_root)

# Construct the command and environment for the subprocess
command = [sys.executable, "main.py", "SoccerNet", "test"]
env = os.environ.copy()

# PYTHONPATH: sam2 for SAM; reid/centroids-reid so centroid_reid.py can "from config import cfg"
centroids_reid_root = os.path.join(base_project_dir, "reid", "centroids-reid")
pythonpath_entries = [os.path.join(base_project_dir, "sam2"), centroids_reid_root]
inherited_pythonpath = env.get("PYTHONPATH", "")
if inherited_pythonpath:
    # Append the inherited value only when it is set, so the result never ends
    # in a separator (which would add an empty search-path entry).
    pythonpath_entries.append(inherited_pythonpath)
env["PYTHONPATH"] = os.pathsep.join(pythonpath_entries)

print(f"Attempting to run: {' '.join(command)}")
print(f"With PYTHONPATH: {env['PYTHONPATH']}")

# Execute main.py using subprocess.run
# capture_output=True captures stdout and stderr (shown only after main.py exits)
# text=True decodes stdout/stderr as text
result = subprocess.run(command, env=env, capture_output=True, text=True, cwd=repo_root)

# Print subprocess output
print("Standard Output from main.py:\n", result.stdout)
print("Standard Error from main.py:\n", result.stderr)

if result.returncode != 0:
    # stderr (including any traceback) is already printed above, so it is not repeated here.
    print(f"Error executing main.py. Exit code: {result.returncode}")
else:
    print("main.py executed successfully.")