In [None]:
# Environment and Data Audit
import os, sys, platform, time, subprocess, random
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image

# Start the wall-clock timer; t0 is read again later to report elapsed time.
t0 = time.time()
CWD = Path.cwd()
print(f"CWD: {CWD}")

# Interpreter version (first token of sys.version) and host platform string.
py_version = sys.version.split()[0]
host_platform = platform.platform()
print(f"Python: {py_version} | Platform: {host_platform}")

# Sorted so the listing order does not depend on the filesystem.
dir_entries = sorted(os.listdir('.'))
print('Dir listing:', dir_entries)

def run(cmd):
    """Echo a command, execute it, and print its combined stdout/stderr.

    This is a best-effort diagnostic helper: it never raises for a command
    that fails or that cannot be started, so one missing tool does not abort
    the rest of the audit.

    Args:
        cmd: Sequence of argument strings, executed directly (no shell).

    Returns:
        None. Everything is reported via ``print``.
    """
    print(f"$ {' '.join(cmd)}")
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True)
    except subprocess.CalledProcessError as e:
        # Non-zero exit status: show whatever the command wrote before failing.
        print(e.output)
    except OSError as e:
        # The executable could not be launched at all (e.g. not on PATH,
        # not executable). Report it instead of crashing the caller.
        print(f"Command could not be started: {e}")
    else:
        print(out)

def ensure_torch():
    """Make sure ``torch`` can be imported, installing it with pip if needed.

    When the first import succeeds nothing else happens. Otherwise the
    failure is printed, pip is upgraded, and a wheel is requested from the
    PyTorch cu121 index; if that install exits non-zero, a plain
    ``pip install torch`` is attempted instead. The import is then retried,
    so any remaining import error propagates to the caller.
    """
    try:
        import torch  # noqa: F401
    except Exception as err:
        print(f"Torch import failed: {err}\nInstalling torch (CUDA 12.1) ...")

        # Shared prefix for every pip invocation below.
        pip_install = [sys.executable, '-m', 'pip', 'install', '--quiet']
        run(pip_install + ['--upgrade', 'pip'])

        cuda_index = ['--index-url', 'https://download.pytorch.org/whl/cu121']
        if subprocess.call(pip_install + cuda_index + ['torch']) != 0:
            print("CUDA wheel install failed, trying CPU wheel ...")
            run(pip_install + ['torch'])

        # Drop cached finder state so the freshly installed package is visible.
        import importlib
        importlib.invalidate_caches()
        import torch  # noqa: F401

# torch must be importable before CUDA can be queried.
ensure_torch()
import torch

cuda_ok = torch.cuda.is_available()
print(f"GPU Available: {cuda_ok}")
print(f"GPU Count: {torch.cuda.device_count()}")
if cuda_ok:
    # Details are reported for device index 0 only.
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")
    props = torch.cuda.get_device_properties(0)
    total_gib = props.total_memory / 1024**3  # bytes -> GiB
    print(f"GPU Memory: {total_gib:.1f} GB")
    run(['nvidia-smi'])

# Report presence and on-disk size of each expected CSV before loading it.
csv_files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for csv_name in csv_files:
    csv_path = CWD / csv_name
    found = csv_path.exists()
    size = csv_path.stat().st_size if found else 'NA'
    print(f"{csv_name}: exists={found} size={size}")

# Load all three tables first, then show shape and the first rows of each.
train_df = pd.read_csv('train.csv')
depths_df = pd.read_csv('depths.csv')
sub_df = pd.read_csv('sample_submission.csv')
for shape_label, frame in (
    ('train.csv shape:', train_df),
    ('depths.csv shape:', depths_df),
    ('sample_submission.csv shape:', sub_df),
):
    print(shape_label, frame.shape)
    print(frame.head(3))

# A row counts as an empty mask when rle_mask is missing, is a zero-length
# string, or stringifies to 'nan'.
rle_text = train_df['rle_mask'].astype(str)
is_empty = train_df['rle_mask'].isna() | (rle_text.str.len() == 0) | (rle_text == 'nan')
empty_pct = is_empty.mean() * 100
print(f"Empty masks: {is_empty.sum()}/{len(train_df)} = {empty_pct:.2f}%")

# Share of train ids that also appear in depths.csv.
has_depth = train_df['id'].isin(depths_df['id'])
depth_pct = has_depth.mean() * 100
print(f"Train ids with depth: {depth_pct:.2f}%")

# Image and mask directories expected under the working directory.
paths = {
    'train_images': CWD/'train'/'images',
    'test_images': CWD/'test'/'images',
    'train_masks_dir': CWD/'train'/'masks',
}
for label, folder in paths.items():
    present = folder.exists()
    print(f"{label}: exists={present} path={folder}")
    if not present:
        continue
    # Recursive count of PNG files below this directory.
    cnt = sum(1 for _ in folder.rglob('*.png'))
    print(f"  *.png count: {cnt}")

# Locate one training image to inspect its size and mode.
train_img_dir = CWD/'train'/'images'
img_path = None
if train_img_dir.exists():
    ids = train_df['id'].tolist()
    random.shuffle(ids)
    # Probe the first 100 shuffled ids and stop at the first PNG found on disk.
    candidates = (train_img_dir/f"{_id}.png" for _id in ids[:100])
    img_path = next((c for c in candidates if c.exists()), None)
    if img_path is None:
        # None of the sampled ids matched a file: fall back to any PNG present.
        img_path = next(iter(train_img_dir.glob('*.png')), None)

if img_path is None or not Path(img_path).exists():
    print("No sample image found under train/images")
else:
    with Image.open(img_path) as im:
        print(f"Sample image: {img_path.name} | size={im.size} | mode={im.mode}")

# Elapsed wall-clock time since t0 was recorded at the top of the audit.
elapsed = time.time() - t0
print(f"Audit done in {elapsed:.1f}s")