In [None]:
import os, sys, time
from pathlib import Path
# Smoke test: prove the kernel executes code and show where it is running.
_now = time.strftime('%Y-%m-%d %H:%M:%S')
print('Smoke check: cell executed at', _now)
print('CWD:', Path.cwd())
# Sorted so the directory listing is stable between runs.
_entries = os.listdir('.')
_entries.sort()
print('Files in CWD:', _entries)

In [None]:
# Environment and Data Audit
import os, sys, platform, time, subprocess, random
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image

# Start the audit timer and report the runtime environment.
start_time = time.time()
CWD = Path.cwd()
print(f"CWD: {CWD}")
py_version = sys.version.split()[0]
platform_name = platform.platform()
print(f"Python: {py_version} | Platform: {platform_name}")

def run(cmd):
    """Echo a command, run it, and print its combined stdout/stderr.

    Failures are reported on stdout instead of being raised, so the audit
    can continue when a diagnostic tool fails or is not installed.

    Args:
        cmd: Command and arguments as a list of strings (no shell is used).

    Returns:
        None. All output goes to stdout.
    """
    print(f"$ {' '.join(cmd)}")
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True)
    except subprocess.CalledProcessError as e:
        # Non-zero exit status: still show whatever the command printed.
        print(e.output)
    except OSError as e:
        # Executable missing or not runnable (e.g. FileNotFoundError).
        print(f"Command could not be started: {e}")
    else:
        print(out)

# Try Torch and GPU
def ensure_torch():
    """Make sure ``torch`` can be imported, installing it with pip if needed.

    If the first import fails for any reason, pip is upgraded, the CUDA 12.1
    wheel is tried, and the default wheel is used as a fallback when that
    install exits non-zero. The import is then retried; a failure of that
    second import propagates to the caller.
    """
    try:
        import torch  # noqa: F401
    except Exception as e:
        print(f"Torch import failed: {e}\nInstalling torch (CUDA 12.1) ...")
        pip_install = [sys.executable, '-m', 'pip', 'install', '--quiet']
        run(pip_install + ['--upgrade', 'pip'])
        cuda_index = ['--index-url', 'https://download.pytorch.org/whl/cu121']
        cuda_status = subprocess.call(pip_install + cuda_index + ['torch'])
        if cuda_status != 0:
            print("CUDA wheel install failed, trying CPU wheel ...")
            run(pip_install + ['torch'])
        # Let the import system notice the freshly installed package.
        import importlib
        importlib.invalidate_caches()
        import torch  # noqa: F401

# Make torch importable first, then report which accelerator (if any) is visible.
ensure_torch()
import torch
gpu_available = torch.cuda.is_available()
print(f"GPU Available: {gpu_available}")
print(f"GPU Count: {torch.cuda.device_count()}")
if gpu_available:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    gpu_mem_gb = props.total_memory / 1024**3
    print(f"GPU Memory: {gpu_mem_gb:.1f} GB")
    run(['nvidia-smi'])

# CSV existence and basic stats
csv_files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for csv_name in csv_files:
    csv_path = CWD / csv_name
    present = csv_path.exists()
    size = csv_path.stat().st_size if present else 'NA'
    print(f"{csv_name}: exists={present} size={size}")

# The reads are unconditional: a missing CSV raises here.
train_df = pd.read_csv('train.csv')
depths_df = pd.read_csv('depths.csv')
sub_df = pd.read_csv('sample_submission.csv')
loaded_frames = (
    ('train.csv', train_df),
    ('depths.csv', depths_df),
    ('sample_submission.csv', sub_df),
)
for frame_label, frame in loaded_frames:
    print(f'{frame_label} shape:', frame.shape)
    print(frame.head(5))

# Empty mask stats: a mask counts as empty when it is NaN, '' or the text 'nan'.
rle_text = train_df['rle_mask'].astype(str)
is_empty = train_df['rle_mask'].isna() | (rle_text.str.len() == 0) | (rle_text == 'nan')
empty_pct = is_empty.mean()*100
print(f"Empty masks: {is_empty.sum()}/{len(train_df)} = {empty_pct:.2f}%")

# Depth coverage for train ids
has_depth = train_df['id'].isin(depths_df['id'])
depth_pct = has_depth.mean()*100
print(f"Train ids with depth: {depth_pct:.2f}%")

# Image directories audit (PNG files are counted recursively)
train_root = CWD/'train'
paths = {
    'train_images': train_root/'images',
    'test_images': CWD/'test'/'images',
    'train_masks_dir': train_root/'masks',
}
for dir_label, dir_path in paths.items():
    print(f"{dir_label}: exists={dir_path.exists()} path={dir_path}")
    if not dir_path.exists():
        continue
    cnt = sum(1 for _ in dir_path.rglob('*.png'))
    print(f"  *.png count: {cnt}")

# Inspect one sample image (if available)
sample_id = None
img_path = None
train_images_dir = CWD/'train'/'images'
if train_images_dir.exists():
    # Try up to 50 randomly ordered ids from train.csv before scanning the folder.
    ids = train_df['id'].tolist()
    random.shuffle(ids)
    for candidate in ids[:50]:
        candidate_path = train_images_dir/f"{candidate}.png"
        if not candidate_path.exists():
            continue
        sample_id, img_path = candidate, candidate_path
        break
    if img_path is None:
        # Fallback: take the first PNG the directory listing yields.
        pngs = list(train_images_dir.glob('*.png'))
        if pngs:
            img_path = pngs[0]
            sample_id = img_path.stem

if img_path is None or not Path(img_path).exists():
    print("No sample image found under train/images")
else:
    with Image.open(img_path) as im:
        print(f"Sample image: {img_path.name} | size={im.size} | mode={im.mode}")

elapsed = time.time() - start_time
print(f"Audit done in {elapsed:.1f}s")

In [None]:
# Environment and Data Audit
import os, sys, platform, time, subprocess, textwrap, random
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image

# Start the audit timer and report the runtime environment.
start_time = time.time()
CWD = Path.cwd()
print(f"CWD: {CWD}")
py_version = sys.version.split()[0]
platform_name = platform.platform()
print(f"Python: {py_version} | Platform: {platform_name}")

def run(cmd):
    """Echo a command, run it, and print its combined stdout/stderr.

    Failures are reported on stdout instead of being raised, so the audit
    can continue when a diagnostic tool fails or is not installed.

    Args:
        cmd: Command and arguments as a list of strings (no shell is used).

    Returns:
        None. All output goes to stdout.
    """
    print(f"$ {' '.join(cmd)}")
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True)
    except subprocess.CalledProcessError as e:
        # Non-zero exit status: still show whatever the command printed.
        print(e.output)
    except OSError as e:
        # Executable missing or not runnable (e.g. FileNotFoundError).
        print(f"Command could not be started: {e}")
    else:
        print(out)

# Try Torch and GPU
def ensure_torch():
    """Make sure ``torch`` can be imported, installing it with pip if needed.

    If the first import fails for any reason, pip is upgraded, the CUDA 12.1
    wheel is tried, and the default wheel is used as a fallback when that
    install exits non-zero. The import is then retried; a failure of that
    second import propagates to the caller.
    """
    try:
        import torch  # noqa: F401
    except Exception as e:
        print(f"Torch import failed: {e}\nInstalling torch (CUDA 12.1) ...")
        pip_install = [sys.executable, '-m', 'pip', 'install', '--quiet']
        run(pip_install + ['--upgrade', 'pip'])
        # Prefer CUDA wheels; fall back to the default index if that fails.
        cuda_index = ['--index-url', 'https://download.pytorch.org/whl/cu121']
        cuda_status = subprocess.call(pip_install + cuda_index + ['torch'])
        if cuda_status != 0:
            print("CUDA wheel install failed, trying CPU wheel ...")
            run(pip_install + ['torch'])
        # Let the import system notice the freshly installed package.
        import importlib
        importlib.invalidate_caches()
        import torch  # noqa: F401

# Make torch importable first, then report which accelerator (if any) is visible.
ensure_torch()
import torch
gpu_available = torch.cuda.is_available()
print(f"GPU Available: {gpu_available}")
print(f"GPU Count: {torch.cuda.device_count()}")
if gpu_available:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    props = torch.cuda.get_device_properties(0)
    gpu_mem_gb = props.total_memory / 1024**3
    print(f"GPU Memory: {gpu_mem_gb:.1f} GB")
    run(['nvidia-smi'])

# CSV existence and basic stats
csv_files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for csv_name in csv_files:
    csv_path = CWD / csv_name
    present = csv_path.exists()
    size = csv_path.stat().st_size if present else 'NA'
    print(f"{csv_name}: exists={present} size={size}")

# The reads are unconditional: a missing CSV raises here.
train_df = pd.read_csv('train.csv')
depths_df = pd.read_csv('depths.csv')
sub_df = pd.read_csv('sample_submission.csv')
loaded_frames = (
    ('train.csv', train_df),
    ('depths.csv', depths_df),
    ('sample_submission.csv', sub_df),
)
for frame_label, frame in loaded_frames:
    print(f'{frame_label} shape:', frame.shape)
    print(frame.head(5))

# Empty mask stats: a mask counts as empty when it is NaN, '' or the text 'nan'.
rle_text = train_df['rle_mask'].astype(str)
is_empty = train_df['rle_mask'].isna() | (rle_text.str.len() == 0) | (rle_text == 'nan')
empty_pct = is_empty.mean()*100
print(f"Empty masks: {is_empty.sum()}/{len(train_df)} = {empty_pct:.2f}%")

# Depth coverage for train ids
has_depth = train_df['id'].isin(depths_df['id'])
depth_pct = has_depth.mean()*100
print(f"Train ids with depth: {depth_pct:.2f}%")

# Image directories audit (PNG files are counted recursively)
train_root = CWD/'train'
paths = {
    'train_images': train_root/'images',
    'test_images': CWD/'test'/'images',
    'train_masks_dir': train_root/'masks',
}
for dir_label, dir_path in paths.items():
    print(f"{dir_label}: exists={dir_path.exists()} path={dir_path}")
    if not dir_path.exists():
        continue
    cnt = sum(1 for _ in dir_path.rglob('*.png'))
    print(f"  *.png count: {cnt}")

# Inspect one sample image (if available)
# Both names are bound up front: previously img_path stayed undefined when
# train/images existed but contained no PNG, and the check below raised NameError.
sample_id = None
img_path = None
train_images_dir = CWD/'train'/'images'
if train_images_dir.exists():
    # Prefer an id from train.csv that exists as a file (up to 50 random tries)
    ids = train_df['id'].tolist()
    random.shuffle(ids)
    for _id in ids[:50]:
        ipath = train_images_dir/f"{_id}.png"
        if ipath.exists():
            sample_id = _id
            img_path = ipath
            break
    if img_path is None:
        # fallback: pick any png
        pngs = list(train_images_dir.glob('*.png'))
        if pngs:
            img_path = pngs[0]
            sample_id = img_path.stem

if img_path is not None and Path(img_path).exists():
    with Image.open(img_path) as im:
        print(f"Sample image: {img_path.name} | size={im.size} | mode={im.mode}")
else:
    print("No sample image found under train/images")

print(f"Audit done in {time.time()-start_time:.1f}s")

In [None]:
# Environment & Data Audit (minimal baseline)
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Dataset paths below are resolved relative to the working directory.
root = Path('.')
def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Presence and size of each expected CSV.
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    empty_pct = train_df['rle_mask'].isna().mean()*100
    print(f'Empty-mask %: {empty_pct:.2f}')
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

# Quick file counts for images (if present)
img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        gpu_name = torch.cuda.get_device_name(0)
        print('GPU:', gpu_name)
except Exception as e:
    print('Torch not available:', e)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        gpu_name = torch.cuda.get_device_name(0)
        print('GPU:', gpu_name)
except Exception as e:
    print('Torch not available:', e)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=3):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print (three by default in this cell).

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first three rows.
train_df = head_csv('train.csv', n=3)
depths_df = head_csv('depths.csv', n=3)
sub_df = head_csv('sample_submission.csv', n=3)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# GPU check (non-fatal): failures are printed rather than raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Ask the driver tool for its device list, independently of torch.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
except Exception as e:
    print('nvidia-smi not available:', e)
else:
    print('nvidia-smi -L:\n', out)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image folders, relative to the working directory.
img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

# Pick one training image: prefer a file named after one of the first 20 CSV ids,
# otherwise take the first PNG the directory yields.
sample = None
if train_df is not None and 'id' in train_df.columns:
    first_ids = train_df['id'].astype(str).head(20)
    id_paths = (img_dir/f'{cid}.png' for cid in first_ids)
    sample = next((path for path in id_paths if path.exists()), None)
if sample is None:
    sample = next(iter(img_dir.glob('*.png')), None)
if sample is None:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Ask the driver tool for its device list, independently of torch.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
except Exception as e:
    print('nvidia-smi not available:', e)
else:
    print('nvidia-smi -L:\n', out)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image folders, relative to the working directory.
img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

# Pick one training image: prefer a file named after one of the first 20 CSV ids,
# otherwise take the first PNG the directory yields.
sample = None
if train_df is not None and 'id' in train_df.columns:
    first_ids = train_df['id'].astype(str).head(20)
    id_paths = (img_dir/f'{cid}.png' for cid in first_ids)
    sample = next((path for path in id_paths if path.exists()), None)
if sample is None:
    sample = next(iter(img_dir.glob('*.png')), None)
if sample is None:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)

# Ask the driver tool for its device list, independently of torch.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
except Exception as e:
    print('nvidia-smi not available:', e)
else:
    print('nvidia-smi -L:\n', out)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image folders, relative to the working directory.
img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

# Pick one training image: prefer a file named after one of the first 20 CSV ids,
# otherwise take the first PNG the directory yields.
sample = None
if train_df is not None and 'id' in train_df.columns:
    first_ids = train_df['id'].astype(str).head(20)
    id_paths = (img_dir/f'{cid}.png' for cid in first_ids)
    sample = next((path for path in id_paths if path.exists()), None)
if sample is None:
    sample = next(iter(img_dir.glob('*.png')), None)
if sample is None:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit (minimal)
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        gpu_name = torch.cuda.get_device_name(0)
        print('GPU:', gpu_name)
except Exception as e:
    print('Torch not available:', e)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Share of training rows whose rle_mask is null.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Empty-mask %:', round(null_share*100, 2))
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

img_dir = Path('train/images')
mask_dir = Path('train/masks')
test_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', img_dir),
    ('train/masks', 'train_masks', mask_dir),
    ('test/images', 'test_images', test_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        gpu_name = torch.cuda.get_device_name(0)
        print('GPU:', gpu_name)
except Exception as e:
    print('Torch not available:', e)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Share of training rows whose rle_mask is null, as an unrounded percentage.
has_masks = train_df is not None and 'rle_mask' in train_df.columns
if has_masks:
    null_share = train_df['rle_mask'].isna().mean()
    print('Train empty-mask %:', null_share*100)
# Share of training ids that also appear in depths.csv.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()*100
    print(f'Depths coverage on train ids: {cov:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# GPU check (non-fatal): failures are printed rather than raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Ask the driver tool for its device list, independently of torch.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
except Exception as e:
    print('nvidia-smi not available:', e)
else:
    print('nvidia-smi -L:\n', out)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image folders, relative to the working directory.
train_img_dir, train_mask_dir, test_img_dir = map(
    Path, ('train/images', 'train/masks', 'test/images')
)
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
print('Dirs exist:', {'train/images': train_img_dir.exists(), 'train/masks': train_mask_dir.exists(), 'test/images': test_img_dir.exists()})
print('PNG counts:', {'train_images': count_png(train_img_dir), 'train_masks': count_png(train_mask_dir), 'test_images': count_png(test_img_dir)})

# Sample image info: prefer a file named after one of the first 20 CSV ids,
# otherwise fall back to the first PNG found in the folder.
sample = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    sample = next(iter(train_img_dir.glob('*.png')), None)
if sample is not None:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)
else:
    print('No sample image found.')

if train_df is not None and 'rle_mask' in train_df.columns:
    # The label says "%", so scale the 0-1 null fraction to a percentage
    # (it was previously printed unscaled).
    null_pct = train_df['rle_mask'].isna().mean()*100
    print('Train null rle %:', round(null_pct, 2))
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    # cov stays a 0-1 fraction; it is scaled only when formatted.
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit (minimal)
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Torch is optional here: any import or CUDA query failure is reported, not raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        gpu_name = torch.cuda.get_device_name(0)
        print('GPU:', gpu_name)
except Exception as e:
    print('Torch not available:', e)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

if train_df is not None and 'rle_mask' in train_df.columns:
    # The label says "%", so scale the 0-1 null fraction to a percentage
    # (it was previously printed unscaled).
    null_pct = train_df['rle_mask'].isna().mean()*100
    print('Train null rle %:', round(null_pct, 2))
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    # cov stays a 0-1 fraction; it is scaled only when formatted.
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# GPU check (non-fatal): failures are printed rather than raised.
try:
    import torch
    torch_version = torch.__version__
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch_version, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Ask the driver tool for its device list, independently of torch.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
except Exception as e:
    print('nvidia-smi not available:', e)
else:
    print('nvidia-smi -L:\n', out)

# CSV presence and sizes, relative to the working directory
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image dirs and counts (paths are relative to the working directory)
train_img_dir, train_mask_dir, test_img_dir = map(
    Path, ('train/images', 'train/masks', 'test/images')
)
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Summarise which image folders exist and how many PNGs each holds.
audited_dirs = (
    ('train/images', 'train_images', train_img_dir),
    ('train/masks', 'train_masks', train_mask_dir),
    ('test/images', 'test_images', test_img_dir),
)
print('Dirs exist:', {rel: folder.exists() for rel, _, folder in audited_dirs})
print('PNG counts:', {key: count_png(folder) for _, key, folder in audited_dirs})

# Sample image info: prefer a file named after one of the first 20 CSV ids,
# otherwise take the first PNG the directory yields.
sample = None
if train_df is not None and 'id' in train_df.columns:
    first_ids = train_df['id'].astype(str).head(20)
    id_paths = (train_img_dir/f'{cid}.png' for cid in first_ids)
    sample = next((path for path in id_paths if path.exists()), None)
if sample is None:
    sample = next(iter(train_img_dir.glob('*.png')), None)
if sample is None:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Depth coverage: cov is kept as a 0-1 fraction and scaled only for display.
frames_loaded = train_df is not None and depths_df is not None
if frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    with_depth = train_df['id'].isin(depths_df['id'])
    cov = with_depth.mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Report interpreter, OS and working directory so the log identifies the runtime.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Presence and size of each expected CSV, relative to the working directory.
root = Path('.')
for csv_name in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    csv_path = root/csv_name
    if csv_path.exists():
        status, size = 'OK', csv_path.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{csv_name}:', status, '| size:', size)

def head_csv(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Location of the CSV file (string or Path).
        n: Number of leading rows to print.

    Returns:
        The full DataFrame, or None (after printing a "Missing" notice)
        when the path does not exist.
    """
    csv_path = Path(p)
    if csv_path.exists():
        frame = pd.read_csv(csv_path)
        print(f'{csv_path} shape:', frame.shape)
        print(frame.head(n))
        return frame
    print('Missing', csv_path)
    return None

# Load each CSV (None when the file is missing) and echo its first rows.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

if train_df is not None and 'rle_mask' in train_df.columns:
    # The label says "%", so scale the 0-1 null fraction to a percentage
    # (it was previously printed unscaled).
    null_pct = train_df['rle_mask'].isna().mean()*100
    print('Train null rle %:', round(null_pct, 2))
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    # cov stays a 0-1 fraction; it is scaled only when formatted.
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# For each expected CSV print OK/MISSING and its size in bytes ('-' when absent).
root = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def head_csv(p, n=5):
    """Read the CSV at ``p``, print its shape and leading rows, return it.

    Args:
        p: CSV path as ``str`` or ``Path``.
        n: How many rows of the frame to print.

    Returns:
        The loaded ``DataFrame``; ``None`` if the path does not exist or the
        file contains nothing to parse.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # pandas raises on a file with no columns; treat it as unusable
        # rather than letting the audit stop here.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Read the three tables; a missing file leaves the corresponding name as None.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

if train_df is not None and 'rle_mask' in train_df.columns:
    # isna().mean() is a 0-1 fraction; multiply by 100 so the printed number
    # is really the percentage the label announces.
    null_pct = 100 * train_df['rle_mask'].isna().mean()
    print(f'Train null rle %: {null_pct:.2f}')
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    # Fraction of train ids present in the depths table, shown as a percentage.
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Best-effort GPU probe through torch; any failure is printed and the audit continues.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Second probe: have nvidia-smi list the devices; also non-fatal.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Presence and byte size of each expected CSV.
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def head_csv(p, n=5):
    """Print a short preview of a CSV file and return it as a DataFrame.

    Args:
        p: Path of the CSV (``str`` or ``Path``).
        n: Number of rows shown in the preview.

    Returns:
        The ``DataFrame`` read from ``p``, or ``None`` if ``p`` does not exist
        or the file has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Keep the "return None on unusable input" contract for blank files.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Read each table (head_csv prints a preview); a missing file yields None.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Folders where the PNGs are expected, relative to the working directory.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Which expected image folders exist, and how many PNGs each one holds.
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

# Pick one training image to describe: first try files named after the first
# 20 ids of train_df, then fall back to the first PNG the folder listing yields.
sample = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir / f'{cid}.png'
        if not p.exists():
            continue
        sample = p
        break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    if imgs:
        sample = imgs[0]
if not sample:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Share of train ids that also occur in depths_df, as a percentage.
frames_ready = train_df is not None and depths_df is not None
if frames_ready and 'id' in train_df.columns and 'id' in depths_df.columns:
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Interpreter, platform and working-directory banner.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# One status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
expected_csvs = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in expected_csvs:
    p = root / f
    if p.exists():
        print(f'{f}:', 'OK', '| size:', p.stat().st_size)
    else:
        print(f'{f}:', 'MISSING', '| size:', '-')

def head_csv(p, n=5):
    """Load a CSV, echo its shape and first ``n`` rows, and hand back the frame.

    Args:
        p: CSV location (``str`` or ``Path``).
        n: Row count for the printed preview.

    Returns:
        ``DataFrame`` with the file's contents, or ``None`` when the file is
        absent or has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # A blank file would otherwise raise and stop the rest of the cell.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; head_csv returns None for a missing file.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

# Null-mask rate, scaled to a real percentage to match the '%' label
# (isna().mean() alone is a fraction between 0 and 1).
if train_df is not None and 'rle_mask' in train_df.columns:
    null_pct = train_df['rle_mask'].isna().mean() * 100
    print(f'Train null rle %: {null_pct:.2f}')
# Percentage of train ids that have a row in the depths table.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Non-fatal torch/CUDA probe: report and continue on any failure.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', has_cuda)
    if has_cuda:
        print('GPU:', torch.cuda.get_device_name(0))
        mem_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(mem_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)
# Non-fatal nvidia-smi probe listing the visible devices.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = root / f
    if p.exists():
        status, size = 'OK', p.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{f}:', status, '| size:', size)

def head_csv(p, n=5):
    """Read a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: File path of the CSV (``str`` or ``Path``).
        n: Number of rows to print from the top of the frame.

    Returns:
        The ``DataFrame``, or ``None`` if the file is missing or has no
        parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Report a blank file and carry on, mirroring the missing-file path.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; each name is None when its CSV is missing.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Expected image folders, relative to the working directory.
train_root = Path('train')
train_img_dir = train_root / 'images'
train_mask_dir = train_root / 'masks'
test_img_dir = Path('test') / 'images'
def count_png(d):
    """Count the ``*.png`` files directly inside ``d``; a non-existent ``d`` counts as 0."""
    folder = Path(d)
    matches = list(folder.glob('*.png')) if folder.exists() else []
    return len(matches)
# Folder presence, then PNG count per folder.
dirs_present = {
    label: folder.exists()
    for label, folder in (
        ('train/images', train_img_dir),
        ('train/masks', train_mask_dir),
        ('test/images', test_img_dir),
    )
}
print('Dirs exist:', dirs_present)
png_counts = {
    label: count_png(folder)
    for label, folder in (
        ('train_images', train_img_dir),
        ('train_masks', train_mask_dir),
        ('test_images', test_img_dir),
    )
}
print('PNG counts:', png_counts)

# Find one image to inspect: a file named after one of the first 20 train ids
# if possible, otherwise the first PNG returned by the folder listing.
sample = None
if train_df is not None and 'id' in train_df.columns:
    head_ids = train_df['id'].astype(str).head(20)
    for cid in head_ids:
        p = train_img_dir / f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
if sample:
    with Image.open(sample) as im:
        im_mode, im_size = im.mode, im.size
    print('Sample image:', sample.name, '| mode:', im_mode, '| size:', im_size)
else:
    print('No sample image found.')

# Percentage of train ids that also appear in depths_df.
if train_df is not None and depths_df is not None:
    if 'id' in train_df.columns and 'id' in depths_df.columns:
        cov = train_df['id'].isin(depths_df['id']).mean()
        print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Best-effort torch/CUDA probe; a failure is reported, not raised.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)

# Best-effort device listing through nvidia-smi.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Presence and byte size of each expected CSV.
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def head_csv(p, n=5):
    """Preview a CSV (shape plus first ``n`` rows) and return it as a DataFrame.

    Args:
        p: CSV path, ``str`` or ``Path``.
        n: Number of rows to include in the printed preview.

    Returns:
        The parsed ``DataFrame``; ``None`` when the path does not exist or the
        file has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Blank file: say so and return None instead of raising.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Read the three tables; a missing CSV leaves the name bound to None.
train_df = head_csv('train.csv', n=5)
depths_df = head_csv('depths.csv', n=5)
sub_df = head_csv('sample_submission.csv', n=5)

# Image and mask folders, relative to the working directory.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
def count_png(d):
    """Return the number of ``*.png`` entries directly under ``d``, or 0 when ``d`` is absent."""
    target = Path(d)
    if target.exists():
        return len([*target.glob('*.png')])
    return 0
# Which expected image folders exist, and how many PNGs each one holds.
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

# Locate one training image: prefer a file matching one of the first 20 train
# ids, otherwise take the first PNG the folder listing returns.
sample = None
ids_known = train_df is not None and 'id' in train_df.columns
if ids_known:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir / f'{cid}.png'
        if not p.exists():
            continue
        sample = p
        break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    if imgs:
        sample = imgs[0]
if not sample:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Percentage of train ids that also appear in depths_df.
frames_ready = train_df is not None and depths_df is not None
if frames_ready and 'id' in train_df.columns and 'id' in depths_df.columns:
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit complete.')

In [None]:
# Quick Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Base directory against which the CSV names are resolved.
root = Path('.')
def head_csv(p, n=5):
    """Load a CSV, print its shape and the first ``n`` rows, return the frame.

    Args:
        p: Path of the CSV file (``str`` or ``Path``).
        n: Number of leading rows to print.

    Returns:
        The loaded ``DataFrame``, or ``None`` when the file is missing or has
        no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Handle a blank file the same way as a missing one so the quick
        # audit keeps running.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
for f in ['train.csv','depths.csv','sample_submission.csv']:
    p = root/f
    print(f'{f}:', 'OK' if p.exists() else 'MISSING', '| size:', p.stat().st_size if p.exists() else '-')

# Load the three tables; head_csv returns None for a missing file.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

# Null-mask rate as a real percentage: isna().mean() is a 0-1 fraction, so it
# is scaled by 100 to agree with the '%' in the label.
if train_df is not None and 'rle_mask' in train_df.columns:
    null_pct = train_df['rle_mask'].isna().mean() * 100
    print(f'Train null rle %: {null_pct:.2f}')
# Percentage of train ids that also occur in the depths table.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

# Expected image folders, relative to the working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Which expected image folders exist, and how many PNGs each one holds.
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Non-fatal torch/CUDA probe: any exception is printed and the cell continues.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', has_cuda)
    if has_cuda:
        print('GPU:', torch.cuda.get_device_name(0))
        mem_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(mem_bytes/1024**3, 2))
except Exception as e:
    print('Torch import failed or CUDA not available:', e)
# Non-fatal nvidia-smi probe listing the visible devices.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    if p.exists():
        status, size = 'OK', p.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{f}:', status, '| size:', size)

def read_head(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Path of the CSV file (``str`` or ``Path``).
        n: Number of leading rows to print.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the file does not exist or
        has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # A blank file is reported and skipped rather than aborting the audit.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; each name is None when its CSV is missing.
train_df = read_head('train.csv', n=5)
depths_df = read_head('depths.csv', n=5)
sub_df = read_head('sample_submission.csv', n=5)

# Expected image folders, then a presence report for each of them.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
dirs_present = {
    label: folder.exists()
    for label, folder in (
        ('train/images', train_img_dir),
        ('train/masks', train_mask_dir),
        ('test/images', test_img_dir),
    )
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Count the ``*.png`` files directly inside ``d``; a non-existent ``d`` counts as 0."""
    folder = Path(d)
    matches = list(folder.glob('*.png')) if folder.exists() else []
    return len(matches)
# Number of PNG files found in each expected image folder.
png_counts = {
    label: count_png(folder)
    for label, folder in (
        ('train_images', train_img_dir),
        ('train_masks', train_mask_dir),
        ('test_images', test_img_dir),
    )
}
print('PNG counts:', png_counts)

# Find one image to inspect: a file named after one of the first 20 train ids
# if possible, otherwise the first PNG returned by the folder listing.
sample = None
if train_df is not None and 'id' in train_df.columns:
    head_ids = train_df['id'].astype(str).head(20)
    for cid in head_ids:
        p = train_img_dir / f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
if sample:
    with Image.open(sample) as im:
        im_mode, im_size = im.mode, im.size
    print('Sample image:', sample.name, '| mode:', im_mode, '| size:', im_size)
else:
    print('No sample image found.')

# Percentage of train ids that also appear in depths_df.
if train_df is not None and depths_df is not None:
    if 'id' in train_df.columns and 'id' in depths_df.columns:
        coverage = train_df['id'].isin(depths_df['id']).mean()
        print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# torch/CUDA probe; an exception is printed instead of propagating.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch import failed:', e)
# nvidia-smi device listing; also tolerant of failure.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Presence and byte size of each expected CSV.
root = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def read_head(p, n=5):
    """Read the CSV at ``p``, print its shape and leading rows, return it.

    Args:
        p: CSV path as ``str`` or ``Path``.
        n: How many rows of the frame to print.

    Returns:
        The loaded ``DataFrame``; ``None`` if the path does not exist or the
        file contains nothing to parse.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # pandas raises on a file with no columns; report it and return None.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Read the three tables; a missing CSV leaves the name bound to None.
train_df = read_head('train.csv', n=5)
depths_df = read_head('depths.csv', n=5)
sub_df = read_head('sample_submission.csv', n=5)

# Image and mask folders, followed by a presence report.
train_root = Path('train')
train_img_dir = train_root / 'images'
train_mask_dir = train_root / 'masks'
test_img_dir = Path('test') / 'images'
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Return the number of ``*.png`` entries directly under ``d``, or 0 when ``d`` is absent."""
    target = Path(d)
    if target.exists():
        return len([*target.glob('*.png')])
    return 0
# Number of PNG files found in each expected image folder.
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

# Locate one training image: prefer a file matching one of the first 20 train
# ids, otherwise take the first PNG the folder listing returns.
sample = None
ids_known = train_df is not None and 'id' in train_df.columns
if ids_known:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir / f'{cid}.png'
        if not p.exists():
            continue
        sample = p
        break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    if imgs:
        sample = imgs[0]
if not sample:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Percentage of train ids that also appear in depths_df.
frames_ready = train_df is not None and depths_df is not None
if frames_ready and 'id' in train_df.columns and 'id' in depths_df.columns:
    coverage = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Quick Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd

# Banner: interpreter version (first line only), OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Report presence and byte size of each expected CSV, resolved against the CWD.
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def head_csv(p, n=5):
    """Print a short preview of a CSV file and return it as a DataFrame.

    Args:
        p: Path of the CSV (``str`` or ``Path``).
        n: Number of rows shown in the preview.

    Returns:
        The ``DataFrame`` read from ``p``, or ``None`` if ``p`` does not exist
        or the file has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Keep the "return None on unusable input" contract for blank files.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; head_csv returns None for a missing file.
train_df = head_csv('train.csv', 5)
depths_df = head_csv('depths.csv', 5)
sub_df = head_csv('sample_submission.csv', 5)

# Guard on the column as well as on the frame: a train.csv without 'rle_mask'
# would otherwise raise KeyError. The rate is scaled by 100 so the number
# printed is the percentage the label announces.
if train_df is not None and 'rle_mask' in train_df.columns:
    null_pct = train_df['rle_mask'].isna().mean() * 100
    print(f'Train null rle %: {null_pct:.2f}')
# Same guard for the 'id' join key on both tables.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    cov = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {cov*100:.2f}%')

print('Audit done.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Best-effort torch/CUDA probe; a failure is reported, not raised.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA available:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)

# Best-effort device listing through nvidia-smi.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    if p.exists():
        status, size = 'OK', p.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{f}:', status, '| size:', size)

def read_head(p, n=5):
    """Load a CSV, echo its shape and first ``n`` rows, and hand back the frame.

    Args:
        p: CSV location (``str`` or ``Path``).
        n: Row count for the printed preview.

    Returns:
        ``DataFrame`` with the file's contents, or ``None`` when the file is
        absent or has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # A blank file would otherwise raise and stop the rest of the cell.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; each name is None when its CSV is missing.
train_df = read_head('train.csv', n=5)
depths_df = read_head('depths.csv', n=5)
sub_df = read_head('sample_submission.csv', n=5)

# Expected image folders, then a presence report for each of them.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Number of PNG files found in each expected image folder.
png_counts = {
    label: count_png(folder)
    for label, folder in (
        ('train_images', train_img_dir),
        ('train_masks', train_mask_dir),
        ('test_images', test_img_dir),
    )
}
print('PNG counts:', png_counts)

# Find one image to inspect: a file named after one of the first 20 train ids
# if possible, otherwise the first PNG returned by the folder listing.
sample = None
if train_df is not None and 'id' in train_df.columns:
    head_ids = train_df['id'].astype(str).head(20)
    for cid in head_ids:
        p = train_img_dir / f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
if sample:
    with Image.open(sample) as im:
        im_mode, im_size = im.mode, im.size
    print('Sample image:', sample.name, '| mode:', im_mode, '| size:', im_size)
else:
    print('No sample image found.')

# Percentage of train ids that also appear in depths_df.
if train_df is not None and depths_df is not None:
    if 'id' in train_df.columns and 'id' in depths_df.columns:
        coverage = train_df['id'].isin(depths_df['id']).mean()
        print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# Non-fatal torch/CUDA probe: report and continue on any failure.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA available:', has_cuda)
    if has_cuda:
        print('GPU:', torch.cuda.get_device_name(0))
        mem_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(mem_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)

# Non-fatal nvidia-smi probe listing the visible devices.
try:
    raw = subprocess.check_output(['nvidia-smi', '-L'], stderr=subprocess.STDOUT)
    out = raw.decode()
    print('nvidia-smi -L:\n', out)
except Exception as e:
    print('nvidia-smi not available:', e)

# Presence and byte size of each expected CSV.
root = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def read_head(p, n=5):
    """Read a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: File path of the CSV (``str`` or ``Path``).
        n: Number of rows to print from the top of the frame.

    Returns:
        The ``DataFrame``, or ``None`` if the file is missing or has no
        parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Report a blank file and carry on, mirroring the missing-file path.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Read the three tables; a missing CSV leaves the name bound to None.
train_df = read_head('train.csv', n=5)
depths_df = read_head('depths.csv', n=5)
sub_df = read_head('sample_submission.csv', n=5)

# Image and mask folders, followed by a presence report.
train_root = Path('train')
train_img_dir = train_root / 'images'
train_mask_dir = train_root / 'masks'
test_img_dir = Path('test') / 'images'
dirs_present = {
    label: folder.exists()
    for label, folder in (
        ('train/images', train_img_dir),
        ('train/masks', train_mask_dir),
        ('test/images', test_img_dir),
    )
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Count the ``*.png`` files directly inside ``d``; a non-existent ``d`` counts as 0."""
    folder = Path(d)
    matches = list(folder.glob('*.png')) if folder.exists() else []
    return len(matches)
# Number of PNG files found in each expected image folder.
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

# Locate one training image: prefer a file matching one of the first 20 train
# ids, otherwise take the first PNG the folder listing returns.
sample = None
ids_known = train_df is not None and 'id' in train_df.columns
if ids_known:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir / f'{cid}.png'
        if not p.exists():
            continue
        sample = p
        break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    if imgs:
        sample = imgs[0]
if not sample:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Percentage of train ids that also appear in depths_df.
frames_ready = train_df is not None and depths_df is not None
if frames_ready and 'id' in train_df.columns and 'id' in depths_df.columns:
    coverage = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Best-effort torch/CUDA probe; a failure is reported, not raised.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA available:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3, 2))
except Exception as e:
    print('Torch not available:', e)

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    if p.exists():
        status, size = 'OK', p.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{f}:', status, '| size:', size)

def read_head(p, n=5):
    """Preview a CSV (shape plus first ``n`` rows) and return it as a DataFrame.

    Args:
        p: CSV path, ``str`` or ``Path``.
        n: Number of rows to include in the printed preview.

    Returns:
        The parsed ``DataFrame``; ``None`` when the path does not exist or the
        file has no parseable content.
    """
    p = Path(p)
    if not p.exists():
        print('Missing', p)
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # Blank file: say so and return None instead of raising.
        print('Empty', p)
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three tables; each name is None when its CSV is missing.
train_df = read_head('train.csv', n=5)
depths_df = read_head('depths.csv', n=5)
sub_df = read_head('sample_submission.csv', n=5)

# Expected image folders, then a presence report for each of them.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Return the number of ``*.png`` entries directly under ``d``, or 0 when ``d`` is absent."""
    target = Path(d)
    if target.exists():
        return len([*target.glob('*.png')])
    return 0
# Number of PNG files found in each expected image folder.
png_counts = {
    label: count_png(folder)
    for label, folder in (
        ('train_images', train_img_dir),
        ('train_masks', train_mask_dir),
        ('test_images', test_img_dir),
    )
}
print('PNG counts:', png_counts)

# Find one image to inspect: a file named after one of the first 20 train ids
# if possible, otherwise the first PNG returned by the folder listing.
sample = None
if train_df is not None and 'id' in train_df.columns:
    head_ids = train_df['id'].astype(str).head(20)
    for cid in head_ids:
        p = train_img_dir / f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
if sample:
    with Image.open(sample) as im:
        im_mode, im_size = im.mode, im.size
    print('Sample image:', sample.name, '| mode:', im_mode, '| size:', im_size)
else:
    print('No sample image found.')

# Percentage of train ids that also appear in depths_df.
if train_df is not None and depths_df is not None:
    if 'id' in train_df.columns and 'id' in depths_df.columns:
        coverage = train_df['id'].isin(depths_df['id']).mean()
        print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd
from PIL import Image

# Print the interpreter version (first line only), platform string and CWD.
for label, value in (
    ('Python:', sys.version.split('\n')[0]),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(label, value)

# torch/CUDA probe; an exception is printed instead of propagating.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA:', has_cuda)
    if has_cuda:
        print('GPU:', torch.cuda.get_device_name(0))
        mem_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(mem_bytes/1024**3,2))
except Exception as e:
    print('Torch import failed:', e)

# Presence and byte size of each expected CSV.
root = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = root / f
    found = p.exists()
    print(f'{f}:', 'OK' if found else 'MISSING', '| size:', p.stat().st_size if found else '-')

def read_csv_head(p, n=5):
    """Load a CSV, print its shape and first ``n`` rows, and return the frame.

    Args:
        p: Path of the CSV file (``str`` or ``Path``).
        n: Number of leading rows to print.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the file does not exist or
        has no parseable content.
    """
    if not Path(p).exists():
        print(f'Missing {p}')
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # A zero-byte/blank file is reported like a missing one instead of
        # aborting the whole audit cell.
        print(f'Empty {p}')
        return None
    print(f'{p}: shape {df.shape}')
    print(df.head(n))
    return df

# Read the three tables; a missing CSV leaves the name bound to None.
train_df = read_csv_head('train.csv', n=5)
depths_df = read_csv_head('depths.csv', n=5)
sub_df = read_csv_head('sample_submission.csv', n=5)

# Image and mask folders, followed by a presence report.
train_root = Path('train')
train_img_dir = train_root / 'images'
train_mask_dir = train_root / 'masks'
test_img_dir = Path('test') / 'images'
dirs_present = {
    label: folder.exists()
    for label, folder in (
        ('train/images', train_img_dir),
        ('train/masks', train_mask_dir),
        ('test/images', test_img_dir),
    )
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Return how many ``*.png`` entries sit directly in ``d`` (0 if ``d`` does not exist)."""
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# Number of PNG files found in each expected image folder.
png_counts = {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
}
print('PNG counts:', png_counts)

# Locate one training image: prefer a file matching one of the first 20 train
# ids, otherwise take the first PNG the folder listing returns.
sample = None
ids_known = train_df is not None and 'id' in train_df.columns
if ids_known:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir / f'{cid}.png'
        if not p.exists():
            continue
        sample = p
        break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    if imgs:
        sample = imgs[0]
if not sample:
    print('No sample image found.')
else:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)

# Percentage of train ids that also appear in depths_df.
frames_ready = train_df is not None and depths_df is not None
if frames_ready and 'id' in train_df.columns and 'id' in depths_df.columns:
    coverage = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform
from pathlib import Path
import pandas as pd
from PIL import Image

# Banner: interpreter version, OS string and working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Best-effort torch/CUDA probe; a failure is reported, not raised.
try:
    import torch
    cuda_ok = torch.cuda.is_available()
    print('Torch:', torch.__version__, '| CUDA available:', cuda_ok)
    if cuda_ok:
        print('GPU:', torch.cuda.get_device_name(0))
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        print('GPU Mem (GB):', round(total_bytes/1024**3,2))
except Exception as e:
    print('Torch not available:', e)

# Status line per expected CSV: OK/MISSING plus byte size ('-' when absent).
root = Path('.')
for f in ('train.csv', 'depths.csv', 'sample_submission.csv'):
    p = root / f
    if p.exists():
        status, size = 'OK', p.stat().st_size
    else:
        status, size = 'MISSING', '-'
    print(f'{f}:', status, '| size:', size)

def read_csv_head(p, n=5):
    """Read the CSV at ``p``, print its shape and leading rows, return it.

    Args:
        p: CSV path as ``str`` or ``Path``.
        n: How many rows of the frame to print.

    Returns:
        The loaded ``DataFrame``; ``None`` if the path does not exist or the
        file contains nothing to parse.
    """
    if not Path(p).exists():
        print(f'Missing {p}')
        return None
    try:
        df = pd.read_csv(p)
    except pd.errors.EmptyDataError:
        # pandas raises on a file with no columns; report it and return None.
        print(f'Empty {p}')
        return None
    print(f'{p}: shape {df.shape}')
    print(df.head(n))
    return df

# Load the three tables; each name is None when its CSV is missing.
train_df = read_csv_head('train.csv', n=5)
depths_df = read_csv_head('depths.csv', n=5)
sub_df = read_csv_head('sample_submission.csv', n=5)

# Expected image folders, then a presence report for each of them.
train_img_dir = Path('train', 'images')
train_mask_dir = Path('train', 'masks')
test_img_dir = Path('test', 'images')
dirs_present = {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
}
print('Dirs exist:', dirs_present)
def count_png(d):
    """Count the ``*.png`` files directly inside ``d``; a non-existent ``d`` counts as 0."""
    folder = Path(d)
    matches = list(folder.glob('*.png')) if folder.exists() else []
    return len(matches)
# Number of PNG files found in each expected image folder.
png_counts = {
    label: count_png(folder)
    for label, folder in (
        ('train_images', train_img_dir),
        ('train_masks', train_mask_dir),
        ('test_images', test_img_dir),
    )
}
print('PNG counts:', png_counts)

# Find one image to inspect: a file named after one of the first 20 train ids
# if possible, otherwise the first PNG returned by the folder listing.
sample = None
if train_df is not None and 'id' in train_df.columns:
    head_ids = train_df['id'].astype(str).head(20)
    for cid in head_ids:
        p = train_img_dir / f'{cid}.png'
        if p.exists():
            sample = p
            break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
if sample:
    with Image.open(sample) as im:
        im_mode, im_size = im.mode, im.size
    print('Sample image:', sample.name, '| mode:', im_mode, '| size:', im_size)
else:
    print('No sample image found.')

# Percentage of train ids that also appear in depths_df.
if train_df is not None and depths_df is not None:
    if 'id' in train_df.columns and 'id' in depths_df.columns:
        coverage = train_df['id'].isin(depths_df['id']).mean()
        print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

# TGS Salt Identification Challenge — Medal Plan

Objectives:
- Ship a strong baseline fast; iterate to medal via CV-driven improvements.

Data & Files:
- train.csv: id,rle_mask (RLE may be empty — negative examples).
- depths.csv: id,depth; joinable feature.
- Images: 101x101 PNGs at train/images and masks in train/masks (or RLE). Test at test/images.
- sample_submission.csv: required schema (id,rle_mask).

Validation:
- 5-fold stratified KFold, stratifying on empty vs. non-empty mask; optionally also stratify by depth bin.
- Fixed seed; reuse same folds for all runs.
- CV metric: mean average precision over IoU thresholds 0.50–0.95 (step 0.05).

Baseline Model:
- U-Net (ResNet18/EfficientNet-B0 encoder via segmentation_models_pytorch).
- Input 128x128 (padded from 101x101); channels: image + depth + coordinate maps (y position, distance to center).
- Augs: flips, slight shifts/rotations; keep light initially.
- Loss: BCEWithLogits + Lovasz hinge (or Soft Dice).
- Optim: AdamW, cosine schedule, AMP; early stopping on CV.

Inference:
- Flip TTA; average logits; per-fold threshold tuning on OOF.
- Post-process: small-object removal.
- Encode to RLE; create submission.csv.

Iteration Path:
1) Environment/Data audit → implement loader, CV, metric.
2) Train baseline 5-fold; get OOF and LB.
3) Improve: encoder, resolution (256), augs, seeds; simple blends.

Expert Review Checkpoints:
- After env/data audit + CV implementation.
- After baseline OOF + first submission.
- After major improvements (res/encoder/augs/blends).

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: first line of the interpreter version string, the OS
# platform string and the working directory.
print('Python:', sys.version.split('\n')[0])
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch:', torch.__version__, '| CUDA:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU:', torch.cuda.get_device_name(0))
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print('GPU Mem (GB):', round(torch.cuda.get_device_properties(0).total_memory/1024**3,2))
except Exception as e:
    print('Torch import failed:', e)

# For each expected CSV, print whether it exists and its st_size.
root = Path('.')
for f in ['train.csv','depths.csv','sample_submission.csv']:
    p = root/f
    print(f'{f}:', 'OK' if p.exists() else 'MISSING', '| size:', p.stat().st_size if p.exists() else '-')

def read_csv_head(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'Missing {p}'); return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'Unreadable {p}: {exc}')
        return None
    print(f'{p}: shape {df.shape}')
    print(df.head(n))
    return df

# Load the three competition CSVs; read_csv_head returns None for a missing file.
train_df = read_csv_head('train.csv', 5)
depths_df = read_csv_head('depths.csv', 5)
sub_df = read_csv_head('sample_submission.csv', 5)

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_png(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('PNG counts:', {
    'train_images': count_png(train_img_dir),
    'train_masks': count_png(train_mask_dir),
    'test_images': count_png(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20):
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample = p; break
if sample is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample:
    with Image.open(sample) as im:
        print('Sample image:', sample.name, '| mode:', im.mode, '| size:', im.size)
else:
    print('No sample image found.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    coverage = train_df['id'].isin(depths_df['id']).mean()
    print(f'Depths coverage on train ids: {coverage*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p; break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
for f in ['train.csv', 'depths.csv', 'sample_submission.csv']:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=3):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p; break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# CSVs: for each expected file, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Dirs: image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Peek one image: the first of the first 20 train.csv ids whose PNG exists;
# otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p; break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Depth join sanity: share of train ids that also occur in depths.csv,
# printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess, glob
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=5):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 20 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(20).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

# TGS Salt Identification Challenge — Medal Plan

Objectives:
- Establish reliable CV and a fast baseline segmentation model.
- Iterate with targeted improvements; ensemble if time allows.

Pipeline:
1) Environment check (GPU) and data audit (CSV schema, image counts/sizes).
2) CV: 5-fold stratified K-fold, stratifying by empty vs. non-empty mask and by depth bin.
3) Model: U-Net/FPN with ResNet18/EfficientNet-B0 (SMP), 128x128 (pad 101x101).
   - Inputs: image + depth channel + coord channels.
   - Loss: BCEWithLogits + Lovasz; AMP; AdamW + cosine; early stopping.
   - Aug: flips, light intensity/shift.
4) Inference: flip TTA; threshold tuning on OOF; minor postprocess; RLE encode.
5) Iterate: higher res/encoder, seeds, blend diverse models if CV supports.

Expert review checkpoints:
- After env/data audit + CV definition.
- After baseline OOF + first leaderboard submission.
- After each major improvement (res/encoder/augs/ensemble).

In [None]:
# Environment & Data Audit
import os, sys, platform, subprocess
from pathlib import Path
import pandas as pd
from PIL import Image

# Environment header: interpreter version, OS platform string, working directory.
print('Python:', sys.version)
print('Platform:', platform.platform())
print('CWD:', os.getcwd())

def check_nvidia():
    """Print the device list from ``nvidia-smi -L``, or why it could not be obtained.

    Best-effort probe: any exception (binary not found, non-zero exit status,
    undecodable output) is caught and reported on stdout. Returns None.
    """
    command = ['nvidia-smi', '-L']
    try:
        # check=True raises on a non-zero exit; stderr is folded into stdout.
        proc = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=True)
        listing = proc.stdout.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)
# Run the nvidia-smi probe defined above.
check_nvidia()

# Probe PyTorch/CUDA. The import sits inside the try so that a missing or
# broken torch install is reported instead of aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        # total_memory / 1024**3 converts bytes to GiB (the label says "GB").
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For each expected CSV, print whether it exists and its st_size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo/f
    print(f'{f}:', 'exists' if p.exists() else 'MISSING', 'size:', p.stat().st_size if p.exists() else '-')

def safe_read_csv(p, n=3):
    """Load a CSV for the audit and print its shape plus the first ``n`` rows.

    Args:
        p: Path (str or Path) of the CSV file.
        n: Number of leading rows to print.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot be
        read/parsed (a message is printed in either case).
    """
    if not Path(p).exists():
        print(f'CSV missing: {p}')
        return None
    try:
        df = pd.read_csv(p)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A zero-byte, malformed or unreadable file must not abort the audit;
        # the calling code already treats None as "no data".
        print(f'CSV unreadable: {p} ({exc})')
        return None
    print(f'{p} shape:', df.shape)
    print(df.head(n))
    return df

# Load the three competition CSVs; safe_read_csv returns None for a missing file.
train_df = safe_read_csv('train.csv')
depths_df = safe_read_csv('depths.csv')
sub_df = safe_read_csv('sample_submission.csv')

# Image/mask directories, relative to the current working directory.
train_img_dir = Path('train/images')
train_mask_dir = Path('train/masks')
test_img_dir = Path('test/images')
print('Dirs exist:', {
    'train/images': train_img_dir.exists(),
    'train/masks': train_mask_dir.exists(),
    'test/images': test_img_dir.exists(),
})
def count_pngs(d):
    """Return how many ``*.png`` entries sit directly inside directory ``d``.

    A path that does not exist counts as 0; subdirectories are not searched.
    """
    folder = Path(d)
    if not folder.exists():
        return 0
    return sum(1 for _ in folder.glob('*.png'))
# PNG counts per directory (0 when the directory does not exist).
print('Counts:', {
    'train_images_png': count_pngs(train_img_dir),
    'train_masks_png': count_pngs(train_mask_dir),
    'test_images_png': count_pngs(test_img_dir),
})

# Choose one training image to inspect: the first of the first 10 train.csv
# ids whose PNG exists; otherwise the first PNG that glob yields, if any.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    for cid in train_df['id'].astype(str).head(10).tolist():
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p
            break
if sample_img_path is None:
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None
# Open the chosen image and report its PIL mode and size.
if sample_img_path is not None:
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Share of train ids that also occur in depths.csv, printed as a percentage.
if train_df is not None and depths_df is not None and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

In [None]:
# Environment & Data Audit
import os, sys, time, json, glob, shutil, platform, subprocess, textwrap
from pathlib import Path
import pandas as pd
from PIL import Image

# Log interpreter version, OS platform string and working directory.
for _label, _value in (
    ('Python:', sys.version),
    ('Platform:', platform.platform()),
    ('CWD:', os.getcwd()),
):
    print(_label, _value)

# Try GPU info via nvidia-smi
def check_nvidia():
    """Print the output of ``nvidia-smi -L``, or a notice if it cannot run.

    Best-effort probe: any exception (missing binary, non-zero exit status,
    undecodable output) is caught and reported instead of propagating.
    Returns None.
    """
    cmd = ['nvidia-smi', '-L']
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        listing = raw.decode()
        print('nvidia-smi -L:\n', listing)
    except Exception as err:
        print('nvidia-smi not available or failed:', err)

check_nvidia()

# Probe PyTorch: any exception raised while importing or querying it is
# reported rather than aborting the audit.
try:
    import torch
    print('Torch version:', torch.__version__)
    _cuda_ok = torch.cuda.is_available()
    print('CUDA available:', _cuda_ok)
    if _cuda_ok:
        # Only device 0 is described in detail.
        print('GPU count:', torch.cuda.device_count())
        print('GPU name:', torch.cuda.get_device_name(0))
        props = torch.cuda.get_device_properties(0)
        print(f'GPU Memory: {props.total_memory/1024**3:.2f} GB')
except Exception as e:
    print('Torch not available:', e)

# For every expected competition CSV, print whether it exists and its size.
repo = Path('.')
files = ['train.csv', 'depths.csv', 'sample_submission.csv']
for f in files:
    p = repo / f
    _present = p.exists()
    _state = 'exists' if _present else 'MISSING'
    _nbytes = p.stat().st_size if _present else '-'
    print(f'{f}:', _state, 'size:', _nbytes)

def safe_read_csv(p, n=5):
    """Read the CSV at ``p`` and print its shape plus an ``n``-row preview.

    Args:
        p: Location of the CSV file (str or path-like).
        n: How many leading rows to show.

    Returns:
        The DataFrame read from ``p``; None (after printing a notice) when
        no file exists at that path.
    """
    found = Path(p).exists()
    if not found:
        print(f'CSV missing: {p}')
        return None

    table = pd.read_csv(p)
    print(f'{p} shape:', table.shape)
    preview = table.head(n)
    print(preview)
    return table

# Load the competition tables with a 5-row preview each; a missing file
# leaves the corresponding variable as None.
train_df = safe_read_csv('train.csv', 5)
depths_df = safe_read_csv('depths.csv', 5)
sub_df = safe_read_csv('sample_submission.csv', 5)

# Inspect image directories
train_img_dir, train_mask_dir, test_img_dir = (
    Path('train/images'),
    Path('train/masks'),
    Path('test/images'),
)
_dirs_exist = {
    label: folder.exists()
    for label, folder in (
        ('train/images', train_img_dir),
        ('train/masks', train_mask_dir),
        ('test/images', test_img_dir),
    )
}
print('Dirs exist:', _dirs_exist)
def count_pngs(d):
    """Count ``*.png`` entries directly under ``d``; 0 if ``d`` does not exist."""
    target = Path(d)
    matches = list(target.glob('*.png')) if target.exists() else []
    return len(matches)
# Tally PNG files per data folder (count_pngs gives 0 for a missing folder).
_png_counts = {
    label: count_pngs(folder)
    for label, folder in (
        ('train_images_png', train_img_dir),
        ('train_masks_png', train_mask_dir),
        ('test_images_png', test_img_dir),
    )
}
print('Counts:', _png_counts)

# Peek one image: prefer the file named after the first non-null train id,
# otherwise fall back to any PNG found in the train image directory.
sample_img_path = None
if train_df is not None and 'id' in train_df.columns:
    ids = train_df['id'].dropna().astype(str)
    # Test emptiness AFTER dropna(): a non-empty frame whose ids are all NaN
    # would otherwise make .iloc[0] raise IndexError.
    cid = ids.iloc[0] if len(ids) else None
    if cid:
        p = train_img_dir/f'{cid}.png'
        if p.exists():
            sample_img_path = p

if sample_img_path is None:
    # Single fallback for every miss: no train table, no 'id' column, no
    # usable id, or the id's image file is absent. Previously the
    # "id column present but no usable id" case skipped this fallback and
    # reported that no image exists even when the directory held PNGs.
    imgs = list(train_img_dir.glob('*.png'))
    sample_img_path = imgs[0] if imgs else None

if sample_img_path is not None:
    # Context manager closes the file handle once mode/size are reported.
    with Image.open(sample_img_path) as im:
        print('Sample image:', sample_img_path, 'mode:', im.mode, 'size:', im.size)
else:
    print('No sample image found to inspect.')

# Basic sanity: share of train ids that also occur in the depths table
# (a membership check; no merge is performed).
_frames_loaded = train_df is not None and depths_df is not None
if _frames_loaded and 'id' in train_df.columns and 'id' in depths_df.columns:
    has_depths = train_df['id'].isin(depths_df['id']).mean()
    print(f'% train ids with depth info: {has_depths*100:.2f}%')

print('Audit complete.')

# TGS Salt Identification Challenge - Plan

Goal: WIN A MEDAL. Build a strong, fast baseline and iterate using reliable CV.

Plan:
- Environment: verify GPU availability and stability (PyTorch CUDA).
- Data audit:
  - Files: train.csv (id,rle_mask), depths.csv (id,depth), sample_submission.csv.
  - Images: expect 101x101 grayscale PNGs in train/images and masks in train/masks (or masks via RLE in train.csv). Test images in test/images.
  - Confirm paths, counts, sizes.
- Validation:
  - 5-fold KFold with stratification on salt presence (mask empty vs non-empty) and depth bins, fixed seed.
  - CV metric: mean average precision over IoU thresholds 0.50–0.95 (step 0.05), computed with the official AP-IoU definition.
- Baseline model:
  - U-Net/FPN with lightweight encoder (ResNet18/EfficientNet-B0) via segmentation_models_pytorch.
  - Input: pad 101x101 to 128x128; add depth channel and coordinate channels.
  - Aug: flips, light color/shift; keep simple initially.
  - Loss: BCEWithLogits + Lovasz hinge.
  - Optim: AdamW, cosine schedule, early stopping; AMP on GPU.
- Inference:
  - TTA (flips), average logits, threshold tuning per fold on OOF.
  - Post-process: small-object removal.
  - Encode masks as RLE for submission.csv.
- Iteration loop:
  1) Ship working baseline quickly.
  2) Error analysis on OOF and threshold tuning.
  3) Improve with higher-res/encoder and simple ensembling if time remains.

Checkpoints for Expert Review:
- After this plan + environment check.
- After data audit and CV split implementation.
- After baseline training (OOF) and first submission.
- After each major improvement (resolution/encoder/augment/ensemble).

# TGS Salt Identification Challenge - Plan

Goal: WIN A MEDAL. Build a strong, fast baseline and iterate using reliable CV.

Plan:
- Environment: verify GPU availability and stability (PyTorch CUDA).
- Data audit:
  - Files: train.csv (id,rle_mask), depths.csv (id,depth), sample_submission.csv.
  - Images: expect 101x101 grayscale PNGs in train/images and masks in train/masks (or masks via RLE in train.csv). Test images in test/images.
  - Confirm paths, counts, sizes.
- Validation:
  - 5-fold KFold with stratification on salt presence (mask empty vs non-empty) and depth bins, fixed seed.
  - CV metric: mean average precision over IoU thresholds 0.50–0.95 (step 0.05), computed with a fast AP-IoU implementation.
- Baseline model:
  - U-Net (or FPN) with lightweight encoder (e.g., EfficientNet-B0/ResNet18) via segmentation_models_pytorch.
  - Input: pad 101x101 to 128x128; add depth channel and coordinate channels (relative y and distance transform).
  - Aug: horizontal/vertical flips, light shifts; keep simple initially.
  - Loss: BCEWithLogits + Lovasz hinge (per-pixel BCE + IoU surrogate).
  - Optim: AdamW, cosine schedule, early stopping on CV.
  - Mixed precision (amp) on GPU.
- Inference:
  - TTA (flips), average logits, threshold tuning per fold on OOF.
  - Post-process: small-object removal; optional contour smoothing.
  - Encode masks as RLE for submission.csv.
- Iteration loop:
  1) Ship working baseline quickly (1-2 hours max).
  2) Error analysis on OOF: optimize thresholds, check empty-mask handling.
  3) Improve with higher-res (256), better encoder, and ensembling if time remains.

Checkpoints for Expert Review:
- After this plan + environment check.
- After data audit and CV split implementation.
- After baseline training (OOF) and first submission.
- After each major improvement (resolution/encoder/augment/ensemble).