In [1]:
## -1. Emergency Process Cleanup
# The environment has zombie processes consuming GPU memory.
# This cell attempts to forcefully terminate them using their PIDs from the last OOM error.
import os
import signal

# Process IDs copied from the most recent OutOfMemoryError traceback.
# NOTE(review): these are hard-coded from an earlier session; the OS can reuse
# PIDs, so confirm they still refer to the stale GPU jobs before re-running.
pids_to_kill = [65870, 160892, 202112, 414196, 454240]

print("Attempting to terminate zombie processes...")
for pid in pids_to_kill:
    try:
        # SIGKILL cannot be caught or ignored by the target process.
        os.kill(pid, signal.SIGKILL)
    except ProcessLookupError:
        # No process with this PID exists any more.
        print(f"Process with PID {pid} not found. It may have already terminated.")
    except Exception as exc:
        # Best effort: report any other failure and move on to the next PID.
        print(f"Error killing PID {pid}: {exc}")
    else:
        print(f"Successfully sent SIGKILL to PID {pid}")

print("Process termination attempt finished.")

# Show the GPU memory status after the cleanup attempt.
print("\n--- Running nvidia-smi to check GPU status ---")
os.system('nvidia-smi')

Attempting to terminate zombie processes...
Process with PID 65870 not found. It may have already terminated.
Process with PID 160892 not found. It may have already terminated.
Process with PID 202112 not found. It may have already terminated.
Process with PID 414196 not found. It may have already terminated.
Process with PID 454240 not found. It may have already terminated.
Process termination attempt finished.

--- Running nvidia-smi to check GPU status ---
Fri Sep 26 23:20:27 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         | 

0

In [None]:
## 0. Environment Cleanup
# Release this kernel's own cached GPU memory before starting.
# (This cannot free memory that is held by other processes.)
import torch
import gc

# Collect unreachable Python objects first: any CUDA tensors they still hold are
# only handed back to PyTorch's caching allocator once the objects are freed, so
# collecting before empty_cache() lets that memory be released as well.
gc.collect()

if torch.cuda.is_available():
    # Return cached-but-unused blocks held by this process to the GPU driver.
    torch.cuda.empty_cache()
    print(f"Initial GPU Memory reserved: {torch.cuda.memory_reserved() / 1E9:.2f} GB")
    print(f"Initial GPU Memory allocated: {torch.cuda.memory_allocated() / 1E9:.2f} GB")

print("CUDA cache and garbage collection cleanup attempted.")

In [None]:
## 1. Imports & Configuration
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.auto import tqdm
import gc
import random

# --- Centralized Preprocessing Function ---
# This should be identical to the one used in training.
def asinh_transform(x):
    """Return the element-wise inverse hyperbolic sine of ``x``.

    Args:
        x: Scalar or array-like accepted by ``np.arcsinh``.

    Returns:
        The result of ``np.arcsinh(x)``.
    """
    transformed = np.arcsinh(x)
    return transformed

def clip_and_normalize(x, clip_percentiles=(0.1, 99.9)):
    """Clip ``x`` to a percentile window and min-max scale the result to [0, 1].

    Args:
        x: Numeric array to normalize.
        clip_percentiles: ``(low, high)`` percentiles (0-100) that define the
            clipping window.

    Returns:
        Array with the same shape as ``x``. When the clipped data is constant
        (max equals min) an all-zero array is returned instead of dividing by
        zero, which previously produced NaNs.
    """
    lower, upper = np.percentile(x, clip_percentiles)
    x_clipped = np.clip(x, lower, upper)

    x_min = x_clipped.min()
    value_range = x_clipped.max() - x_min
    shifted = x_clipped - x_min

    if value_range == 0:
        # Constant input: every element equals the minimum, so ``shifted`` is
        # already all zeros. Skipping the division avoids 0/0 -> NaN.
        return shifted

    return shifted / value_range

def load_and_preprocess(file_path, transform_type='asinh', clip_percentiles=(0.1, 99.9)):
    """Load a ``.npy`` file and turn its first three signals into an HWC image.

    Args:
        file_path: Path passed to ``np.load``.
        transform_type: ``'asinh'`` applies ``asinh_transform``; any other
            value leaves the data untransformed.
        clip_percentiles: Percentile window forwarded to ``clip_and_normalize``.

    Returns:
        Array in (H, W, C) layout with one independently normalized channel
        per kept signal.
    """
    # Keep only the first three signals along the leading axis.
    # NOTE(review): the training notebook is said to use the same selection
    # (described there as the on-target signals) -- confirm they stay in sync.
    signals = np.load(file_path).astype(np.float32)[:3]

    if transform_type == 'asinh':
        signals = asinh_transform(signals)

    # Normalize every channel on its own, then rebuild the (C, H, W) stack.
    normalized = np.stack(
        [clip_and_normalize(channel, clip_percentiles) for channel in signals],
        axis=0,
    )

    # Albumentations expects channels last: (C, H, W) -> (H, W, C).
    return np.transpose(normalized, (1, 2, 0))

# --- Determinism ---
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Use deterministic cuDNN kernels and turn off the auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

class CFG:
    """Static settings for the inference notebook, read as class attributes."""

    # --- General ---
    seed = 42

    # --- Paths ---
    data_dir = '.'
    test_path = os.path.join(data_dir, 'test')
    sample_submission_path = os.path.join(data_dir, 'sample_submission.csv')
    model_dir = '.'

    # --- Preprocessing (keep in sync with training) ---
    preprocess_transform_type = 'asinh'
    clip_percentiles = (0.1, 99.9)

    # --- Model (keep in sync with training) ---
    model_name = 'tf_efficientnet_b3_ns'
    img_size = 256
    in_channels = 3
    num_classes = 1

    # --- Inference ---
    batch_size = 16  # deliberately small to avoid CUDA out-of-memory errors
    n_folds = 2
    use_tta = False  # TTA switched off to avoid CUDA out-of-memory errors

    # Prefer the GPU and fall back to the CPU when CUDA is unavailable.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Apply Seed ---
seed_everything(CFG.seed)

# Echo the settings that matter most for this run.
print(
    f"Using device: {CFG.device}",
    f"TTA Enabled: {CFG.use_tta}",
    f"Model: {CFG.model_name}",
    f"Ensembling {CFG.n_folds} folds.",
    sep="\n",
)

In [None]:
## 2. Dataset, Model, and Inference Functions

# --- Test Dataset ---
# This is a simplified version of the training dataset, without labels.
class SETITestDataset(Dataset):
    """Label-free dataset that yields one preprocessed test image per row.

    Args:
        df: DataFrame with a ``file_path`` column pointing at the ``.npy`` files.
        transform: Optional callable invoked as ``transform(image=image)`` that
            returns a mapping with an ``'image'`` entry (Albumentations style).
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_paths = df['file_path'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, preprocess and optionally transform the sample at ``idx``."""
        file_path = self.file_paths[idx]

        # load_and_preprocess already returns the image as (H, W, C), the layout
        # Albumentations expects. No extra transpose is applied here: guessing
        # the layout from ``shape[0] == 3`` would wrongly re-transpose an HWC
        # image whose height happens to be 3.
        image = load_and_preprocess(
            file_path,
            transform_type=CFG.preprocess_transform_type,
            clip_percentiles=CFG.clip_percentiles
        )

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image

# --- Model Definition (must match training) ---
class SETIModel(nn.Module):
    """Thin wrapper around a timm backbone; must mirror the training model.

    Args:
        model_name: timm architecture name. The default is read from
            ``CFG.model_name`` once, when the class is defined.
        pretrained: Forwarded to ``timm.create_model``; ``False`` for inference
            because the weights come from a checkpoint.
    """

    def __init__(self, model_name=CFG.model_name, pretrained=False):
        super().__init__()
        backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=CFG.in_channels,
            num_classes=CFG.num_classes,
        )
        # The attribute name ``model`` is part of the state-dict key prefix.
        self.model = backbone

    def forward(self, x):
        """Run the backbone on ``x`` and return its raw output."""
        return self.model(x)

# --- Inference Function ---
def inference_fn(test_loader, model, device):
    """Run ``model`` over ``test_loader`` and return sigmoid probabilities.

    Args:
        test_loader: Iterable yielding batches of image tensors.
        model: Module to evaluate; it is switched to eval mode.
        device: Device each batch is moved to before the forward pass.

    Returns:
        1-D NumPy array with the flattened sigmoid outputs of all batches,
        in loader order.
    """
    model.eval()
    batch_outputs = []
    progress = tqdm(test_loader, desc='Inferring')

    # Gradients are not needed for inference.
    with torch.no_grad():
        for batch in progress:
            logits = model(batch.to(device))
            probabilities = logits.sigmoid()
            batch_outputs.append(probabilities.to('cpu').numpy())

    return np.concatenate(batch_outputs).flatten()

In [None]:
## 3. Main Inference Loop

# --- Define Transforms ---
# Base transform (no augmentation)
def get_base_transforms():
    """Build the no-augmentation pipeline: resize, then convert to a tensor."""
    side = CFG.img_size
    steps = [
        A.Resize(side, side),
        ToTensorV2(),
    ]
    return A.Compose(steps)

# TTA transform (with horizontal flip)
def get_tta_transforms():
    """Build the TTA pipeline: resize, always flip horizontally, then tensor."""
    side = CFG.img_size
    steps = [
        A.Resize(side, side),
        A.HorizontalFlip(p=1.0),  # p=1.0 makes the flip unconditional
        ToTensorV2(),
    ]
    return A.Compose(steps)

# --- Prepare Test Data ---
# The sample submission supplies the list of test ids.
test_df = pd.read_csv(CFG.sample_submission_path)

def get_test_file_path(image_id):
    """Return the ``.npy`` path for ``image_id``.

    Files sit in a sub-folder of ``CFG.test_path`` named after the first
    character of the id.
    """
    subfolder = image_id[0]
    return f"{CFG.test_path}/{subfolder}/{image_id}.npy"

test_df['file_path'] = test_df['id'].apply(get_test_file_path)

print(f"Test dataframe shape: {test_df.shape}")
print(test_df.head())

# --- Run Inference ---
# Running average of the per-fold predictions (one value per test row).
final_preds = np.zeros(len(test_df))

# Pinned host memory only speeds up host-to-GPU copies; skip it on CPU.
use_pinned_memory = CFG.device.type == 'cuda'

for fold in range(CFG.n_folds):
    print(f"\n========== INFERRING FOLD {fold} ==========")

    # --- Load Model ---
    model_path = os.path.join(CFG.model_dir, f'{CFG.model_name}_fold{fold}_best.pth')
    model = SETIModel(pretrained=False)
    # map_location remaps the checkpoint tensors onto the configured device, so
    # weights saved on a GPU still load when CFG.device falls back to CPU.
    model.load_state_dict(torch.load(model_path, map_location=CFG.device))
    model.to(CFG.device)

    # --- Base Inference ---
    test_dataset = SETITestDataset(test_df, transform=get_base_transforms())
    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # keep predictions aligned with the rows of test_df
        num_workers=0,
        pin_memory=use_pinned_memory,
    )
    fold_preds = inference_fn(test_loader, model, CFG.device)

    # --- TTA Inference ---
    if CFG.use_tta:
        print("Running TTA (Horizontal Flip)...")
        tta_dataset = SETITestDataset(test_df, transform=get_tta_transforms())
        tta_loader = DataLoader(
            tta_dataset,
            batch_size=CFG.batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=use_pinned_memory,
        )
        tta_preds = inference_fn(tta_loader, model, CFG.device)
        # Average base and TTA predictions for this fold
        fold_preds = (fold_preds + tta_preds) / 2.0

    # Accumulate predictions (ensembled by averaging)
    final_preds += fold_preds / CFG.n_folds

    # Drop references to this fold's objects before the next checkpoint is
    # loaded, then release the cached GPU memory.
    del model, test_dataset, test_loader
    if CFG.use_tta:
        del tta_dataset, tta_loader
    gc.collect()
    torch.cuda.empty_cache()

# --- Create Submission File ---
# Pair every test id with its ensembled prediction and write the CSV.
submission = test_df[['id']].assign(target=final_preds)
submission.to_csv('submission.csv', index=False)

print(
    "\nInference complete.",
    "Submission file created: submission.csv",
    sep="\n",
)
print(submission.head())