In [3]:
!pip install segmentation-models-pytorch -q

import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split

# --- 2. CONFIGURATION ---
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 16          # batch size of the training DataLoader
EPOCHS = 15              # number of train + validate passes
LR = 0.0001              # learning rate handed to the Adam optimizer
IMG_SIZE = (256, 256)    # size passed to cv2.resize, i.e. (width, height)

# HARDCODED PATHS (Adjust if needed based on your specific input folder)
# RGB inputs and depth targets used for training/validation.
TRAIN_RGB_DIR = '/kaggle/input/depth-estimation-challenge/dataset_ready_for_kaggle/train/images'
TRAIN_DEPTH_DIR = '/kaggle/input/depth-estimation-challenge/dataset_ready_for_kaggle/train/depth'
# RGB inputs for which predictions are generated.
TEST_RGB_DIR = '/kaggle/input/depth-estimation-challenge/dataset_ready_for_kaggle/test/images'
# Folder that receives the predicted depth images; it is read back when the
# submission CSV is built.
OUTPUT_DIR = '/kaggle/working/predictions'
# Relative path, so the CSV lands in the current working directory.
SUBMISSION_FILE = 'submission.csv'

# Create the prediction folder up front; exist_ok makes re-running the cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Running on device: {DEVICE}")

# --- 3. HELPER: COMPETITION METRIC LOGIC ---
# This function mimics EXACTLY how imgs2csv.py calculates the value for the CSV
def get_mean_intensity(image_tensor):
    """Return the submission-style mean intensity of a single depth map.

    The tensor is first quantised to 8-bit the same way predictions are
    written to disk (multiply by 255, truncate to uint8). The quantised image
    then goes through the imgs2csv.py steps: rescale to 0-1 when the maximum
    exceeds 1, min-max normalise, convert back to uint8 and average.

    Args:
        image_tensor: Tensor holding one depth map; singleton dimensions are
            squeezed away. Values are assumed to lie in [0, 1].

    Returns:
        The mean of the normalised 8-bit image (a NumPy float in 0-255).
    """
    # Tensor -> NumPy, then quantise exactly like the saved PNG/JPG would be.
    raw = image_tensor.squeeze().cpu().detach().numpy()
    quantized = (raw * 255).astype(np.uint8)

    # From here on: the normalisation logic of imgs2csv.py.
    scaled = quantized.astype(np.float32)
    if np.max(scaled) > 1.0:
        scaled = scaled / 255.0

    lo = np.min(scaled)
    hi = np.max(scaled)
    if hi - lo > 0:
        scaled = (scaled - lo) / (hi - lo)
    else:
        # Constant image: shifting by the minimum leaves an all-zero map.
        scaled = scaled - lo

    return np.mean(np.uint8(scaled * 255.))

# --- 4. DATASET CLASS ---
class DepthDataset(Dataset):
    """Dataset of RGB images with optional single-channel depth targets.

    Every item is a dict with:
      * 'image': float32 tensor, channels-first RGB, resized to IMG_SIZE and
        divided by 255;
      * 'name':  the file name the sample was loaded from;
      * 'depth': float32 tensor with a leading channel axis, resized to
        IMG_SIZE and divided by 255. Only present when the dataset is not in
        test mode and a depth directory was given.

    Args:
        file_list: File names, relative to ``rgb_dir``.
        rgb_dir: Directory holding the RGB images.
        depth_dir: Directory holding the depth maps, or None.
        is_test: When True, no depth target is loaded.
    """

    def __init__(self, file_list, rgb_dir, depth_dir=None, is_test=False):
        self.file_list = file_list
        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.is_test = is_test

    def __len__(self):
        """Return the number of samples."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load and preprocess the sample at position ``idx``.

        Raises:
            FileNotFoundError: If the RGB image is missing or cannot be
                decoded.
        """
        img_name = self.file_list[idx]
        rgb_path = os.path.join(self.rgb_dir, img_name)

        image = cv2.imread(rgb_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # without this check the error would surface later as an opaque
            # OpenCV assertion inside cvtColor.
            raise FileNotFoundError(f"Could not read RGB image: {rgb_path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, IMG_SIZE)
        image = image / 255.0
        # HWC -> CHW, the layout the network expects.
        image = np.transpose(image, (2, 0, 1))

        sample = {'image': torch.tensor(image, dtype=torch.float32), 'name': img_name}

        if not self.is_test and self.depth_dir:
            depth_path = os.path.join(self.depth_dir, img_name)
            if not os.path.exists(depth_path):
                # The depth map may share the stem but be stored as PNG.
                pre, ext = os.path.splitext(img_name)
                depth_path = os.path.join(self.depth_dir, pre + ".png")

            depth = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE)
            if depth is None:
                # Best-effort fallback kept from the original: an all-zero
                # target. IMG_SIZE is a cv2 (width, height) size while NumPy
                # shapes are (rows, cols), so swap the axes to match the shape
                # cv2.resize would have produced.
                depth = np.zeros((IMG_SIZE[1], IMG_SIZE[0]), dtype=np.float32)
            else:
                depth = cv2.resize(depth, IMG_SIZE)
                depth = depth / 255.0

            # Add the channel axis: (H, W) -> (1, H, W).
            depth = np.expand_dims(depth, axis=0)
            sample['depth'] = torch.tensor(depth, dtype=torch.float32)

        return sample

# --- 5. SPLIT DATA (TRAIN vs VALIDATION) ---
print("Splitting Data...")

# Training image names, matched on extension case-insensitively. Sorting gives
# the seeded split below a stable input order.
all_files = sorted(
    name
    for name in os.listdir(TRAIN_RGB_DIR)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Hold out 15% of the training images for validation; the remaining 85% train.
train_files, val_files = train_test_split(
    all_files,
    test_size=0.15,
    random_state=42,
)

# Train/validation read depth targets; the test set is images only.
train_dataset = DepthDataset(train_files, rgb_dir=TRAIN_RGB_DIR, depth_dir=TRAIN_DEPTH_DIR)
val_dataset = DepthDataset(val_files, rgb_dir=TRAIN_RGB_DIR, depth_dir=TRAIN_DEPTH_DIR)
test_files = sorted(os.listdir(TEST_RGB_DIR))
test_dataset = DepthDataset(test_files, rgb_dir=TEST_RGB_DIR, is_test=True)

# Validation and test use one image per batch so each image is scored/saved
# individually.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

print(f"Training on {len(train_files)} images, Validating on {len(val_files)} images.")

# --- 6. MODEL SETUP ---
# U-Net with a ResNet-34 encoder initialised from ImageNet weights. It takes
# 3-channel input and emits one output channel passed through a sigmoid, which
# matches the depth targets that were divided by 255.
unet_kwargs = {
    "encoder_name": "resnet34",
    "encoder_weights": "imagenet",
    "in_channels": 3,
    "classes": 1,
    "activation": 'sigmoid',
}
model = smp.Unet(**unet_kwargs)
model = model.to(DEVICE)

# Adam on all model parameters, trained against a per-pixel MSE loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
criterion = nn.MSELoss()

# --- 7. TRAINING WITH ERROR CALCULATION ---
# Each epoch runs one optimisation pass over the training loader and then
# scores the validation loader with the submission-style metric: the RMSE
# between the per-image mean intensities of prediction and ground truth.
# Whenever that RMSE improves, the weights are written to 'best_model.pth'.
best_val_rmse = float('inf')

for epoch in range(EPOCHS):
    # A. Training step: one pass over the training data, summing batch losses.
    model.train()
    train_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]")
    for batch in train_bar:
        images, depths = batch['image'].to(DEVICE), batch['depth'].to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, depths)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # B. Validation step: collect per-image mean intensities without gradients.
    model.eval()
    pred_means, gt_means = [], []

    with torch.no_grad():
        for batch in val_loader:
            images, depths = batch['image'].to(DEVICE), batch['depth'].to(DEVICE)
            preds = model(images)

            # Only the first item of each batch is scored; the validation
            # loader is built with batch_size=1.
            val_pred_mean = get_mean_intensity(preds[0])
            val_gt_mean = get_mean_intensity(depths[0])

            pred_means.append(val_pred_mean)
            gt_means.append(val_gt_mean)

    # RMSE between the predicted and the ground-truth mean intensities.
    pred_means = np.array(pred_means)
    gt_means = np.array(gt_means)
    squared_errors = (pred_means - gt_means)**2
    val_rmse = np.sqrt(np.mean(squared_errors))

    print(f"Epoch {epoch+1} Results:")
    print(f"  > Train Pixel Loss: {train_loss/len(train_loader):.5f}")
    print(f"  > Validation RMSE (Score Estimate): {val_rmse:.5f}")

    # Checkpoint only when the validation score improves.
    if val_rmse < best_val_rmse:
        best_val_rmse = val_rmse
        torch.save(model.state_dict(), 'best_model.pth')
        print("  --> New Best Score! Model Saved.")

# --- 8. FINAL INFERENCE ---
# Reload the best checkpoint, predict a depth map for every test image and
# write it to OUTPUT_DIR as an 8-bit image.
print(f"\nTraining Done. Best Validation RMSE was: {best_val_rmse:.5f}")
print("Generating Final Predictions...")
# map_location keeps the load working even if the checkpoint was written on a
# different device than the one in use now.
model.load_state_dict(torch.load('best_model.pth', map_location=DEVICE))
model.eval()

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Saving Images"):
        images = batch['image'].to(DEVICE)
        filenames = batch['name']
        preds = model(images)

        # Save every item of the batch, not just the first one, so that a
        # larger test batch size does not silently drop predictions.
        for pred, filename in zip(preds, filenames):
            pred_depth = pred.squeeze().cpu().numpy()
            # Truncating cast to 8-bit; get_mean_intensity mirrors this step.
            pred_depth_uint8 = (pred_depth * 255).astype(np.uint8)

            save_path = os.path.join(OUTPUT_DIR, filename)
            if not save_path.endswith(('.jpg', '.png')):
                save_path += ".png"

            # cv2.imwrite returns False on failure; a missing file would
            # otherwise turn into a missing row in the submission CSV.
            if not cv2.imwrite(save_path, pred_depth_uint8):
                raise IOError(f"Failed to write prediction image: {save_path}")

# --- 9. SUBMISSION CSV ---
def images_to_mean_csv(image_folder, output_csv):
    """Write a CSV holding the normalised mean intensity of each image.

    Every ``.png`` / ``.jpg`` file in ``image_folder`` (in sorted order) is
    loaded unchanged, rescaled to 0-1 when its maximum exceeds 1, min-max
    normalised, converted to uint8 and averaged. Files OpenCV cannot read are
    skipped.

    Args:
        image_folder: Directory containing the images to summarise.
        output_csv: Destination path of the CSV with columns ``id`` (file
            name) and ``Predicted`` (mean intensity).
    """
    print("\nProcessing submission file...")

    image_names = sorted(
        entry
        for entry in os.listdir(image_folder)
        if entry.endswith((".png", ".jpg"))
    )

    rows = []
    for filename in tqdm(image_names):
        image = cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_UNCHANGED)
        if image is None:
            continue

        # Exact logic replication
        image = image.astype(np.float32)
        if np.max(image) > 1.0:
            image = image / 255.0

        lo = np.min(image)
        hi = np.max(image)
        if hi - lo > 0:
            image = (image - lo) / (hi - lo)
        else:
            # Constant image: only the offset is removed.
            image = image - lo

        rows.append([filename, np.mean(np.uint8(image * 255.))])

    submission = pd.DataFrame(rows, columns=["id", "Predicted"])
    submission.to_csv(output_csv, index=False)
    print("Done!")

# Build the submission CSV from the prediction images saved in OUTPUT_DIR.
images_to_mean_csv(OUTPUT_DIR, SUBMISSION_FILE)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.8/154.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━



Running on device: cuda
Splitting Data...
Training on 6800 images, Validating on 1200 images.


config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/87.3M [00:00<?, ?B/s]

Epoch 1/15 [Train]: 100%|██████████| 425/425 [01:29<00:00,  4.76it/s]


Epoch 1 Results:
  > Train Pixel Loss: 0.03241
  > Validation RMSE (Score Estimate): 33.87635
  --> New Best Score! Model Saved.


Epoch 2/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 2 Results:
  > Train Pixel Loss: 0.01805
  > Validation RMSE (Score Estimate): 28.77618
  --> New Best Score! Model Saved.


Epoch 3/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 3 Results:
  > Train Pixel Loss: 0.01277
  > Validation RMSE (Score Estimate): 29.15616


Epoch 4/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 4 Results:
  > Train Pixel Loss: 0.01037
  > Validation RMSE (Score Estimate): 21.43074
  --> New Best Score! Model Saved.


Epoch 5/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 5 Results:
  > Train Pixel Loss: 0.00876
  > Validation RMSE (Score Estimate): 23.23019


Epoch 6/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 6 Results:
  > Train Pixel Loss: 0.00803
  > Validation RMSE (Score Estimate): 20.59812
  --> New Best Score! Model Saved.


Epoch 7/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 7 Results:
  > Train Pixel Loss: 0.00699
  > Validation RMSE (Score Estimate): 18.64773
  --> New Best Score! Model Saved.


Epoch 8/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 8 Results:
  > Train Pixel Loss: 0.00641
  > Validation RMSE (Score Estimate): 22.28557


Epoch 9/15 [Train]: 100%|██████████| 425/425 [01:32<00:00,  4.57it/s]


Epoch 9 Results:
  > Train Pixel Loss: 0.00581
  > Validation RMSE (Score Estimate): 19.68327


Epoch 10/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.56it/s]


Epoch 10 Results:
  > Train Pixel Loss: 0.00524
  > Validation RMSE (Score Estimate): 17.70754
  --> New Best Score! Model Saved.


Epoch 11/15 [Train]: 100%|██████████| 425/425 [01:32<00:00,  4.57it/s]


Epoch 11 Results:
  > Train Pixel Loss: 0.00476
  > Validation RMSE (Score Estimate): 20.20825


Epoch 12/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 12 Results:
  > Train Pixel Loss: 0.00449
  > Validation RMSE (Score Estimate): 19.80146


Epoch 13/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 13 Results:
  > Train Pixel Loss: 0.00425
  > Validation RMSE (Score Estimate): 17.33898
  --> New Best Score! Model Saved.


Epoch 14/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 14 Results:
  > Train Pixel Loss: 0.00388
  > Validation RMSE (Score Estimate): 18.05285


Epoch 15/15 [Train]: 100%|██████████| 425/425 [01:33<00:00,  4.57it/s]


Epoch 15 Results:
  > Train Pixel Loss: 0.00380
  > Validation RMSE (Score Estimate): 18.39872

Training Done. Best Validation RMSE was: 17.33898
Generating Final Predictions...


Saving Images: 100%|██████████| 1000/1000 [00:10<00:00, 94.07it/s]



Processing submission file...


100%|██████████| 1000/1000 [00:00<00:00, 1482.99it/s]

Done!



