In [1]:
"""
train_finetune_colab.py

This script allows you to fine-tune the ResNet18 model on the Chest X-ray Pneumonia dataset.
- Downloads dataset from Kaggle
- Sets up Google Drive for saving model weights
- Trains for 3 epochs (can be modified)
- Saves fine-tuned weights to Google Drive
- Runs main.py for inference and generates submission.csv
"""

'\ntrain_finetune_colab.py\n\nThis script allows you to fine-tune the ResNet18 model on the Chest X-ray Pneumonia dataset.\n- Downloads dataset from Kaggle\n- Sets up Google Drive for saving model weights\n- Trains for 3 epochs (can be modified)\n- Saves fine-tuned weights to Google Drive\n- Runs main.py for inference and generates submission.csv\n'

In [3]:
import os
import torch
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import torch.nn as nn
from google.colab import drive
import gdown
import json

# ----------------------------------------
# Check GPU
# ----------------------------------------
print("GPU Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# ----------------------------------------
# Clone repository (if not already)
# ----------------------------------------
repo_path = "/content/radiology-agent-adk-capstone"
if not os.path.exists(repo_path):
    !git clone https://github.com/reyhan1994/radiology-agent-adk-capstone.git
os.chdir(repo_path)
print("Repo files:", os.listdir(repo_path))

# ----------------------------------------
# Install dependencies
# ----------------------------------------
!pip install -q torch torchvision kaggle gdown

GPU Available: True
GPU Name: Tesla T4
Using device: cuda
Repo files: ['memory', 'sample_images', 'main.py', 'training', 'models', 'requirements.txt', 'utils', '.gitignore', '.git', 'LICENSE', 'README.md', 'master_agent.py', 'agents', 'adk']


In [4]:
# Setup Kaggle API (environment variables recommended)
# ----------------------------------------your_username
os.environ["KAGGLE_USERNAME"] = "your_username"
os.environ["KAGGLE_KEY"] = "your_api_key"
kaggle_username = os.environ.get("KAGGLE_USERNAME")
kaggle_key      = os.environ.get("KAGGLE_KEY")
if kaggle_username and kaggle_key:
    os.makedirs("/root/.kaggle", exist_ok=True)
    with open("/root/.kaggle/kaggle.json", "w") as f:
        json.dump({"username": kaggle_username, "key": kaggle_key}, f)
    os.chmod("/root/.kaggle/kaggle.json", 0o600)
    !kaggle -v
else:
    print("⚠️ Kaggle credentials not set. Set KAGGLE_USERNAME and KAGGLE_KEY env variables.")


Kaggle API 1.7.4.5


In [5]:
# Download dataset
# ----------------------------------------
dataset_dir = "/content/pneumonia_dataset"
if not os.path.exists(dataset_dir):
    !kaggle datasets download -d paultimothymooney/chest-xray-pneumonia --unzip -p {dataset_dir}

# Fix folder structure
train_dir = os.path.join(dataset_dir, "chest_xray/train")
test_dir  = os.path.join(dataset_dir, "chest_xray/test")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


In [6]:
# Mount Google Drive for storing model weights
# ----------------------------------------
drive.mount('/content/drive')
weights_path = "/content/drive/MyDrive/radiology_agent/models/chest_classifier.pt"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

# ----------------------------------------
# Download fine-tuned weights if not present
# ----------------------------------------
if not os.path.exists(weights_path):
    gdown.download(
        url="https://drive.google.com/uc?id=1mDpUmGjR5OKXodd8DxFJVsR-iMsrPuIb",
        output=weights_path,
        quiet=False
    )
print("✅ Model weights path:", weights_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model weights path: /content/drive/MyDrive/radiology_agent/models/chest_classifier.pt


In [7]:
# Define image transformations
# ----------------------------------------
tf = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.Grayscale(num_output_channels=3),  # Convert 1-channel X-ray to 3-channel
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])  # ImageNet normalization
])

# ----------------------------------------
# Create Dataset and DataLoader
# ----------------------------------------
train_ds = datasets.ImageFolder(train_dir, transform=tf)
test_ds  = datasets.ImageFolder(test_dir, transform=tf)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=16, shuffle=False)

# ----------------------------------------
# Load pretrained ResNet18 model
# ----------------------------------------
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 3)  # Replace final layer for 3 classes: Normal / Pneumonia / Other
model = model.to(device)

# ----------------------------------------
# Define loss function and optimizer
# ----------------------------------------
criterion = nn.CrossEntropyLoss()
opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)

# ----------------------------------------
# Training loop
# ----------------------------------------
epochs = 3
for epoch in range(epochs):
    model.train()
    running_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        opt.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        opt.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {running_loss/len(train_loader):.4f}")

# ----------------------------------------
# Save trained model weights to Google Drive
# ----------------------------------------
torch.save(model.state_dict(), weights_path )
print("✅ Fine-tune done. Weights saved to:", weights_path )

Epoch 1/3 - Loss: 0.1207
Epoch 2/3 - Loss: 0.0310
Epoch 3/3 - Loss: 0.0142
✅ Fine-tune done. Weights saved to: /content/drive/MyDrive/radiology_agent/models/chest_classifier.pt


In [8]:
# Pull latest GitHub updates and run main.py for Kaggle submission
# ----------------------------------------
!git pull
!PYTHONPATH=. python main.py --input sample_images --output submission.csv


Already up to date.
Found 10 images
✅ Loaded fine-tuned model weights from /content/drive/MyDrive/radiology_agent/models/chest_classifier.pt
Processing: sample_images/005.jpeg
-> artifacts returned: {'user_request': 'sample_images/005.jpeg', 'patient_data': {'patient_id': 'sample_images/005.jpeg', 'name': 'Ali Ahmadi', 'age': 45}, 'analysis_findings': {'pathology': 'Pneumonia', 'confidence': 1.0}, 'coding_result': {'ICD_10': 'J18.9', 'CPT': '71046'}, 'final_report': 'Final Report: Pneumonia for patient Ali Ahmadi. Confidence: 1.00.', 'memory_status': 'Consolidation Successful'}
Processing: sample_images/011.jpeg
-> artifacts returned: {'user_request': 'sample_images/011.jpeg', 'patient_data': {'patient_id': 'sample_images/011.jpeg', 'name': 'Ali Ahmadi', 'age': 45}, 'analysis_findings': {'pathology': 'Pneumonia', 'confidence': 0.8982}, 'coding_result': {'ICD_10': 'J18.9', 'CPT': '71046'}, 'final_report': 'Final Report: Pneumonia for patient Ali Ahmadi. Confidence: 0.90.', 'memory_statu

In [None]:
# Download submission CSV locally
# ----------------------------------------
from google.colab import files
files.download("submission.csv")

In [None]:
# Download model weights locally (optional)
# ----------------------------------------
weights_path = save_path
files.download(weights_path)
