# Inference Pipeline for Medical Image Analysis

This notebook performs inference on new images using:
1. CNN features (ResNet18)
2. Radiomics features
3. Fusion features (CNN + Radiomics)

Results are saved to `inference/jobs.xlsx` with two sheets:
- `predictions`: Image predictions from each model
- `feature_scores`: Extracted feature vectors for each image

In [None]:
# ========================
# Import Libraries
# ========================
import os
import shutil
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
import SimpleITK as sitk
from radiomics import featureextractor
import logging
import warnings

# Suppress warnings
warnings.filterwarnings('ignore')
logging.getLogger('radiomics').setLevel(logging.ERROR)

print("Libraries imported successfully!")

In [None]:
# ========================
# Setup Directories and Paths
# ========================
# Every path used by the pipeline is derived from this single base directory.
BASE_DIR = Path(r'C:/FeatureEx')
INFERENCE_DIR = BASE_DIR / 'inference'           # images waiting to be processed
DONE_DIR = INFERENCE_DIR / 'done'                # destination for processed images
MASKS_DIR = BASE_DIR / 'masks'
JOBS_FILE = INFERENCE_DIR / 'jobs.xlsx'          # Excel workbook collecting results
MODEL_PATH = BASE_DIR / 'best_resnet_model.pth'  # trained CNN checkpoint

# Create directories if they don't exist.
# parents=True also creates any missing ancestor (e.g. BASE_DIR on a fresh
# machine); without it mkdir raises FileNotFoundError when the parent is absent.
INFERENCE_DIR.mkdir(parents=True, exist_ok=True)
DONE_DIR.mkdir(parents=True, exist_ok=True)

print(f"Inference directory: {INFERENCE_DIR}")
print(f"Done directory: {DONE_DIR}")
print(f"Jobs file: {JOBS_FILE}")
print("\nDirectories ready!")

In [None]:
# ========================
# Initialize Excel File
# ========================
def initialize_jobs_file():
    """Make sure jobs.xlsx exists, creating a header-only workbook if needed.

    A newly created workbook contains two sheets, ``predictions`` and
    ``feature_scores``. An existing workbook is left untouched.
    """
    # Guard clause: nothing to do when the workbook is already present.
    if JOBS_FILE.exists():
        print(f"Jobs file already exists: {JOBS_FILE}")
        return

    # Header-only frames, keyed by the sheet they are written to.
    # feature_scores starts with just 'filename' (will be dynamically sized).
    empty_sheets = {
        'predictions': pd.DataFrame(columns=[
            'filename', 'timestamp', 'cnn_prediction',
            'radiomic_prediction', 'fusion_prediction'
        ]),
        'feature_scores': pd.DataFrame(columns=['filename']),
    }

    with pd.ExcelWriter(JOBS_FILE, engine='openpyxl') as writer:
        for sheet_name, frame in empty_sheets.items():
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"Created new jobs file: {JOBS_FILE}")

initialize_jobs_file()

In [None]:
# ========================
# Define ResNet Model
# ========================
class ResNetFeatureExtractor(nn.Module):
    """
    ResNet18 backbone followed by a custom feature head and a linear classifier.

    The forward pass returns both the class logits and the feature vector
    those logits were computed from.
    """
    def __init__(self, num_classes=3, feature_dim=512, pretrained=True):
        super().__init__()

        # Build the ResNet18 backbone (optionally with pretrained weights)
        backbone = models.resnet18(pretrained=pretrained)

        # Replace the stem convolution so it accepts grayscale (1 channel) input
        backbone.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )

        # Swap the original fully connected layer for a pass-through, keeping
        # its input width for the custom head below
        backbone_width = backbone.fc.in_features  # 512 for ResNet18
        backbone.fc = nn.Identity()
        self.resnet = backbone

        # Custom feature head: linear projection, ReLU, dropout
        self.feature_extractor = nn.Sequential(
            nn.Linear(backbone_width, feature_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
        )

        # Final classification layer on top of the custom features
        self.classifier = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return ``(logits, features)`` for the input batch ``x``."""
        features = self.feature_extractor(self.resnet(x))
        return self.classifier(features), features

print("ResNet model class defined!")

In [None]:
# ========================
# Load Trained Models
# ========================
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Build the architecture without pretrained weights; the trained weights are
# expected to come from the checkpoint loaded below.
cnn_model = ResNetFeatureExtractor(num_classes=3, feature_dim=512, pretrained=False)

if MODEL_PATH.exists():
    checkpoint = torch.load(MODEL_PATH, map_location=device)
    cnn_model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded CNN model from {MODEL_PATH}")
    # The accuracy entry is informational only, so a checkpoint saved without
    # it must not abort the cell after the weights were loaded successfully.
    val_acc = checkpoint.get('val_acc')
    if val_acc is not None:
        print(f"  - Validation accuracy: {val_acc:.2f}%")
else:
    print(f"WARNING: Model file not found at {MODEL_PATH}")
    print("Please train the model first using Test_FE_PCA.ipynb")
    # Execution deliberately continues, but make the consequence explicit:
    # the model keeps its random initialisation.
    print("WARNING: CNN predictions will come from untrained weights and are not meaningful.")

cnn_model = cnn_model.to(device)
cnn_model.eval()  # inference mode: disables dropout

print("\nCNN model ready for inference!")

In [None]:
# ========================
# Load Trained Classifiers
# ========================
# Note: the radiomics and fusion classifiers have to be trained and saved
# from Test_FE_PCA.ipynb. Until then the CNN model's predictions are used
# directly and both names below stay None.

# Placeholders (load from pickle if saved)
radiomics_classifier = None
fusion_classifier = None

# Radiomics feature extractor with its default configuration
radiomics_extractor = featureextractor.RadiomicsFeatureExtractor()

for status_line in (
    "Classifiers initialized!",
    "NOTE: If radiomics/fusion classifiers are not trained yet,",
    "      only CNN predictions will be available.",
):
    print(status_line)

In [None]:
# ========================
# Image Preprocessing Functions
# ========================
# Preprocessing applied to every image before it is fed to the CNN:
# resize to 224x224, convert to a tensor, normalise the single channel.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485], std=[0.229]),
    ]
)

def load_and_preprocess_image(image_path):
    """Load an image as grayscale and prepare it for CNN inference.

    Args:
        image_path: Path of the image file to open.

    Returns:
        Tuple ``(image_tensor, pil_image)``: the transformed image with a
        leading batch dimension added, and the grayscale PIL image.
    """
    # Close the file handle as soon as the pixel data has been read;
    # convert() returns a separate in-memory image, so the result stays usable
    # after the file is closed. A lingering handle could otherwise block the
    # later move of the file (notably on Windows).
    with Image.open(image_path) as source_image:
        pil_image = source_image.convert('L')
    image_tensor = transform(pil_image).unsqueeze(0)
    return image_tensor, pil_image

def create_mask_for_image(pil_image):
    """Build the SimpleITK image/mask pair used for radiomics extraction.

    The image is resized to 224x224 and converted to float32. The mask is 1
    everywhere except for a one-pixel border of 0 around the edge.

    Returns:
        Tuple ``(image_sitk, mask_sitk)``.
    """
    resized = pil_image.resize((224, 224))
    pixels = np.array(resized).astype(np.float32)

    # Start from an all-background mask and switch on everything but the border
    mask = np.zeros(pixels.shape, dtype=np.uint8)
    mask[1:-1, 1:-1] = 1

    # Wrap both arrays as SimpleITK images
    image_sitk = sitk.GetImageFromArray(pixels)
    mask_sitk = sitk.Cast(sitk.GetImageFromArray(mask), sitk.sitkUInt8)
    # Copy the image's metadata onto the mask so the two line up
    mask_sitk.CopyInformation(image_sitk)

    return image_sitk, mask_sitk

print("Preprocessing functions defined!")

In [None]:
# ========================
# Feature Extraction Functions
# ========================
def extract_cnn_features(image_tensor):
    """Run the CNN on one preprocessed image.

    Returns:
        Tuple ``(prediction, features_np)``: the predicted label (argmax
        index shifted by one) and the flattened feature vector as a NumPy
        array.
    """
    # Inference only, so no gradient tracking is needed
    with torch.no_grad():
        batch = image_tensor.to(device)
        logits, features = cnn_model(batch)
        predicted_index = logits.argmax(dim=1).item()
        features_np = features.cpu().numpy().flatten()
    prediction = predicted_index + 1  # +1 to match labels 1-3
    return prediction, features_np

def extract_radiomics_features(pil_image):
    """Compute the radiomics feature vector and, if possible, a prediction.

    Returns:
        Tuple ``(prediction, feature_vector)``. ``prediction`` is None when
        no radiomics classifier is loaded. Both are None when extraction
        raised an error (the error is printed, not re-raised).
    """
    try:
        image_sitk, mask_sitk = create_mask_for_image(pil_image)
        raw_output = radiomics_extractor.execute(image_sitk, mask_sitk)

        # Keep only the entries whose key starts with "original"
        feature_vector = np.array([
            float(value)
            for name, value in raw_output.items()
            if name.startswith("original")
        ])

        # Without a trained classifier there is nothing to predict with
        prediction = None
        if radiomics_classifier is not None:
            prediction = radiomics_classifier.predict([feature_vector])[0]

        return prediction, feature_vector
    except Exception as e:
        print(f"Error extracting radiomics features: {e}")
        return None, None

def extract_fusion_features(cnn_features, radiomic_features):
    """Concatenate CNN and radiomics features and, if possible, predict.

    Returns:
        Tuple ``(prediction, fusion_features)``. Both are None when either
        input is None; ``prediction`` alone is None when no fusion
        classifier is loaded.
    """
    both_present = cnn_features is not None and radiomic_features is not None
    if not both_present:
        return None, None

    # CNN features first, radiomics features second
    fusion_features = np.concatenate((cnn_features, radiomic_features))

    if fusion_classifier is None:
        return None, fusion_features

    return fusion_classifier.predict([fusion_features])[0], fusion_features

print("Feature extraction functions defined!")

In [None]:
# ========================
# Main Inference Function
# ========================
def process_image(image_path):
    """
    Process a single image through the inference pipeline.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        dict with keys 'filename', 'timestamp', 'predictions' and 'features'.
        'predictions' and 'features' are dicts keyed by 'cnn', 'radiomic'
        and 'fusion'; an entry is None when that stage produced no value.
    """
    filename = Path(image_path).name
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')

    print(f"\nProcessing: {filename}")

    # Load and preprocess image
    image_tensor, pil_image = load_and_preprocess_image(image_path)

    # Extract CNN features and prediction
    print("  - Extracting CNN features...")
    cnn_pred, cnn_features = extract_cnn_features(image_tensor)
    print(f"    CNN prediction: {cnn_pred}")

    # Extract radiomics features and prediction
    print("  - Extracting radiomics features...")
    radiomic_pred, radiomic_features = extract_radiomics_features(pil_image)
    if radiomic_pred is not None:
        print(f"    Radiomics prediction: {radiomic_pred}")
    elif radiomic_features is None:
        # No features at all means extraction itself failed; do not blame
        # the missing classifier for it.
        print("    Radiomics prediction: N/A (feature extraction failed)")
    else:
        print("    Radiomics prediction: N/A (no classifier)")

    # Extract fusion features and prediction
    print("  - Extracting fusion features...")
    fusion_pred, fusion_features = extract_fusion_features(cnn_features, radiomic_features)
    if fusion_pred is not None:
        print(f"    Fusion prediction: {fusion_pred}")
    elif fusion_features is None:
        # Fusion needs both feature vectors; one of them is missing.
        print("    Fusion prediction: N/A (missing input features)")
    else:
        print("    Fusion prediction: N/A (no classifier)")

    # Prepare results
    results = {
        'filename': filename,
        'timestamp': timestamp,
        'predictions': {
            'cnn': cnn_pred,
            'radiomic': radiomic_pred,
            'fusion': fusion_pred
        },
        'features': {
            'cnn': cnn_features,
            'radiomic': radiomic_features,
            'fusion': fusion_features
        }
    }

    return results

print("Main inference function defined!")

In [None]:
# ========================
# Save Results to Excel
# ========================
def save_results_to_excel(results):
    """
    Append one image's results to jobs.xlsx.

    Both sheets are read in full, one row is appended to each, and the whole
    workbook is written back.
    """
    # Load what the workbook currently holds
    predictions_df = pd.read_excel(JOBS_FILE, sheet_name='predictions')
    feature_scores_df = pd.read_excel(JOBS_FILE, sheet_name='feature_scores')

    predictions = results['predictions']
    features = results['features']

    # One new row for the predictions sheet
    prediction_row = pd.DataFrame([{
        'filename': results['filename'],
        'timestamp': results['timestamp'],
        'cnn_prediction': predictions['cnn'],
        'radiomic_prediction': predictions['radiomic'],
        'fusion_prediction': predictions['fusion'],
    }])
    predictions_df = pd.concat([predictions_df, prediction_row], ignore_index=True)

    # One new row for the feature_scores sheet: a column per feature value,
    # numbered from 1 and prefixed with the feature source. Sources whose
    # vector is None contribute no columns.
    feature_row = {'filename': results['filename']}
    for source in ('cnn', 'radiomic', 'fusion'):
        vector = features[source]
        if vector is None:
            continue
        feature_row.update(
            (f'{source}_feature_{position}', value)
            for position, value in enumerate(vector, start=1)
        )
    feature_scores_df = pd.concat(
        [feature_scores_df, pd.DataFrame([feature_row])],
        ignore_index=True,
    )

    # Rewrite the workbook with both updated sheets
    updated_sheets = (
        ('predictions', predictions_df),
        ('feature_scores', feature_scores_df),
    )
    with pd.ExcelWriter(JOBS_FILE, engine='openpyxl', mode='w') as writer:
        for sheet_name, frame in updated_sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"  ✓ Results saved to {JOBS_FILE}")

print("Excel save function defined!")

In [None]:
# ========================
# Move Processed Image
# ========================
def move_to_done(image_path):
    """
    Move a processed image into the 'done' subdirectory.

    If a file with the same name is already there, a numeric suffix
    (``_1``, ``_2``, ...) is appended to the stem until the name is free.
    """
    destination = DONE_DIR / Path(image_path).name
    stem, suffix = destination.stem, destination.suffix

    # Handle duplicate filenames: keep bumping the counter until nothing
    # exists at the candidate path
    attempt = 0
    while destination.exists():
        attempt += 1
        destination = DONE_DIR / f"{stem}_{attempt}{suffix}"

    shutil.move(str(image_path), str(destination))
    print(f"  ✓ Moved to {destination}")

print("Move function defined!")

In [None]:
# ========================
# Run Inference Pipeline
# ========================
def run_inference_pipeline():
    """
    Main pipeline: process every image located directly in the inference directory.

    For each image: run inference, append the results to the jobs workbook,
    then move the file to the done directory. A failure on one image is
    reported with a traceback and does not stop the remaining images.
    """
    import traceback

    image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff'}

    # Scan the directory once and compare suffixes case-insensitively.
    # Globbing separately for lower- and upper-case patterns lists every file
    # twice on case-insensitive filesystems (e.g. Windows), so each image would
    # be attempted a second time after it had already been moved, and it misses
    # mixed-case suffixes such as ".Png" on case-sensitive filesystems.
    # iterdir() is not recursive, so files in subdirectories are never included.
    if INFERENCE_DIR.is_dir():
        image_files = sorted(
            entry for entry in INFERENCE_DIR.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_extensions
        )
    else:
        image_files = []

    if not image_files:
        print("No images found in inference directory.")
        print(f"Please place images in: {INFERENCE_DIR}")
        return

    print("="*70)
    print(f"Found {len(image_files)} image(s) to process")
    print("="*70)

    # Process each image
    for i, image_path in enumerate(image_files, 1):
        try:
            print(f"\n[{i}/{len(image_files)}] Processing {image_path.name}...")

            # Run inference
            results = process_image(image_path)

            # Save to Excel
            save_results_to_excel(results)

            # Move to done (only reached when the results were saved)
            move_to_done(image_path)

            print("  ✓ Completed successfully!")

        except Exception as e:
            # Batch boundary: report the failure and carry on with the next image
            print(f"  ✗ ERROR processing {image_path.name}: {e}")
            traceback.print_exc()

    print("\n" + "="*70)
    print("Inference pipeline complete!")
    print(f"Results saved to: {JOBS_FILE}")
    print(f"Processed images moved to: {DONE_DIR}")
    print("="*70)

print("Pipeline function defined!")
print("\nReady to run inference!")
print("Execute the next cell to process all images in the inference directory.")

In [None]:
# ========================
# EXECUTE INFERENCE
# ========================
# Run this cell to process all images

run_inference_pipeline()

In [None]:
# ========================
# View Results
# ========================
# View the predictions sheet.
# NOTE(review): `display` is not imported anywhere in this file; presumably it
# is the rich-output helper the notebook environment provides. Confirm before
# running this cell as a plain script.
print("Predictions:")
predictions_df = pd.read_excel(JOBS_FILE, sheet_name='predictions')
display(predictions_df)

# View the feature_scores sheet, re-read from the workbook on disk.
print("\nFeature Scores (first 5 columns):")
feature_scores_df = pd.read_excel(JOBS_FILE, sheet_name='feature_scores')
display(feature_scores_df.iloc[:, :5])  # Show first 5 columns only