In [5]:
import os
import sys

import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

sys.path.append('/app/scripts/')

from HAM10000Dataset import HAM10000Dataset
from SkinLesionClassifier import SkinLesionClassifier

# Main directory
base_dir = '/app/'

# Data directory
data_dir = os.path.join(base_dir, "data")

# Directory for images
images_dir = os.path.join(data_dir, "HAM10000_images")

# Path to the metadata CSV file (note: the file name carries no extension)
metadata_file = os.path.join(data_dir, "HAM10000_metadata")

# Models directory
models_dir = os.path.join(base_dir, "models")

# Sub-directory of the models directory used for each supported architecture
MODEL_SUBDIRS = {
    'resnet50': "resnet",
    'densenet121': "densenet",
    'efficientnet_b0': "efficientnet",
}


def resolve_model_dir(models_root, architecture):
    """Return the directory under *models_root* used for *architecture*.

    Args:
        models_root: Directory that contains one sub-directory per architecture.
        architecture: Architecture name; must be a key of ``MODEL_SUBDIRS``.

    Returns:
        ``models_root`` joined with the architecture's sub-directory name.

    Raises:
        ValueError: If *architecture* is not a supported name. Failing here
            avoids a later ``NameError`` on an undefined ``model_dir``.
    """
    try:
        subdir = MODEL_SUBDIRS[architecture]
    except KeyError:
        supported = ", ".join(sorted(MODEL_SUBDIRS))
        raise ValueError(
            f"Unsupported model_name {architecture!r}; expected one of: {supported}"
        ) from None
    return os.path.join(models_root, subdir)


# Choose the model architecture
model_name = 'efficientnet_b0'

# Directory holding the weights of the chosen architecture
model_dir = resolve_model_dir(models_dir, model_name)

# Per-channel mean / std passed to Normalize
# (presumably the ImageNet statistics expected by the pretrained backbones — confirm)
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]

# Preprocessing steps for the test dataset, applied in this order
preprocessing_steps = [
    # Resize to match the input size of the models
    transforms.Resize((224, 224)),
    # Convert the image to a tensor
    transforms.ToTensor(),
    # Normalize each channel with the statistics above
    transforms.Normalize(normalize_mean, normalize_std),
]

# Define transformations for the test dataset
val_transform = transforms.Compose(preprocessing_steps)

# Read the metadata table; the dataset receives it as a DataFrame, not as a path
metadata_df = pd.read_csv(metadata_file)

# Build the dataset over the image directory, configured for the testing phase:
# validation transform, no augmentations, no class balancing
dataset = HAM10000Dataset(
    annotations=metadata_df,
    root_dir=images_dir,
    transform=val_transform,
    augmentations=None,
    balance_classes=False,
)

# Wrap the dataset in a DataLoader; shuffle is off so batches follow dataset order
dataset_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

# Select the device: CUDA when available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the classifier for the chosen architecture with 7 output classes
classifier = SkinLesionClassifier(
    model_name=model_name,
    num_classes=7,
    weights=True,
)

# Load the best model weights
# (adjust the file name to the actual path of the model weights)
weights_filename = "best_model_ham10000_fold_3.pth"
best_model_path = os.path.join(model_dir, weights_filename)
classifier.load_model_weights(best_model_path)

# Move the model to the selected device once the weights are loaded
classifier.model.to(device)

# Extract features using the classifier
features = classifier.extract_features(dataset_loader, device)

# Convert the features to a DataFrame.
# Tensor.numpy() only works on CPU tensors that do not require grad, so detach
# and copy to host memory first; both calls are no-ops when already satisfied.
# NOTE(review): assumes extract_features returns a torch.Tensor — confirm
# against SkinLesionClassifier.
features_df = pd.DataFrame(features.detach().cpu().numpy())

# Add image filenames to the DataFrame (optional, if you want to associate
# features with image filenames). Read the column once instead of doing a
# per-row .iloc lookup; only the first len(dataset) rows are used, as before.
# NOTE(review): assumes feature row i corresponds to annotations row i (the
# loader is created with shuffle=False) — confirm against the dataset class.
image_ids = dataset.annotations['image_id'].iloc[:len(dataset)]
image_filenames = [os.path.basename(image_id) for image_id in image_ids]
features_df.insert(0, 'image_id', image_filenames)

# Save the DataFrame to a CSV file
features_csv_path = os.path.join(models_dir, "extracted_features_train_val.csv")
features_df.to_csv(features_csv_path, index=False)

print(f"Features DataFrame saved to {features_csv_path}")


Features DataFrame saved to /app/models/extracted_features_train_val.csv
