In [5]:
import os
import numpy as np
import pickle
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.metrics.pairwise import cosine_similarity
import pydicom
from PIL import Image

# Input locations (DICOM directory and one query image) and the output file
# that receives the pickled feature matrix
dataset_dir = 'C:/Users/Badari/OneDrive/Desktop/SDP/lung/dataset/images/images'
test_image_path = 'C:/Users/Badari/OneDrive/Desktop/SDP/lung/dataset/images/images/LIDC-IDRI-0001-000001.dcm'
model_path = 'saved_dicom_model.pkl'

# ImageNet-pretrained VGG16 without its top layers, for 224x224 RGB input;
# it is passed to the functions below as the feature extractor
vgg_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

def process_dicom_image(image_path):
    """Read a DICOM file and return its pixels as an 8-bit image array.

    The pixel data is min-max scaled to the 0-255 range and, when it is a
    single 2-D plane, replicated into three identical channels.

    Args:
        image_path: Path of the DICOM file, passed to ``pydicom.dcmread``.

    Returns:
        A ``numpy.uint8`` array. 2-D input yields shape ``(rows, cols, 3)``;
        input with any other number of dimensions keeps its shape.
    """
    dicom_image = pydicom.dcmread(image_path)

    # Work in float64 so the subtraction below cannot wrap around in the
    # file's native integer type (e.g. int16 data spanning more than 32767).
    pixel_array = dicom_image.pixel_array.astype(np.float64)

    lowest = pixel_array.min()
    value_range = pixel_array.max() - lowest
    if value_range > 0:
        pixel_array = (pixel_array - lowest) / value_range * 255.0
    else:
        # A constant image has no contrast to stretch; dividing by the zero
        # range would produce NaNs, so map it to all zeros instead.
        pixel_array = np.zeros_like(pixel_array)
    pixel_array = pixel_array.astype(np.uint8)

    # Replicate a grayscale plane into three channels (done after scaling so
    # the arithmetic runs on one plane instead of three identical copies)
    if pixel_array.ndim == 2:
        pixel_array = np.stack([pixel_array] * 3, axis=-1)
    return pixel_array

def extract_features(image_path, model):
    """Return a flat feature vector for one DICOM image, or None on failure.

    The image is loaded with ``process_dicom_image``, resized to 224x224,
    scaled to the 0-1 range and passed through ``model.predict`` as a batch
    of one. Any exception along the way is printed and turned into a
    ``None`` return value, so callers must check for ``None``.

    Args:
        image_path: Path of the DICOM file to read.
        model: Object exposing ``predict(batch, verbose=0)``.

    Returns:
        The flattened prediction output, or ``None`` if anything failed.
    """
    try:
        pixels = process_dicom_image(image_path)

        # Resize through PIL, then convert back to an array
        resized = Image.fromarray(pixels).resize((224, 224))

        # NOTE(review): inputs are scaled to 0-1 here rather than run through
        # a Keras ``preprocess_input`` helper; confirm this is intended.
        batch = np.expand_dims(img_to_array(resized) / 255.0, axis=0)

        # verbose=0 keeps the per-call progress output quiet
        return model.predict(batch, verbose=0).flatten()
    except Exception as e:
        print(f"Error extracting features from {image_path}: {str(e)}")
        return None

def train_and_save_model(dataset_dir, model, model_path):
    """Extract features for every ``.dcm`` file in a directory and pickle them.

    Only files directly inside ``dataset_dir`` whose names end in ``.dcm``
    are considered. Files for which ``extract_features`` returns ``None``
    are skipped. What gets written to ``model_path`` is the stacked feature
    array itself, not a trained model object.

    Args:
        dataset_dir: Directory to scan for DICOM files.
        model: Feature extractor forwarded to ``extract_features``.
        model_path: Destination file for the pickled feature array.

    Returns:
        A NumPy array with one row of features per processed file.

    Raises:
        ValueError: If no file produced any features.
    """
    collected = []

    for filename in os.listdir(dataset_dir):
        if not filename.endswith('.dcm'):
            continue

        features = extract_features(os.path.join(dataset_dir, filename), model)
        if features is None:
            continue

        collected.append(features)
        processed_files = len(collected)
        if processed_files % 10 == 0:  # Progress update every ten files
            print(f"Processed {processed_files} files...")

    if not collected:
        raise ValueError("No features were extracted from the dataset")

    dataset_features = np.array(collected)
    with open(model_path, 'wb') as file:
        pickle.dump(dataset_features, file)
    print(f"Model saved to {model_path}")
    print(f"Total images processed: {len(dataset_features)}")
    return dataset_features

def test_image(test_image_path, dataset_features, model, similarity_threshold=0.8):
    """Report whether an image resembles anything in the feature set.

    The image's features are compared against every row of
    ``dataset_features`` with cosine similarity, and the highest score is
    checked against ``similarity_threshold``.

    Args:
        test_image_path: Path of the DICOM file to check.
        dataset_features: Sequence/array of feature vectors to compare with.
        model: Feature extractor forwarded to ``extract_features``.
        similarity_threshold: Score the best match must exceed to count as
            related.

    Returns:
        A human-readable message: the verdict with the best score, or an
        explanation of why no comparison could be made.
    """
    if len(dataset_features) == 0:
        return "No features in dataset to compare against"

    test_image_features = extract_features(test_image_path, model)
    if test_image_features is None:
        return "Failed to extract features from test image"

    # One query row against all dataset rows; keep only the best score
    scores = cosine_similarity([test_image_features], dataset_features)
    max_similarity = np.max(scores)

    if not max_similarity > similarity_threshold:
        return f"Not related to the dataset (Similarity: {max_similarity:.2f})"
    return f"Related to the dataset (Similarity: {max_similarity:.2f})"

def main():
    """Build the feature set from the dataset, then check the test image.

    Uses the module-level ``dataset_dir``, ``vgg_model``, ``model_path`` and
    ``test_image_path``. Any exception is caught and printed rather than
    propagated.
    """
    try:
        # Train and save
        print("Starting training...")
        dataset_features = train_and_save_model(dataset_dir, vgg_model, model_path)

        if len(dataset_features) == 0:
            print("No features were extracted from the dataset")
            return

        # Test
        print("\nTesting image...")
        print(test_image(test_image_path, dataset_features, vgg_model))

    except Exception as e:
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()

Starting training...
Processed 10 files...
Processed 20 files...
Processed 30 files...
Processed 40 files...
Processed 50 files...
Processed 60 files...
Processed 70 files...
Processed 80 files...
Processed 90 files...
Processed 100 files...
Processed 110 files...
Processed 120 files...
Processed 130 files...
Processed 140 files...
Processed 150 files...
Processed 160 files...
Processed 170 files...
Processed 180 files...
Processed 190 files...
Processed 200 files...
Processed 210 files...
Processed 220 files...
Processed 230 files...
Processed 240 files...
Processed 250 files...
Processed 260 files...
Processed 270 files...
Processed 280 files...
Processed 290 files...
Processed 300 files...
Processed 310 files...
Processed 320 files...
Processed 330 files...
Processed 340 files...
Processed 350 files...
Processed 360 files...
Processed 370 files...
Processed 380 files...
Processed 390 files...
Processed 400 files...
Processed 410 files...
Processed 420 files...
Processed 430 files...