In [2]:
import numpy as np
import cv2
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import itertools


In [3]:
vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

# Define the paths to patches
png_paths = [
    r"C:\Users\lakho\Desktop\URECA\Whole Slide Images .svs\WSI 1 Tiles\132_100.png",
    r"C:\Users\lakho\Desktop\URECA\Whole Slide Images .svs\WSI 1 Tiles\132_105.png",
    r"C:\Users\lakho\Desktop\URECA\Whole Slide Images .svs\WSI 1 Tiles\132_117.png",
    r"C:\Users\lakho\Desktop\URECA\Whole Slide Images .svs\WSI 1 Tiles\132_126.png",
    r"C:\Users\lakho\Desktop\URECA\Whole Slide Images .svs\WSI 1 Tiles\132_127.png"
]


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [4]:
def extract_features(image_path):
    image = load_img(image_path, target_size=(256, 256))
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    features = vgg_model.predict(image)
    return features.flatten()


In [8]:
features = [extract_features(path) for path in png_paths]
features = np.array(features)

# Dimensionality Reduction
pca = PCA(n_components=5)
features_pca = pca.fit_transform(features)

# Adjusted parameter ranges
eps_values = np.arange(0.1, 15, 0.5)
min_samples_values = range(2, 20, 2)
best_params = {}
best_score = -1


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 493ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 462ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444ms/step


In [9]:
# Grid search over the parameter space
for eps, min_samples in itertools.product(eps_values, min_samples_values):
    # Apply DBSCAN
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(features_pca)
    labels = db.labels_
    
    # Only calculate silhouette score if there is more than one cluster
    if len(set(labels)) > 1 and -1 not in set(labels):  # Avoid if all points are noise or only one cluster
        score = silhouette_score(features_pca, labels)
        
        # Update best parameters if score is better
        if score > best_score:
            best_score = score
            best_params = {"eps": eps, "min_samples": min_samples}

print("Best Parameters:", best_params)
print("Best Silhouette Score:", best_score)

Best Parameters: {}
Best Silhouette Score: -1
