In [20]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import os
from sklearn.cluster import KMeans
import joblib  # For saving the K-Means model
import json    # For saving the cluster label mapping

# Define data directory and image size
data_dir = 'C:\\Users\\DELL\\OneDrive\\Desktop\\medical_snapchat\\models\\dullness_data'  # Your folder with all mixed images
img_height = 128
img_width = 128
num_clusters = 2  # We want to find 2 clusters
feature_extraction_model_path = 'vgg16_feature_extractor.h5'
kmeans_model_path = 'skin_condition_kmeans.joblib'
cluster_mapping_path = 'cluster_label_mapping.json'

# 1. Feature Extraction Model (Save it)
base_model = VGG16(weights='imagenet', include_top=False, pooling='avg', input_shape=(img_height, img_width, 3))
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
feature_extractor.save(feature_extraction_model_path)
print(f"Feature extraction model saved to: {feature_extraction_model_path}")

def load_feature_extractor(model_path):
    return tf.keras.models.load_model(model_path)

def extract_features(model, img_path):
    img = image.load_img(img_path, target_size=(img_height, img_width))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = tf.keras.applications.vgg16.preprocess_input(img_array)
    features = model.predict(img_array)
    return features.flatten()

image_paths = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f))]
features_list = []
feature_extraction_model = load_feature_extractor(feature_extraction_model_path)
for img_path in image_paths:
    try:
        features = extract_features(feature_extraction_model, img_path)
        features_list.append(features)
    except Exception as e:
        print(f"Error extracting features from {img_path}: {e}")

features_array = np.array(features_list)
print(f"Extracted features shape: {features_array.shape}")

# 2. Apply and Save Clustering Model
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
kmeans.fit(features_array)
joblib.dump(kmeans, kmeans_model_path)
print(f"K-Means model saved to: {kmeans_model_path}")

cluster_labels = kmeans.predict(features_array)

# 3. Label the Clusters (MANUAL STEP REQUIRED - UPDATE THE MAPPING BELOW)
# You MUST inspect images from each cluster (e.g., by looking at the 'image_cluster_map' below)
# and determine which cluster corresponds to 'healthy_radiant' and which to 'not_healthy_radiant'.
# Update the dictionary accordingly.
cluster_label_mapping = {0: 'healthy_radiant', 1: 'not_healthy_radiant'}

# Save the cluster label mapping
with open(cluster_mapping_path, 'w') as f:
    json.dump(cluster_label_mapping, f, indent=4)
print(f"Cluster label mapping saved to: {cluster_mapping_path}")

# Create a mapping of image path to cluster label (useful for inspection)
image_cluster_map = dict(zip(image_paths, cluster_labels))
print("\nImage to Cluster Mapping (Inspect images in each cluster to determine labels):")
for path, label in image_cluster_map.items():
    print(f"{path}: Cluster {label}")

print("\nRemember to manually inspect images from each cluster and update the 'cluster_label_mapping' dictionary in this script before using the prediction script.")



Feature extraction model saved to: vgg16_feature_extractor.h5




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [21]:
cluster_labels = kmeans.predict(features_array)

In [25]:
cluster_label_mapping = {0: 'not_healthy_radiant', 1: 'healthy_radiant'}

with open(cluster_mapping_path, 'w') as f:
    json.dump(cluster_label_mapping, f, indent=4)
print(f"Cluster label mapping saved to: {cluster_mapping_path}")

# Create a mapping of image path to cluster label (useful for inspection)
image_cluster_map = dict(zip(image_paths, cluster_labels))
print("\nImage to Cluster Mapping (Inspect images in each cluster to determine labels):")
for path, label in image_cluster_map.items():
    print(f"{path}: Cluster {label}")

print("\nRemember to manually inspect images from each cluster and update the 'cluster_label_mapping' dictionary in this script before using the prediction script.")

Cluster label mapping saved to: cluster_label_mapping.json

Image to Cluster Mapping (Inspect images in each cluster to determine labels):
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000001.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000002.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000003.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000004.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000005.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000006.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000007.png: Cluster 1
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000008.png: Cluster 0
C:\Users\DELL\OneDrive\Desktop\medical_snapchat\models\dullness_data\000009.png: Cluster 1
C:\Users\DELL\OneDrive\Desktop\medical_sna

In [26]:
# 2. Apply and Save Clustering Model
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
kmeans.fit(features_array)
joblib.dump(kmeans, kmeans_model_path)
print(f"K-Means model saved to: {kmeans_model_path}")

K-Means model saved to: skin_condition_kmeans.joblib


In [27]:
# 1. Feature Extraction Model (Save it)
base_model = VGG16(weights='imagenet', include_top=False, pooling='avg', input_shape=(img_height, img_width, 3))
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
feature_extractor.save(feature_extraction_model_path)
print(f"Feature extraction model saved to: {feature_extraction_model_path}")



Feature extraction model saved to: vgg16_feature_extractor.h5
