In [40]:
# --- DBSCAN Clustering ---
# Step 5: Apply DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=5, metric='cosine')
dbscan_labels = dbscan.fit_predict(X)  # Fit the model and predict cluster labels

# Step 6: Assign cluster labels to the products
products['cluster_dbscan'] = dbscan_labels  # -1 indicates outliers/noise

# Step 7: Evaluate clustering with silhouette score
silhouette_avg_dbscan = silhouette_score(X, dbscan_labels)  # Silhouette score may not be as useful with DBSCAN due to noise
print(f"Silhouette Score (DBSCAN): {silhouette_avg_dbscan}")
# Step 8: Find the most similar cluster using DBSCAN (ignoring noise/outliers with -1)
search_similarities_dbscan = cosine_similarity(search_vector, X)  # Compare the search term to all product vectors
most_similar_product_dbscan = np.argmax(search_similarities_dbscan)  # Get the index of the most similar product

# Step 9: Get recommended products
recommended_products_dbscan = products.iloc[[most_similar_product_dbscan]].head(5)  # Recommend the most similar product (top 1)

# Step 10: Display the recommended products (DBSCAN)
print("Recommended Products based on your search (DBSCAN):")
print(recommended_products_dbscan[['title', 'category']])


Silhouette Score (DBSCAN): -0.00032322570185650175
Recommended Products based on your search (DBSCAN):
                                                 title category
147  Cuccio Naturale Revitalizing- Hydrating Oil Fo...    nails


In [42]:
# --- Gaussian Mixture Models (GMM) ---
# Step 5: Apply Gaussian Mixture Model clustering
n_components = 10  # Choose the number of clusters
gmm = GaussianMixture(n_components=n_components, random_state=42)
gmm_labels = gmm.fit_predict(X.toarray())  # GMM needs the data in dense format

# Step 6: Assign cluster labels to the products
products['cluster_gmm'] = gmm_labels

# Step 7: Evaluate clustering with silhouette score
silhouette_avg_gmm = silhouette_score(X, gmm_labels)  # Silhouette score to evaluate GMM clustering
print(f"Silhouette Score (GMM): {silhouette_avg_gmm}")

# Step 8: Find the most similar cluster using GMM
search_similarities_gmm = cosine_similarity(search_vector, X)  # Compare the search term to all product vectors
most_similar_product_gmm = np.argmax(search_similarities_gmm)  # Get the index of the most similar product

# Step 9: Get recommended products from GMM clustering
recommended_products_gmm = products.iloc[[most_similar_product_gmm]].head(5)  # Recommend top 1 product

# Step 10: Display the recommended products (GMM)
print("Recommended Products based on your search (GMM):")
print(recommended_products_gmm[['title', 'category', 'description']])

Silhouette Score (GMM): 0.07723909777047208
Recommended Products based on your search (GMM):
                                                 title category  \
147  Cuccio Naturale Revitalizing- Hydrating Oil Fo...    nails   

                                           description  
147  cuccio naturale revitalizing hydrating oil rep...  


In [43]:
print(f"Silhouette Score (KMeans): {silhouette_avg}")
print(f"Silhouette Score (DBSCAN): {silhouette_avg_dbscan}")
print(f"Silhouette Score (GMM): {silhouette_avg_gmm}")

# You can also display the cluster count for each algorithm
print(f"Number of clusters (KMeans): {n_clusters}")
print(f"Number of clusters (DBSCAN): {len(set(dbscan_labels)) - (1 if -1 in dbscan_labels else 0)}")
print(f"Number of clusters (GMM): {n_components}")

Silhouette Score (KMeans): 0.07723054160800924
Silhouette Score (DBSCAN): -0.00032322570185650175
Silhouette Score (GMM): 0.07723909777047208
Number of clusters (KMeans): 10
Number of clusters (DBSCAN): 9
Number of clusters (GMM): 10
