In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score

In [2]:
df = pd.read_excel(r'../Datasets/test_dataset_new.xlsx')
df.head(2)

Unnamed: 0,product_link,prc-dsc,rating-line-count,total-review-count,favorite-count,campaign-name,dd-txt-vl,sl-pn,seller-name-text,Renk,Garanti Süresi,Aktif Gürültü Önleme (ANC),Mikrofon,Bluetooth Versiyon,Suya/Tere Dayanıklılık,Dokunmatik Kontrol,Çift Telefon Desteği,Garanti Tipi,Model
0,https://www.trendyol.com/maybax/kablosuz-bluet...,"289,90 TL",Not found,Not found,Not found,150 TL ve Üzeri Kargo Bedava (Satıcı Karşılar),Not found,8.5,KZL Teknoloji,Pembe,2 Yıl,Var,Var,5.0,Var,Yok,,,
1,https://www.trendyol.com/sony/wh-ch710n-blueto...,8.587 TL,Not found,Not found,339,150 TL ve Üzeri Kargo Bedava (Satıcı Karşılar),2 gün içinde,8.6,e-babil elektronik,Mavi,2 Yıl,Var,Var,,,,,,


In [3]:
# Replace 'Not found' with np.nan and fill numerical missing values with the mean
df.replace('Not found', np.nan, inplace=True)
df['rating-line-count'] = df['rating-line-count'].astype(float)
df['total-review-count'] = pd.to_numeric(df['total-review-count'], errors='coerce')
df['favorite-count'] = pd.to_numeric(df['favorite-count'], errors='coerce')
df.fillna(df.mean(numeric_only=True), inplace=True)

In [19]:
# Encoding categorical features
categorical_features = ['Renk', 'Aktif Gürültü Önleme (ANC)', 'Mikrofon', 'Suya/Tere Dayanıklılık', 'Dokunmatik Kontrol', 'Çift Telefon Desteği']
for feature in categorical_features:
    df[feature] = LabelEncoder().fit_transform(df[feature].astype(str))

In [20]:
# Selecting relevant features for clustering
features_for_clustering = ['rating-line-count', 'total-review-count', 'favorite-count'] + categorical_features
X = df[features_for_clustering]

In [21]:
# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [22]:
# Applying K-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

  super()._check_params_vs_input(X, default_n_init=10)


In [23]:
# Applying DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(X_scaled)

In [24]:
# Evaluating the performance of the clustering algorithms using silhouette score
kmeans = silhouette_score(X_scaled, kmeans_labels)
dbscan = silhouette_score(X_scaled, dbscan_labels, metric='euclidean')

In [25]:

print(f" K-Means: {kmeans}")
print(f" DBSCAN: {dbscan}")

 K-Means: 0.36197247326253645
 DBSCAN: 0.2351155395593626
