Amaç: Her bir müşteri grubunun (kümenin) hangi ülkelere ait olduğunu ve genel olarak nasıl bir Star Count ortalamasına sahip olduğunu anlamak.

In [4]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

In [5]:
# Load the raw review export into a DataFrame.
df = pd.read_csv(filepath_or_buffer='fashionnova_reviews.csv')

In [6]:
def extract_star_count(rating):
    """Extract the star count from a rating string.

    The expected input looks like "Rated 5 out of 5 stars"; the second
    whitespace-separated token is parsed as the star count.

    Args:
        rating: The raw rating value. Anything that is not a string in the
            expected format (missing values, malformed text) is treated as
            unparseable.

    Returns:
        The star count as an ``int``, or ``np.nan`` when the value cannot
        be parsed. A diagnostic message is printed for every unparseable
        value.
    """
    try:
        # split() without an argument collapses runs of whitespace, so stray
        # double spaces or leading/trailing blanks do not break the parse.
        return int(rating.split()[1])
    except (AttributeError, IndexError, ValueError) as e:
        # AttributeError: non-string input (e.g. NaN); IndexError: too few
        # tokens; ValueError: second token is not an integer.
        print(f"Error parsing rating: {rating} - {e}")
        return np.nan

In [7]:
# Parse every raw 'Rating' value into a numeric 'Star Count' column.
df['Star Count'] = df['Rating'].apply(func=extract_star_count)

In [8]:
# Discard rows whose rating could not be parsed. Rebinding the result
# (instead of inplace=True) keeps the cell free of in-place mutation.
df = df.dropna(subset=['Star Count'])

In [9]:
# Keep only the two columns used for clustering and drop rows with a missing
# value in either. Chaining dropna() avoids an in-place mutation of a freshly
# sliced frame, which can raise SettingWithCopyWarning.
df = df[['Country', 'Star Count']].dropna()

In [10]:
# One-hot encode 'Country' into one indicator column per country code.
df = pd.get_dummies(data=df, columns=['Country'])

In [11]:
# Standardize the feature columns before clustering.
# Any previously assigned 'Cluster' label is excluded so that re-running this
# cell after the clustering cell does not leak the labels into the features.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    df.drop(columns=['Cluster'], errors='ignore')
)

In [12]:
# Fit k-means with five clusters (fixed seed) on the standardized features
# and store each row's assigned cluster label on the frame.
kmeans = KMeans(n_clusters=5, random_state=42)
df['Cluster'] = kmeans.fit(scaled_features).labels_

In [13]:
# Per-cluster mean of every column: average star count plus the share of
# rows belonging to each country indicator.
print(df.groupby(by='Cluster').mean())

         Star Count  Country_AE  Country_AF  Country_AG  Country_AI  \
Cluster                                                               
0          3.777913     0.00000    0.000000    0.000000    0.000000   
1          3.732143     0.00000    0.000000    0.000000    0.000000   
2          4.288513     0.00056    0.000016    0.000327    0.000101   
3          3.946429     0.00000    0.000000    0.000000    0.000000   
4          2.947368     0.00000    0.000000    0.000000    0.000000   

         Country_AL  Country_AM  Country_AO  Country_AR  Country_AS  ...  \
Cluster                                                              ...   
0          0.000000    0.000000    0.000000    0.000000    0.000000  ...   
1          0.000000    0.000000    0.000000    0.000000    0.000000  ...   
2          0.000132    0.000016    0.000109    0.000047    0.000016  ...   
3          0.000000    0.000000    0.000000    0.000000    0.000000  ...   
4          0.000000    0.000000    0.000000   