In [7]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('cyrene_final_master_for_pbi.csv', sep=';', decimal=',')

b2c_label = 'B2C_Individual' if 'B2C_Individual' in df['Customer_Type'].unique() else 'Individual'
df_b2c = df[df['Customer_Type'] == b2c_label].copy()


cat_spend = df_b2c.pivot_table(index='Customer_Key', columns='Category', values='y', aggfunc='sum').fillna(0)
cat_spend['Total_Spend'] = cat_spend.sum(axis=1)
cat_spend = cat_spend[cat_spend['Total_Spend'] > 0].copy()


def safe_ratio(df, col):
    return df[col] / df['Total_Spend'] if col in df.columns else 0

cat_spend['Bike_Ratio'] = safe_ratio(cat_spend, 'Bikes')
cat_spend['Acc_Ratio'] = safe_ratio(cat_spend, 'Accessories')
cat_spend['Comp_Ratio'] = safe_ratio(cat_spend, 'Components')


X = cat_spend[['Bike_Ratio', 'Acc_Ratio', 'Comp_Ratio']]

#  K-MEANS VE NORMALÄ°ZASYON
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cat_spend['Kabile_ID'] = kmeans.fit_predict(X_scaled)


print("\n=== Kabile Karakterleri (Ortalamalar) ===")
print(cat_spend.groupby('Kabile_ID')[['Bike_Ratio', 'Acc_Ratio', 'Comp_Ratio']].mean())


cat_spend.reset_index()[['Customer_Key', 'Kabile_ID']].to_csv('b2c_kabile_analizi.csv', index=False, sep=';')
print("\nðŸš€ Kabile analizi tamamlandÄ±!")


=== Kabile Karakterleri (Ortalamalar) ===
Category   Bike_Ratio  Acc_Ratio  Comp_Ratio
Kabile_ID                                   
0            0.000000   0.961630         0.0
1            0.977577   0.014560         0.0
2            0.000000   0.252433         0.0

ðŸš€ Kabile analizi tamamlandÄ±!


In [12]:
df.index

RangeIndex(start=0, stop=121253, step=1)

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


df = pd.read_csv('cyrene_final_master_for_pbi.csv', sep=';', decimal=',')


b2c_label = 'B2C_Individual' if 'B2C_Individual' in df['Customer_Type'].unique() else 'Individual'
df_b2c = df[df['Customer_Type'] == b2c_label].copy()


cat_spend = df_b2c.pivot_table(index='Customer_Key', columns='Category', values='y', aggfunc='sum').fillna(0)
cat_spend['Total_Spend'] = cat_spend.sum(axis=1)
cat_spend = cat_spend[cat_spend['Total_Spend'] > 0].copy()

# DÄ°NAMÄ°K SÃœTUN KONTROLÃœ

existing_categories = [c for c in ['Bikes', 'Accessories', 'Components'] if c in cat_spend.columns]


features_to_use = []
for col in existing_categories:
    ratio_name = f'{col}_Ratio'
    cat_spend[ratio_name] = cat_spend[col] / cat_spend['Total_Spend']
    features_to_use.append(ratio_name)

# Sepet Ã‡eÅŸitliliÄŸi (KaÃ§ kategori var?)
cat_spend['Cat_Count'] = (cat_spend[existing_categories] > 0).sum(axis=1)
features_to_use.append('Cat_Count')

#  K-MEANS HAZIRLIÄžI

X = cat_spend[features_to_use]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#  KÃœMELEME 
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
cat_spend['Kabile_ID'] = kmeans.fit_predict(X_scaled)


print("\n=== Bulunan Metriklerle Kabile Karakterleri ===")
print(cat_spend.groupby('Kabile_ID')[features_to_use].mean())

# Ã‡IKTI
cat_spend.reset_index()[['Customer_Key', 'Kabile_ID']].to_csv('b2c_kabile_analizi_final.csv', index=False, sep=';')



=== Bulunan Metriklerle Kabile Karakterleri ===
Category   Bikes_Ratio  Accessories_Ratio  Cat_Count
Kabile_ID                                           
0             0.000000           0.945431   1.000000
1             0.973974           0.020371   2.000000
2             0.986604           0.000000   1.000000
3             0.000000           0.213048   0.668257


In [16]:
# TÃ¼m B2C cirosu iÃ§inde kategorilerin payÄ±
b2c_stats = df_b2c.groupby('Category')['y'].sum() / df_b2c['y'].sum() * 100
print(b2c_stats)

Category
Accessories     2.386892
Bikes          96.455792
Clothing        1.157316
Name: y, dtype: float64


In [18]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


df = pd.read_csv('cyrene_final_master_for_pbi.csv', sep=';', decimal=',')


b2c_label = 'B2C_Individual' if 'B2C_Individual' in df['Customer_Type'].unique() else 'Individual'
df_b2c = df[df['Customer_Type'] == b2c_label].copy()


cat_spend = df_b2c.pivot_table(index='Customer_Key', columns='Category', values='y', aggfunc='sum').fillna(0)
cat_spend['Total_Spend'] = cat_spend.sum(axis=1)
cat_spend = cat_spend[cat_spend['Total_Spend'] > 0].copy()

def safe_ratio(df, col):
    return df[col] / df['Total_Spend'] if col in df.columns else 0

cat_spend['Bike_Ratio'] = safe_ratio(cat_spend, 'Bikes')
cat_spend['Acc_Ratio'] = safe_ratio(cat_spend, 'Accessories')
cat_spend['Cloth_Ratio'] = safe_ratio(cat_spend, 'Clothing')


features = ['Bike_Ratio', 'Acc_Ratio', 'Cloth_Ratio']
X = cat_spend[features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
cat_spend['Kabile_ID'] = kmeans.fit_predict(X_scaled)


kabile_isimleri = {
    0: "Saf PerformansÃ§Ä±lar",    
    1: "DonanÄ±mcÄ±lar",           
    2: "Kombinciler (MaceracÄ±)", 
    3: "Stil Ä°konlarÄ±"           
}

cat_spend['Kabile_Ismi'] = cat_spend['Kabile_ID'].map(kabile_isimleri)

# 6. ANALÄ°Z VE Ã‡IKTI
print("\n=== Ä°simlendirilmiÅŸ Kabile Karakterleri ===")
print(cat_spend.groupby('Kabile_Ismi')[features].mean())

# Kaydederken hem ID'yi hem Ä°smi alÄ±yoruz ki PBI'da rahat edelim
cat_spend.reset_index()[['Customer_Key', 'Kabile_ID', 'Kabile_Ismi']].to_csv('b2c_kabile_analizi_final_v3.csv', index=False, sep=';')

print("\n Ä°simlendirilmiÅŸ analiz tamamlandÄ±! Dosya: 'b2c_kabile_analizi_final_v3.csv'")


=== Ä°simlendirilmiÅŸ Kabile Karakterleri ===
Category                Bike_Ratio  Acc_Ratio  Cloth_Ratio
Kabile_Ismi                                               
DonanÄ±mcÄ±lar              0.000000   0.982801     0.017199
Kombinciler (MaceracÄ±)    0.000000   0.490318     0.509682
Saf PerformansÃ§Ä±lar       0.977577   0.014560     0.007863
Stil Ä°konlarÄ±             0.000000   0.059515     0.940485

ðŸš€ Ä°simlendirilmiÅŸ analiz tamamlandÄ±! Dosya: 'b2c_kabile_analizi_final_v3.csv'
