In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/bank-customer-churn/botswana_bank_customer_churn.csv


In [2]:
df=pd.read_csv("/kaggle/input/bank-customer-churn/botswana_bank_customer_churn.csv")

In [3]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'First Name', 'Date of Birth',
       'Gender', 'Marital Status', 'Number of Dependents', 'Occupation',
       'Income', 'Education Level', 'Address', 'Contact Information',
       'Customer Tenure', 'Customer Segment',
       'Preferred Communication Channel', 'Credit Score',
       'Credit History Length', 'Outstanding Loans', 'Churn Flag',
       'Churn Reason', 'Churn Date', 'Balance', 'NumOfProducts',
       'NumComplaints'],
      dtype='object')

In [4]:
##Clustring için gerekli olmayan sütunları silelim.
df=df.drop(columns=["RowNumber","CustomerId","Surname","First Name","Address","Churn Flag","Churn Reason","Churn Date"])
print(df.columns)

Index(['Date of Birth', 'Gender', 'Marital Status', 'Number of Dependents',
       'Occupation', 'Income', 'Education Level', 'Contact Information',
       'Customer Tenure', 'Customer Segment',
       'Preferred Communication Channel', 'Credit Score',
       'Credit History Length', 'Outstanding Loans', 'Balance',
       'NumOfProducts', 'NumComplaints'],
      dtype='object')


In [5]:
##Age değişkenini oluşturalım.
from datetime import datetime
from dateutil.relativedelta import relativedelta
df['Date of Birth'] = pd.to_datetime(df['Date of Birth'])

df['Age'] = df['Date of Birth'].apply(lambda x: relativedelta(datetime.now(), x).years)

print(df["Age"])

0         37
1         24
2         70
3         33
4         32
          ..
115635    59
115636    69
115637    69
115638    31
115639    49
Name: Age, Length: 115640, dtype: int64


In [6]:
df = df.drop(columns=['Date of Birth'])
print(df.shape[1])

17


In [7]:

categoric_cols= ["Gender","Marital Status","Education Level","Customer Segment","Preferred Communication Channel","Contact Information"]
sayısal_cols= [col for col in df.columns if col not in categoric_cols]
print(sayısal_cols)

['Number of Dependents', 'Occupation', 'Income', 'Customer Tenure', 'Credit Score', 'Credit History Length', 'Outstanding Loans', 'Balance', 'NumOfProducts', 'NumComplaints', 'Age']


In [8]:
##Scaling
from sklearn.preprocessing import StandardScaler
# StandardScaler oluşturma
scaler = StandardScaler()

for col in sayısal_cols:
    df[col] = scaler.fit_transform(df[[col]])

print(df[sayısal_cols].head())

ValueError: could not convert string to float: 'Information systems manager'

In [None]:
## Encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder

# Label Encoding
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])
df['Marital Status'] = le.fit_transform(df['Marital Status'])

# One-Hot Encoding
df_encoded = pd.get_dummies(df[['Customer Segment', 'Preferred Communication Channel']])
df = pd.concat([df.drop(['Customer Segment', 'Preferred Communication Channel'], axis=1), df_encoded], axis=1)
#Ordinal Encoding
ordinal_encoder = OrdinalEncoder(categories=[['High School', "Master's", "Bachelor's", 'Diploma']])
df['Education Level'] = ordinal_encoder.fit_transform(df[['Education Level']])

In [None]:
df.head()

**K-Means**

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
X = df.iloc[:].values
# Elbow Yöntemi
wcss = []
kume_sayisi_listesi = range(1, 15)
for i in kume_sayisi_listesi :
    kmeans = KMeans(n_clusters = i, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
    
plt.plot(kume_sayisi_listesi, wcss)
plt.title('Küme Sayısı Belirlemek için Dirsek Yöntemi')
plt.xlabel('Küme Sayısı')
plt.ylabel('WCSS')
plt.show()

In [None]:
kmeans = KMeans(n_clusters = 3, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0)
y_kmeans = kmeans.fit_predict(X)

In [None]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# PCA sonuçlarını görselleştirme
plt.figure(figsize=(10, 6))

# Küme 1
plt.scatter(X_pca[y_kmeans == 0, 0], X_pca[y_kmeans == 0, 1], s=100, c='red', label='Küme 1')

# Küme 2
plt.scatter(X_pca[y_kmeans == 1, 0], X_pca[y_kmeans == 1, 1], s=100, c='blue', label='Küme 2')

# Küme 3
plt.scatter(X_pca[y_kmeans == 2, 0], X_pca[y_kmeans == 2, 1], s=100, c='green', label='Küme 3')

# Küme merkezlerini gösterme
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='yellow', marker='X', label='Küme Merkezleri')

plt.title('K-Means Clustering Results (PCA)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import silhouette_score
silhouette_avg = silhouette_score(X, y_kmeans)
print(f"Silhouette Score: {silhouette_avg:.2f}")

# Küme merkezlerini göster
print("Cluster Centers:")
print(kmeans.cluster_centers_)