### Customer Segmentation 2 - Cluster by Banking Behaviour

In [None]:
# import necessary libraries
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# load data
# The three CSV extracts are read relative to the current working directory.
customer_df = pd.read_csv('customer_table.csv')
accounts_df = pd.read_csv('accounts_table.csv')
transactions_df = pd.read_csv('transactions_table.csv')

# join tables
# Both merges use the pandas default (inner join), so customers without a
# matching account and accounts without a matching transaction are dropped.
merged_df = customer_df.merge(accounts_df, on='customer_id').merge(transactions_df, on='account_id')
# NOTE(review): after joining the transactions table every row is one matched
# customer/account/transaction combination, so the cluster labels below are
# assigned per row rather than per customer. If a single segment per customer
# is intended, aggregate by customer_id before clustering -- confirm.

# select relevant features
features = ['transaction_amount', 'balance', 'credit_score']

# deal with missing values
# Each missing value is replaced by the mean of its own column.
merged_df[features] = merged_df[features].fillna(merged_df[features].mean())

# scale and normalize data
# K-means is distance based, so the features are standardised to keep the
# column with the largest numeric range from dominating the distances.
scaler = StandardScaler()
scaled_df = scaler.fit_transform(merged_df[features])

# apply K-means algorithm
# n_init is pinned explicitly: the library default changed between
# scikit-learn releases (10 -> 'auto'), which otherwise makes the resulting
# clusters depend on the installed version and triggers a FutureWarning on
# the releases in between.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(scaled_df)

# add cluster labels to the data
# labels_ has one entry per fitted row, in the same order as merged_df.
merged_df['cluster_label'] = kmeans.labels_

# analyze cluster characteristics
# Per-cluster means are computed on the original (unscaled) feature values.
cluster_summary = merged_df.groupby('cluster_label')[features].mean()
print(cluster_summary)
