In [1]:
import numpy as np
import pandas as pd
from fcmeans import FCM
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the RFM table and the scaled feature matrix that the clustering
# models below are fitted on. Both paths are relative to the working directory.
df = pd.read_csv('rfm_inverse_transform_new.csv')
X_scaled_df = pd.read_csv('X_scaled_df_new.csv')

In [3]:
# Preview the loaded RFM table.
df

Unnamed: 0,Recency,Frequency,Monetary,Clusters
0,125,1,99.33,3
1,64,1,24.39,3
2,260,1,65.71,3
3,266,1,107.78,3
4,28,3,627.74,2
...,...,...,...,...
114970,175,1,194.11,3
114971,105,1,198.94,2
114972,173,1,363.31,1
114973,21,1,369.54,2


In [4]:
# Preview the scaled features that the clustering models are fitted on.
X_scaled_df

Unnamed: 0,Recency,Frequency,Monetary
0,-0.333986,-0.571299,-0.396089
1,-1.030698,-0.571299,-1.684829
2,0.428228,-0.571299,-0.775291
3,0.451973,-0.571299,-0.321162
4,-1.891066,1.378636,1.295904
...,...,...,...
114970,0.016199,-0.571299,0.218767
114971,-0.515444,-0.571299,0.241323
114972,0.004236,-0.571299,0.794027
114973,-2.190472,-0.571299,0.809631


In [5]:
# NumPy array of the scaled features, in the same row/column order as X_scaled_df.
X = X_scaled_df.to_numpy()
X

array([[-0.33398565, -0.57129895, -0.39608904],
       [-1.03069765, -0.57129895, -1.68482942],
       [ 0.42822847, -0.57129895, -0.77529059],
       ...,
       [ 0.00423604, -0.57129895,  0.79402727],
       [-2.19047189, -0.57129895,  0.80963095],
       [ 0.45587818, -0.57129895,  0.20672705]])

In [6]:
# Fit fuzzy c-means (5 clusters, fuzzifier m=2) on the scaled features.
# random_state is pinned because the initialisation is random: without a seed
# the cluster numbering (and potentially the partition itself) can change from
# run to run, so the saved labels and centroids could not be reproduced.
# The seed value matches the one used for the KMeans model in this notebook.
fcm = FCM(n_clusters=5, m=2, error=0.005, max_iter=100, random_state=101)
fcm.fit(X)  # X is the array built from X_scaled_df.to_numpy() in the previous cell

In [7]:
# Results of the fitted model: the cluster centres and one hard label per row
# of the scaled feature array.
fcm_centers = fcm.centers
fcm_labels = fcm.predict(X)

In [8]:
# Cluster centres in scaled feature space: one row per cluster, one column per feature.
fcm_centers

array([[ 0.33169001, -0.49866288,  0.05890159],
       [ 0.45279485, -0.51989857, -0.87093317],
       [ 0.26751904,  0.71583822,  0.60589041],
       [-1.72270969, -0.42009052, -0.31241755],
       [ 0.08149098,  2.0484052 ,  1.81550531]])

In [12]:
# Wrap the centres in a DataFrame. X, Y, Z are positional names for the three
# feature columns of X_scaled_df, in that column order.
centroids_df_fcm = pd.DataFrame(fcm_centers,columns = ['X','Y','Z'])
centroids_df_fcm

Unnamed: 0,X,Y,Z
0,0.33169,-0.498663,0.058902
1,0.452795,-0.519899,-0.870933
2,0.267519,0.715838,0.60589
3,-1.72271,-0.420091,-0.312418
4,0.081491,2.048405,1.815505


In [9]:
# Check which cluster ids were actually assigned to at least one row.
np.unique(fcm_labels)

array([0, 1, 2, 3, 4], dtype=int64)

In [10]:
# Attach each row's FCM label to the RFM table.
# The label array is assigned directly (positionally). Wrapping it in a
# one-column DataFrame first - whose column name did not even match the target
# column - makes pandas align on the index, which would silently yield NaN
# labels if df ever carried a non-default index. With direct assignment a
# length mismatch raises instead of being hidden.
df['FCM_clusters'] = fcm_labels
df

Unnamed: 0,Recency,Frequency,Monetary,Clusters,FCM_clusters
0,125,1,99.33,3,0
1,64,1,24.39,3,3
2,260,1,65.71,3,1
3,266,1,107.78,3,0
4,28,3,627.74,2,4
...,...,...,...,...,...
114970,175,1,194.11,3,0
114971,105,1,198.94,2,0
114972,173,1,363.31,1,0
114973,21,1,369.54,2,3


In [14]:
# K-means with the same number of clusters, fitted on the scaled features.
# random_state fixes the random centroid initialisation so reruns agree.
model = KMeans(n_clusters=5, init='random', random_state=101).fit(X_scaled_df)

# One integer cluster label per row, in the row order of X_scaled_df.
y_kmeans = model.labels_

In [15]:
# Labels from the `Clusters` column that was already present in the loaded CSV
# (presumably the GA-based clustering, judging by the variable name - confirm).
y_ga = df['Clusters']

In [14]:
# Fuzzy c-means labels that were added to df earlier in the notebook.
y_fc = df['FCM_clusters']

In [12]:
from sklearn.metrics import silhouette_score

In [17]:
from sklearn.metrics import silhouette_score

# Silhouette score of the K-means labelling, measured on the scaled features.
sil_kmeans = silhouette_score(X_scaled_df,y_kmeans)

In [18]:
# Silhouette score of the pre-existing `Clusters` labelling, on the same scaled features.
sil_ga = silhouette_score(X_scaled_df,y_ga)

In [15]:
# Silhouette score of the fuzzy c-means hard labels, on the same scaled features.
sil_fc = silhouette_score(X_scaled_df,y_fc)

In [17]:
# FCM silhouette score, rounded to three decimals for display.
round(sil_fc,3)

0.303

In [20]:
# Silhouette scores rounded to three decimals, one per line, in the order:
# K-means, pre-existing `Clusters` labels, fuzzy c-means.
print(*(round(value, 3) for value in (sil_kmeans, sil_ga, sil_fc)), sep='\n')

0.312
0.335
0.303


In [21]:
# Final look at the RFM table, now including the FCM_clusters column.
df

Unnamed: 0,Recency,Frequency,Monetary,Clusters,FCM_clusters
0,125,1,99.33,3,1
1,64,1,24.39,3,4
2,260,1,65.71,3,3
3,266,1,107.78,3,1
4,28,3,627.74,2,2
...,...,...,...,...,...
114970,175,1,194.11,3,1
114971,105,1,198.94,2,1
114972,173,1,363.31,1,1
114973,21,1,369.54,2,4


In [11]:
# Persist the RFM table with the added FCM_clusters column; index=False keeps
# the row index out of the file.
df.to_csv('df_with_FCM.csv',index=False)

In [14]:
# Persist the FCM cluster centres (columns X, Y, Z); index=False keeps the row
# index out of the file.
centroids_df_fcm.to_csv('centroids_df_fcm.csv',index=False)