## Elbow method

In [1]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.metrics import calinski_harabasz_score
from sklearn.metrics import davies_bouldin_score

In [None]:
def elbow_plot(df1, df2, max_clusters=9, random_state=None):
    '''
    Plot side-by-side elbow curves for two datasets.

    For every number of clusters k from 1 to ``max_clusters`` a fresh
    KMeans model is fitted to each dataframe and its ``inertia_`` is
    recorded. The two resulting curves are drawn next to each other in a
    single figure titled 'Elbow Method'.

    Parameters
    ----------
    df1 : pd.DataFrame
        Data for the left-hand elbow curve (subplot titled 'df1').
    df2 : pd.DataFrame
        Data for the right-hand elbow curve (subplot titled 'df2').
    max_clusters : int, optional
        Largest number of clusters to try (inclusive). The default of 9
        reproduces the previously hard-coded ``range(1, 10)``.
    random_state : int, RandomState instance or None, optional
        Forwarded to every ``KMeans`` instance. Pass an int to get
        reproducible curves; the default ``None`` keeps KMeans' own
        default behaviour.

    Returns
    -------
    None.

    Raises
    ------
    ValueError
        If ``max_clusters`` is smaller than 1.
    '''
    if max_clusters < 1:
        raise ValueError('max_clusters must be at least 1')

    cluster_counts = range(1, max_clusters + 1)
    inertias_df1 = []
    inertias_df2 = []
    # Fit all models before creating the figure, so a failing fit does not
    # leave an empty figure behind. For each k, df1 is fitted before df2.
    for k in cluster_counts:
        model = KMeans(n_clusters=k, random_state=random_state)
        inertias_df1.append(model.fit(df1).inertia_)
        model = KMeans(n_clusters=k, random_state=random_state)
        inertias_df2.append(model.fit(df2).inertia_)

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle('Elbow Method', fontsize=15)
    curves = ((ax1, 'df1', inertias_df1), (ax2, 'df2', inertias_df2))
    for ax, title, inertias in curves:
        ax.plot(cluster_counts, inertias, marker='o')
        ax.set_title(title, fontsize=15)
    # Only the left subplot carries the y-label; the x-label is set once
    # on the figure.
    ax1.set_ylabel('Sum of Squared distance', fontsize=15)
    fig.supxlabel('Number of clusters', fontsize=15)
    plt.show()

In [None]:
def evaluate_clusters(n_clusters, df1, df2, df1_clusters, df2_clusters):
    '''
    Print three clustering quality scores for two clustered datasets.

    After a header line with the number of clusters, each metric is
    printed as a heading followed by the score for df1 and then the score
    for df2, both rounded to three decimals. The metrics appear in the
    order Calinski-Harabasz, Davies-Bouldin, Silhouette.

    Parameters
    ----------
    n_clusters : int
        Number of clusters used for the clustering; only used in the
        printed header.
    df1 : pd.DataFrame
        First dataset that was clustered.
    df2 : pd.DataFrame
        Second dataset that was clustered.
    df1_clusters : pd.Series
        Cluster label of every row of df1.
    df2_clusters : pd.Series
        Cluster label of every row of df2.

    Returns
    -------
    None.
    '''
    # (heading, scoring function, text appended after the df2 score).
    # The last metric has no trailer, so the output ends without a blank line.
    report = (
        ('Calinski Harabasz Score', calinski_harabasz_score, '\n'),
        ('Davies Bouldin Score', davies_bouldin_score, '\n'),
        ('Silhouette Score', silhouette_score, ''),
    )

    print(f'{n_clusters} Clusters\n')
    for heading, score, trailer in report:
        print(heading)
        print(f'{round(score(df1, df1_clusters), 3)}')
        print(f'{round(score(df2, df2_clusters), 3)}{trailer}')