In [3]:
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Dendrogram for feature selection
##### Plots a dendrogram based on hierarchical clustering to identify similar features.
* Input parameters:

    * df : Train dataframe 
    * variables : Important features selected by the model
    * method : Spearman or Pearson
        * Spearman: This correlation evaluates the monotonic relationship between two continuous or ordinal variables.
        * Pearson: This correlation evaluates the linear relationship between two continuous variables.
    

In [9]:
def Cluster_dendrogram(df,variables,method):
    """
    Plot a hierarchical-clustering dendrogram of feature correlations.

    The correlation matrix of the selected columns is computed with the
    requested method, Ward linkage is applied to it, and the resulting
    dendrogram is drawn with the column names as leaf labels. Features that
    merge early in the tree have similar correlation profiles, which helps
    when deciding which features are redundant.

    Signature:
    ---------
    Cluster_dendrogram(df,
    variables,
    method) -> None (the dendrogram is displayed via ``plt.show()``)

    Parameters:
    ----------
        df : pandas DataFrame
            Dataframe holding the variables to be clustered.

        variables : list
            Column names of ``df`` to include in the clustering.

        method : string
            Matched case-insensitively.
            "Pearson" : evaluates the linear relationship between two
                continuous variables.
            "Spearman" : evaluates the monotonic relationship between two
                continuous or ordinal variables.

    Raises:
    ------
        ValueError
            If ``method`` is neither "Pearson" nor "Spearman".
    """
    # Validate first so that a bad `method` fails with a clear message
    # instead of an UnboundLocalError after a figure was already opened.
    method_key = str(method).strip().lower()
    if method_key == "pearson":
        title = "Pearson Correlation"
    elif method_key == "spearman":
        title = "Spearman Correlation"
    else:
        raise ValueError(
            "method must be 'Pearson' or 'Spearman', got {!r}".format(method)
        )

    df_cluster = df[variables]
    labels = list(df_cluster)  # column names, used as dendrogram leaf labels

    if method_key == "pearson":
        corr = np.array(df_cluster.corr())
    else:
        corr = np.asarray(spearmanr(df_cluster).correlation)
        if corr.ndim == 0:
            # With exactly two variables spearmanr returns a single
            # coefficient rather than a matrix; rebuild the 2x2 matrix so
            # the linkage step receives the same shape as in the general case.
            rho = float(corr)
            corr = np.array([[1.0, rho], [rho, 1.0]])

    plt.figure(figsize=(16, 8))
    # The rows of the correlation matrix are passed to Ward linkage as the
    # observation vectors, i.e. features are grouped by correlation profile.
    corr_linkage = hierarchy.ward(corr)
    hierarchy.dendrogram(
        corr_linkage,
        labels=labels,
        leaf_rotation=360,
        orientation='left',
    )
    plt.title(title + '- Hierarchial Clustering Dendrogram',{'fontsize': 18,'color' : 'Green'})
    plt.show()