### Hierarchical clustering (input: precalculated distance)  

#### Import libraries  

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster

#### Parameters  

In [None]:
# Path of the CSV file that holds the precalculated distance data
csv_in = 'sushi_corr25_dist.csv'

# Raise the pandas display limits so that more rows and columns are shown
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

#### Read CSV file (distance data)    

In [None]:
# Load the distance table; the first CSV row supplies the column names.
df = pd.read_csv(csv_in, delimiter=',', skiprows=0, header=0)
print(df.shape)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would additionally emit a stray "None".
df.info()
display(df.head())
# The column names are reused as item labels by the later cells.
items = df.columns
print(items)

#### Convert N x N distance matrix into condensed distance matrix  

In [None]:
# squareform() converts the square distance matrix held in `df` into its
# condensed (1-D vector) form, which is what linkage() expects as input.
v = squareform(df.to_numpy())
print(type(v), v.shape, sep='\n')

#### Execute clustering  

In [None]:
# `v` is already a condensed distance vector, so linkage() clusters it as-is.
# The `metric` argument only applies when raw observation vectors are given
# and is ignored for condensed input, so it is deliberately not passed here.
Z = linkage(v, method='complete')

# Z holds one row per merge step, i.e. (number of items - 1) rows.
n_data = Z.shape[0]

# SciPy assigns the cluster created by row i the id (number of items + i),
# which equals i + 1 + n_data; that id is used as the row label.
df_Z = pd.DataFrame(Z,
                    columns=['label1', 'label2', 'distance', '#members'],
                    index=['merged{}'.format(i + 1 + n_data) for i in range(n_data)])
# linkage() returns a float array; cluster ids and member counts are integers.
df_Z = df_Z.astype({'label1': 'int', 'label2': 'int', '#members': 'int'})
display(df_Z)

#### Draw dendrogram  

In [None]:
# Pass the labels as a plain list rather than a pandas Index: some SciPy
# releases evaluate `labels` in a boolean context, which raises ValueError
# for array-like objects such as an Index.
dendr = dendrogram(Z, labels=list(items))
plt.xticks(rotation=90)
plt.ylabel('(1 - Pearson Correlation Coefficient) / 2')
plt.show()

#### Get the results of clustering (specify the number of clusters)  

In [None]:
# For every cluster count from 1 up to n_data (the number of merge steps),
# print the cluster assignment of each item and the matching merge distance.
dist = df_Z['distance']
for i in range(1, n_data + 1):
    print('=== n_clusters:', i, '===')
    cls = pd.Series(fcluster(Z, i, criterion='maxclust'), index=items)
    # Row (n_data - i) of Z is the merge that leaves exactly i clusters.
    # `dist` is labelled with strings ('merged...'), so the row is selected by
    # position with .iloc; a bare integer key would rely on the deprecated
    # positional fallback of Series.__getitem__.
    print('dist:', dist.iloc[n_data - i])
    print(cls)