### Hierarchical clustering (input: data)  

#### Import libraries  

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster

#### Parameters  

In [None]:
# Location of the input CSV file.
csv_in = '../ai-0102/pandas_training-utf8.csv'

# Raise pandas' display limits so that up to 999 rows / columns are shown
# before the output is truncated.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

#### Read CSV file  

In [None]:
# Load the input table: the first 13 lines of the file are skipped and the
# next line is used as the column header.
df = pd.read_csv(csv_in, delimiter=',', skiprows=13, header=0)
print(df.shape)
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()
display(df.head())

#### Separate data  

In [None]:
# Keep every column from 'FRESH' through the last one; this is the table
# that is passed to the clustering step.
df_data = df.loc[:, 'FRESH':]
# The row index is reused later as the per-customer label.
customers = df_data.index
print(df_data.shape)
display(df_data.head())

#### Execute clustering  

In [None]:
# Agglomerative clustering with Ward linkage on Euclidean distances.
Z = linkage(df_data, method='ward', metric='euclidean')
# Number of rows in the linkage matrix (one row per merge step).
n_data = Z.shape[0]
# Wrap the linkage matrix in a DataFrame for inspection. Row i is named
# 'merged<i + 1 + n_data>', i.e. the names run from n_data+1 to 2*n_data.
df_Z = pd.DataFrame(
    Z,
    columns=['label1', 'label2', 'distance', '#members'],
    index=[f'merged{k}' for k in range(n_data + 1, 2 * n_data + 1)],
)
# linkage() returns a float matrix; the two merged labels and the member
# count are whole numbers, so show them as integers.
df_Z = df_Z.astype({'label1': 'int', 'label2': 'int', '#members': 'int'})
display(df_Z.head())
display(df_Z.tail())

#### Draw dendrogram  

In [None]:
# Plot the merge tree described by Z, using the customer index as leaf labels.
# The return value is kept in `dendr`; it is not used in the visible cells.
dendr = dendrogram(Z, labels=customers)
# Rotate the x tick labels by 90 degrees.
plt.xticks(rotation=90)
plt.ylabel('Euclidean')
plt.show()

#### Get results of clustering  

In [None]:
# Merge heights, one per row of the linkage table.
dist = df_Z['distance']
# Requested number of flat clusters.
n_clusters = 2
print('=== n_clusters:', n_clusters, '===')
# Cut the tree into at most `n_clusters` flat clusters and attach the
# resulting cluster ids to the customer index.
cls = pd.Series(fcluster(Z, n_clusters, criterion='maxclust'), index=customers)
# Height of the merge step after which `n_clusters` clusters remain.
# `dist` is indexed by the string labels 'merged<k>', so the row must be
# selected by position with .iloc; a bare integer key on a label-indexed
# Series relies on pandas' deprecated positional fallback.
print('dist:', dist.iloc[n_data-n_clusters])
print(cls.head())

#### Make cross table  

In [None]:
# Contingency table: rows are the CHANNEL values, columns are the cluster ids
# assigned to each customer.
ct = pd.crosstab(index=df['CHANNEL'], columns=cls)
display(ct)