# Exercise 2.4

In [676]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import normalized_mutual_info_score

## a)

In [677]:
# Read the semicolon-separated bird table; the first CSV column becomes the row index
raw_birddata = pd.read_csv(
  'data/birds2024.csv',
  sep=';',
  index_col=0,
)
raw_birddata

Unnamed: 0_level_0,group,length,wspan,weight,AR,wload,back,belly,ftype
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
naurulokki,laridae,34-38,86-99,200-350,8.13,0.31,light grey,white,B
harmaalokki,laridae,55-65,123-148,800-1300,8.24,0.64,bluish grey,white,B
isolokki,laridae,63-68,138-158,1000-1800,8.24,0.66,bluish grey,white,B
kalatiira,sternidae,36-42,70-80,100-145,9.14,0.24,grey,white,B
lapintiira,sternidae,33-37,66-77,90-130,8.97,0.2,grey,white,B
suokukko,scolopacidae,25-26,46-49,90-130,6.73,0.36,dappled brown,white,C
taivaanvuohi,scolopacidae,25-27,39-45,90-110,5.91,0.4,dappled brown,white,C
lehtokurppa,scolopacidae,34-36,55-65,280-330,5.68,0.54,dappled brown,dappled beige,C
karikukko,scolopacidae,21-24,43-49,90-130,7.21,0.45,black-brown,white,C
metsäviklo,scolopacidae,21-24,39-44,75-85,7.2,0.29,brown,white,C


In [678]:
def avg_from_str(str):
  """Return the midpoint of a numeric range written as 'a-b'.

  The text is split on '-' into exactly two pieces, each converted with
  float(); the result is their arithmetic mean as a float.

  Raises ValueError when the text does not split into exactly two pieces
  or when a piece is not a valid number.
  """
  low, high = (float(bound) for bound in str.split('-'))
  return (high + low) / 2

# Numerical features only; the 'a-b' range columns are replaced by their midpoints
num_birddata_processed = raw_birddata[['length', 'wspan', 'weight', 'AR', 'wload']].assign(
  length=raw_birddata['length'].map(avg_from_str),
  wspan=raw_birddata['wspan'].map(avg_from_str),
  weight=raw_birddata['weight'].map(avg_from_str),
)

# Derived metrics: weight relative to squared length, and wingspan relative to length
num_birddata_processed = num_birddata_processed.assign(
  BMI=num_birddata_processed['weight'] / num_birddata_processed['length']**2,
  WSI=num_birddata_processed['wspan'] / num_birddata_processed['length'],
)

# Min-max scaling, column by column (min/max are per-column Series aligned on column labels)
num_birddata_processed = (
  (num_birddata_processed - num_birddata_processed.min())
  / (num_birddata_processed.max() - num_birddata_processed.min())
)

# Removing weight and WSI seems to improve NMI (value shown at end of part c)) quite a bit
num_features_used = ['length', 'wspan', 'AR', 'wload', 'BMI']
num_birddata_processed = num_birddata_processed[num_features_used] # Comment out to use all features

num_birddata_processed

Unnamed: 0_level_0,length,wspan,AR,wload,BMI
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
naurulokki,0.152727,0.334204,0.762605,0.060185,0.214481
harmaalokki,0.327273,0.558747,0.785714,0.212963,0.343958
isolokki,0.367273,0.624021,0.785714,0.222222,0.400416
kalatiira,0.174545,0.24282,0.97479,0.027778,0.0
lapintiira,0.145455,0.224543,0.939076,0.009259,0.015081
suokukko,0.076364,0.099217,0.468487,0.083333,0.144386
taivaanvuohi,0.08,0.070496,0.296218,0.101852,0.109788
lehtokurppa,0.145455,0.164491,0.247899,0.166667,0.274415
karikukko,0.054545,0.091384,0.569328,0.125,0.222778
metsäviklo,0.054545,0.067885,0.567227,0.050926,0.126235


In [679]:
# Pairwise Euclidean distances between all birds in the scaled feature space.
# Broadcasting an (n, 1, d) view against a (1, n, d) view yields every row
# difference in one step, so each pair is computed once instead of being
# re-extracted with .iloc and written twice inside a nested Python loop.
feature_matrix = num_birddata_processed.to_numpy(dtype=float)
pairwise_diffs = feature_matrix[:, np.newaxis, :] - feature_matrix[np.newaxis, :, :]

# Norm over the last (feature) axis gives the symmetric n x n distance matrix
euclidian_ds = pd.DataFrame(
  np.linalg.norm(pairwise_diffs, axis=-1),
  index=num_birddata_processed.index,
  columns=num_birddata_processed.index,
)
euclidian_ds

species,naurulokki,harmaalokki,isolokki,kalatiira,lapintiira,suokukko,taivaanvuohi,lehtokurppa,karikukko,metsäviklo,...,harmaahaikara,kattohaikara,kanahaukka,varpushaukka,hiirihaukka,mehiläishaukka,ruskosuohaukka,sinisuohaukka,haarahaukka,merikotka
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
naurulokki,0.0,0.348605,0.437476,0.31765,0.292531,0.391158,0.552308,0.555615,0.332005,0.355824,...,0.759981,0.882663,0.68463,0.598454,0.582336,0.602044,0.429391,0.504904,0.367114,1.260256
harmaalokki,0.348605,0.0,0.095571,0.558109,0.563849,0.656802,0.778665,0.696219,0.601709,0.660849,...,0.655212,0.618568,0.634015,0.762852,0.577768,0.598851,0.430257,0.59471,0.282036,1.029964
isolokki,0.437476,0.095571,0.0,0.645251,0.652795,0.738594,0.852921,0.754042,0.685077,0.74789,...,0.657629,0.567354,0.642978,0.819984,0.600397,0.621511,0.463913,0.640433,0.308503,0.961494
kalatiira,0.31765,0.558109,0.645251,0.0,0.055011,0.557259,0.718772,0.793691,0.5107,0.477055,...,0.929775,1.084257,0.977434,0.818465,0.862664,0.874004,0.709283,0.746758,0.636524,1.549291
lapintiira,0.292531,0.563849,0.652795,0.055011,0.0,0.513946,0.677365,0.757207,0.468234,0.430313,...,0.927632,1.091243,0.953084,0.779508,0.837074,0.848991,0.686935,0.717504,0.623993,1.53921
suokukko,0.391158,0.656802,0.738594,0.557259,0.513946,0.0,0.179038,0.285561,0.136336,0.112191,...,0.819702,1.029868,0.615855,0.325787,0.52894,0.539935,0.47918,0.412345,0.564902,1.355872
taivaanvuohi,0.552308,0.778665,0.852921,0.718772,0.677365,0.179038,0.0,0.216233,0.298287,0.277424,...,0.81685,1.055574,0.591413,0.221573,0.511656,0.514503,0.518181,0.397114,0.647634,1.363397
lehtokurppa,0.555615,0.696219,0.754042,0.793691,0.757207,0.285561,0.216233,0.0,0.348322,0.393599,...,0.737897,0.917492,0.384553,0.185375,0.35377,0.363653,0.405379,0.321139,0.55419,1.162356
karikukko,0.332005,0.601709,0.685077,0.5107,0.468234,0.136336,0.298287,0.348322,0.0,0.123951,...,0.877732,1.040747,0.635322,0.431609,0.574431,0.593136,0.505976,0.489,0.559462,1.345367
metsäviklo,0.355824,0.660849,0.74789,0.477055,0.430313,0.112191,0.277424,0.393599,0.123951,0.0,...,0.882201,1.087763,0.707003,0.42882,0.616994,0.629424,0.543429,0.494685,0.599492,1.425098


## b)

In [680]:
def color_sim(col1, col2):
  """Similarity in [0, 1] between two colour names, designed for this assignment.

  Rules, applied in this order:
    * Identical names give 1.
    * A name of the form 'a-b' (e.g. 'black-brown') is treated as a mix of two
      colours: the result is 0.5 times the best similarity between its parts
      and the other colour (or the other colour's parts when both are mixes).
      Intuitively "half" the same colour.
    * A two-word name (e.g. 'light grey') is treated as a colour with a shade
      word in front. When only one name has two words, the result is 0.75
      times the best similarity of either word with the other name. When both
      have two words, the second words are compared with each other and with
      the other name's first word (the two first words are never compared with
      each other), again scaled by 0.75. So 'grey' vs 'light grey' and
      'bluish grey' vs 'light grey' both give 0.75.
    * Single words where one contains the other (e.g. 'greyish' and 'grey')
      give 0.75; hence 'greyish black' vs 'grey' gives 0.75*0.75.
    * Anything else gives 0.

  Names with more than one '-' or more than one space raise ValueError,
  because each split is unpacked into exactly two pieces.
  """
  if col1 == col2: # Same colour
    return 1

  # Two-colour combinations written as 'a-b'
  parts1, parts2 = col1.split('-'), col2.split('-')
  is_combo1, is_combo2 = len(parts1) > 1, len(parts2) > 1
  if is_combo1 and is_combo2:
    left_a, left_b = parts1
    right_a, right_b = parts2
    return 0.5*max(color_sim(p, q) for p in (left_a, left_b) for q in (right_a, right_b))
  if is_combo1 or is_combo2:
    # Exactly one side is a combination: compare each of its parts with the other colour
    combo, other = (parts1, col2) if is_combo1 else (parts2, col1)
    part_a, part_b = combo
    return 0.5*max(color_sim(part_a, other), color_sim(part_b, other))

  # Two-word names such as 'light grey'
  words1, words2 = col1.split(' '), col2.split(' ')
  is_shaded1, is_shaded2 = len(words1) > 1, len(words2) > 1
  if is_shaded1 and is_shaded2:
    first1, second1 = words1
    first2, second2 = words2
    return 0.75*max(color_sim(second1, second2), color_sim(second1, first2), color_sim(second2, first1))
  if is_shaded1 or is_shaded2:
    # Exactly one side has two words: compare each word with the other colour
    words, other = (words1, col2) if is_shaded1 else (words2, col1)
    word_a, word_b = words
    return 0.75*max(color_sim(word_a, other), color_sim(word_b, other))

  # Plain single words: one containing the other (e.g. 'greyish' and 'grey') counts as close
  return 0.75 if (col1 in col2 or col2 in col1) else 0

In [681]:
# Colour dissimilarity between every pair of birds. The back and belly
# dissimilarities (1 - similarity each) are summed, so every entry lies in [0, 2].
# Columns are selected by name rather than by position so that a change in the
# column order of the raw table cannot silently pick the wrong features.
back_colors = raw_birddata['back'].tolist()
belly_colors = raw_birddata['belly'].tolist()

# color_sim is symmetric in its arguments, so filling every (i, j) directly
# gives a symmetric matrix without a separate mirrored write.
color_ds = np.array(
  [
    [
      1 - color_sim(back_i, back_j) + 1 - color_sim(belly_i, belly_j)
      for back_j, belly_j in zip(back_colors, belly_colors)
    ]
    for back_i, belly_i in zip(back_colors, belly_colors)
  ],
  dtype=float,
)

color_ds = pd.DataFrame(color_ds, index=raw_birddata.index, columns=raw_birddata.index)
color_ds

species,naurulokki,harmaalokki,isolokki,kalatiira,lapintiira,suokukko,taivaanvuohi,lehtokurppa,karikukko,metsäviklo,...,harmaahaikara,kattohaikara,kanahaukka,varpushaukka,hiirihaukka,mehiläishaukka,ruskosuohaukka,sinisuohaukka,haarahaukka,merikotka
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
naurulokki,0.0,0.25,0.25,0.25,0.25,1.0,1.0,2.0,1.0,1.0,...,0.25,1.0,0.75,0.9375,1.5,0.9375,2.0,1.5,2.0,2.0
harmaalokki,0.25,0.0,0.0,0.25,0.25,1.0,1.0,2.0,1.0,1.0,...,0.5,1.0,0.75,0.9375,1.5,0.9375,2.0,1.5,2.0,2.0
isolokki,0.25,0.0,0.0,0.25,0.25,1.0,1.0,2.0,1.0,1.0,...,0.5,1.0,0.75,0.9375,1.5,0.9375,2.0,1.5,2.0,2.0
kalatiira,0.25,0.25,0.25,0.0,0.0,1.0,1.0,2.0,1.0,1.0,...,0.5,1.0,0.5,0.9375,1.5,0.9375,2.0,1.5,2.0,2.0
lapintiira,0.25,0.25,0.25,0.0,0.0,1.0,1.0,2.0,1.0,1.0,...,0.5,1.0,0.5,0.9375,1.5,0.9375,2.0,1.5,2.0,2.0
suokukko,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.625,0.25,...,1.25,1.0,1.5,0.75,0.75,0.75,1.25,0.75,1.25,1.25
taivaanvuohi,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.625,0.25,...,1.25,1.0,1.5,0.75,0.75,0.75,1.25,0.75,1.25,1.25
lehtokurppa,2.0,2.0,2.0,2.0,2.0,1.0,1.0,0.0,1.625,1.25,...,2.0,2.0,2.0,1.25,1.25,1.25,1.25,1.25,1.25,1.25
karikukko,1.0,1.0,1.0,1.0,1.0,0.625,0.625,1.625,0.0,0.5,...,1.25,0.5,1.5,1.125,1.125,1.125,1.625,1.0,1.625,1.5
metsäviklo,1.0,1.0,1.0,1.0,1.0,0.25,0.25,1.25,0.5,0.0,...,1.25,1.0,1.5,0.75,0.75,0.75,1.25,0.5,1.25,1.0


In [682]:
# Weight of the numerical part in the combined distance (formula 3.9 from the book).
# The proportion of numerical features,
#   num_birddata_processed.shape[1]/(num_birddata_processed.shape[1] + 2),
# is the alternative choice; 0.9 puts a higher weight on numerical features and
# yields better NMI on clustering.
lambda_val = 0.9

# Each distance matrix is normalised by the standard deviation over all of its entries
stdev_num = euclidian_ds.stack().std()
stdev_cat = color_ds.stack().std()

numeric_term = lambda_val*euclidian_ds/stdev_num
categorical_term = (1-lambda_val)*color_ds/stdev_cat
comb_dist = numeric_term + categorical_term
comb_dist

species,naurulokki,harmaalokki,isolokki,kalatiira,lapintiira,suokukko,taivaanvuohi,lehtokurppa,karikukko,metsäviklo,...,harmaahaikara,kattohaikara,kanahaukka,varpushaukka,hiirihaukka,mehiläishaukka,ruskosuohaukka,sinisuohaukka,haarahaukka,merikotka
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
naurulokki,0.0,0.993214,1.235584,0.908797,0.84029,1.236777,1.676263,1.855297,1.075455,1.140415,...,2.115114,2.577204,1.994626,1.791487,1.843163,1.801276,1.51106,1.631992,1.34122,3.776986
harmaalokki,0.993214,0.0,0.260641,1.564573,1.580225,1.961239,2.293583,2.23875,1.810989,1.972275,...,1.871895,1.856968,1.856591,2.23983,1.830705,1.79257,1.513423,1.876909,1.109196,3.148937
isolokki,1.235584,0.260641,0.0,1.802225,1.8228,2.184302,2.496093,2.396445,2.038351,2.209652,...,1.878486,1.717298,1.881033,2.395642,1.892418,1.854367,1.60521,2.001605,1.181378,2.962207
kalatiira,0.908797,1.564573,1.802225,0.0,0.150025,1.689766,2.130244,2.504576,1.562791,1.471033,...,2.620679,3.126989,2.750654,2.391499,2.607672,2.542962,2.274379,2.291572,2.075953,4.56524
lapintiira,0.84029,1.580225,1.8228,0.150025,0.0,1.571643,2.017317,2.405077,1.446978,1.34356,...,2.614835,3.14604,2.684248,2.285254,2.537883,2.474749,2.213433,2.211793,2.041777,4.537748
suokukko,1.236777,1.961239,2.184302,1.689766,1.571643,0.0,0.488271,0.948795,0.478074,0.34847,...,2.448001,2.978661,1.934577,1.015993,1.570031,1.600017,1.519333,1.252054,1.753114,3.910238
taivaanvuohi,1.676263,2.293583,2.496093,2.130244,2.017317,0.488271,0.0,0.759725,0.919746,0.799092,...,2.440224,3.048766,1.867919,0.731782,1.522894,1.530659,1.625697,1.210517,1.97874,3.930761
lehtokurppa,1.855297,2.23875,2.396445,2.504576,2.405077,0.948795,0.759725,0.0,1.226214,1.285938,...,2.352414,2.842205,1.388779,0.718071,1.177318,1.204269,1.318063,1.088326,1.7239,3.382483
karikukko,1.075455,1.810989,2.038351,1.562791,1.446978,0.478074,0.919746,1.226214,0.0,0.423047,...,2.60626,2.923321,1.987666,1.368346,1.75785,1.808861,1.656167,1.503611,1.802033,3.924092
metsäviklo,1.140415,1.972275,2.209652,1.471033,1.34356,0.34847,0.799092,1.285938,0.423047,0.0,...,2.618447,3.136552,2.183155,1.296986,1.81017,1.844071,1.694552,1.434106,1.847447,4.056526


## c)

In [683]:
# Agglomerative hierarchical clustering on the precomputed combined distance matrix
n_clusters = 11       # 11 is the number of clusters which yields highest NMI
linkage = 'complete'  # Ward linkage doesn't work with precomputed matrix. Complete yields better results than average. Single is the worst.

clustering = AgglomerativeClustering(
  n_clusters=n_clusters,
  metric='precomputed',
  linkage=linkage,
)
clustering.fit(comb_dist)

clustering.labels_

array([ 6, 10, 10,  6,  6,  9,  2,  2,  9,  9,  9,  9,  9,  9,  9,  9,  2,
        1,  2,  1,  2,  1,  1,  1,  4,  5,  4,  2,  2,  2,  3,  2,  4,  1,
        1,  1,  2,  8,  8,  0,  0,  4,  3,  2,  0,  0,  0,  0,  0,  7])

In [684]:
# Attach the cluster label of each bird to a copy of the raw table and list the birds cluster by cluster
clustered_birddata = raw_birddata.assign(cluster=clustering.labels_)
clustered_birddata.sort_values(by='cluster')

Unnamed: 0_level_0,group,length,wspan,weight,AR,wload,back,belly,ftype,cluster
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
sinisuohaukka,accipitridae,42-55,97-118,285-630,5.81,0.21,brown,brown-white,A,0
ruskosuohaukka,accipitridae,43-55,115-140,480-750,6.37,0.34,dark brown,brown,A,0
mehiläishaukka,accipitridae,52-59,110-132,510-1160,5.48,0.38,greyish brown,brown-white,A,0
hiirihaukka,accipitridae,46-58,110-132,550-1000,5.58,0.4,dark brown,brown-white,A,0
haarahaukka,accipitridae,48-58,130-155,560-940,7.14,0.33,dark brown,dark brown,A,0
kaulushaikara,ardeidae,70-80,100-130,800-1900,5.58,0.6,dappled brown,dappled brown,B,0
harmaahaikara,ardeidae,90-100,155-175,930-1150,5.88,0.4,light grey,greyish white,B,0
mustakurkku-uikku,podicipedidae,31-38,46-55,400-720,7.6,1.56,black,reddish brown,C,1
silkkiuikku,podicipedidae,50-55,59-73,800-1200,7.87,1.34,blackish grey,white,C,1
härkälintu,podicipedidae,40-50,77-85,700-900,6.48,1.81,blackish grey,reddish brown,C,1


In [685]:
# Mappings to get more general groupings from the original group.
# Groups without an entry in a mapping keep their current name at that level.
second_level = {
  'laridae': 'lari',
  'sternidae': 'lari',
  'scolopacidae': 'charadrii',
  'charadriidae': 'charadrii',
  'haematopodidae': 'charadrii',
  'dabbling ducks': 'anatinae',
  'diving ducks': 'anatinae',
  'gruifores': 'gruidae', # Probably a typo in the data
}
highest_level = {
  'lari': 'charadriiformes',
  'charadrii': 'charadriiformes',
  'anatinae': 'anatidae',
  'anserinae': 'anatidae',
  'gruidae': 'gruiformes',
  'rallidae': 'gruiformes',
  'accipitridae': 'accipitriformes',
}

all_cat_data = (
  raw_birddata
  .assign(group_2=lambda df: df["group"].map(second_level).fillna(df["group"]))
  .assign(group_3=lambda df: df["group_2"].map(highest_level).fillna(df["group_2"]))
)
all_cat_data

Unnamed: 0_level_0,group,length,wspan,weight,AR,wload,back,belly,ftype,group_2,group_3
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
naurulokki,laridae,34-38,86-99,200-350,8.13,0.31,light grey,white,B,lari,charadriiformes
harmaalokki,laridae,55-65,123-148,800-1300,8.24,0.64,bluish grey,white,B,lari,charadriiformes
isolokki,laridae,63-68,138-158,1000-1800,8.24,0.66,bluish grey,white,B,lari,charadriiformes
kalatiira,sternidae,36-42,70-80,100-145,9.14,0.24,grey,white,B,lari,charadriiformes
lapintiira,sternidae,33-37,66-77,90-130,8.97,0.2,grey,white,B,lari,charadriiformes
suokukko,scolopacidae,25-26,46-49,90-130,6.73,0.36,dappled brown,white,C,charadrii,charadriiformes
taivaanvuohi,scolopacidae,25-27,39-45,90-110,5.91,0.4,dappled brown,white,C,charadrii,charadriiformes
lehtokurppa,scolopacidae,34-36,55-65,280-330,5.68,0.54,dappled brown,dappled beige,C,charadrii,charadriiformes
karikukko,scolopacidae,21-24,43-49,90-130,7.21,0.45,black-brown,white,C,charadrii,charadriiformes
metsäviklo,scolopacidae,21-24,39-44,75-85,7.2,0.29,brown,white,C,charadrii,charadriiformes


In [686]:
# Calculate NMI between the true grouping and the clustering
group = 'group_2' # Either group, group_2 or group_3
true_grouping = all_cat_data[group]

# Integer id per distinct group name, numbered in order of first appearance
group_ids = {name: idx for idx, name in enumerate(pd.unique(true_grouping))}
true_labels = true_grouping.map(group_ids).to_numpy()

# NMI
nmi = normalized_mutual_info_score(true_labels, clustering.labels_)
print("NMI:                 ", nmi)
print("\nWhen")
print("Number of clusters:  ", n_clusters)
print("Linkage:             ", linkage)
print("Biological grouping: ", group)
print("Numerical features:  ", num_features_used)
print("Lambda eq. 3.9:      ", lambda_val)

NMI:                  0.7032945018130236

When
Number of clusters:   11
Linkage:              complete
Biological grouping:  group_2
Numerical features:   ['length', 'wspan', 'AR', 'wload', 'BMI']
Lambda eq. 3.9:       0.9
