# Exercise 2.4

In [28]:
import pandas as pd
import itertools
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

## a)

In [29]:
# Load the bird dataset: the file is semicolon-separated and its first
# column is used as the row index.
raw_birddata = pd.read_csv(
    'data/birds2024.csv',
    sep=';',
    index_col=0,
)
raw_birddata

Unnamed: 0_level_0,group,length,wspan,weight,AR,wload,back,belly,ftype
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
naurulokki,laridae,34-38,86-99,200-350,8.13,0.31,light grey,white,B
harmaalokki,laridae,55-65,123-148,800-1300,8.24,0.64,bluish grey,white,B
isolokki,laridae,63-68,138-158,1000-1800,8.24,0.66,bluish grey,white,B
kalatiira,sternidae,36-42,70-80,100-145,9.14,0.24,grey,white,B
lapintiira,sternidae,33-37,66-77,90-130,8.97,0.2,grey,white,B
suokukko,scolopacidae,25-26,46-49,90-130,6.73,0.36,dappled brown,white,C
taivaanvuohi,scolopacidae,25-27,39-45,90-110,5.91,0.4,dappled brown,white,C
lehtokurppa,scolopacidae,34-36,55-65,280-330,5.68,0.54,dappled brown,dappled beige,C
karikukko,scolopacidae,21-24,43-49,90-130,7.21,0.45,black-brown,white,C
metsäviklo,scolopacidae,21-24,39-44,75-85,7.2,0.29,brown,white,C


In [30]:
def avg_from_str(str):
  """Return the midpoint of a numeric range written as ``'a-b'``.

  A string holding a single number (no ``'-'`` separator) is treated as a
  degenerate range and returned as a float.

  Parameters:
    str: Text such as ``'34-38'`` or ``'42'``. The name shadows the built-in
      ``str`` inside this function; it is kept so existing callers that pass
      it by keyword keep working.

  Returns:
    The arithmetic mean of the two bounds (or the single value) as a float.

  Raises:
    ValueError: If a part is not parseable as a number or the text contains
      more than one ``'-'`` separator.
  """
  bounds = [float(part) for part in str.split('-')]
  if len(bounds) == 1:
    # Single value: nothing to average.
    return bounds[0]
  if len(bounds) != 2:
    raise ValueError(f"expected 'a-b' or a single number, got {str!r}")
  min_v, max_v = bounds
  return (max_v + min_v) / 2

# Build the numerical feature table in one chain, starting from the raw frame
# so the cell gives the same result every time it is re-run.
num_birddata_processed = (
    raw_birddata[['length', 'wspan', 'weight', 'AR', 'wload']]
    # Replace each 'a-b' range string with its midpoint.
    .assign(
        length=lambda df: df['length'].map(avg_from_str),
        wspan=lambda df: df['wspan'].map(avg_from_str),
        weight=lambda df: df['weight'].map(avg_from_str),
    )
    # Derived ratios: weight relative to squared length, and wingspan
    # relative to length.
    .assign(
        BMI=lambda df: df['weight'] / df['length'] ** 2,
        WSI=lambda df: df['wspan'] / df['length'],
    )
    # Min-max scale every column to [0, 1]; a column whose values are all
    # identical would divide by zero here.
    .pipe(lambda df: (df - df.min()) / (df.max() - df.min()))
)
num_birddata_processed

Unnamed: 0_level_0,length,wspan,weight,AR,wload,BMI,WSI
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
naurulokki,0.152727,0.334204,0.022779,0.762605,0.060185,0.214481,0.879054
harmaalokki,0.327273,0.558747,0.093394,0.785714,0.212963,0.343958,0.670654
isolokki,0.367273,0.624021,0.125285,0.785714,0.222222,0.400416,0.671464
kalatiira,0.174545,0.24282,0.008884,0.97479,0.027778,0.0,0.44608
lapintiira,0.145455,0.224543,0.007745,0.939076,0.009259,0.015081,0.526316
suokukko,0.076364,0.099217,0.007745,0.468487,0.083333,0.144386,0.405667
taivaanvuohi,0.08,0.070496,0.006834,0.296218,0.101852,0.109788,0.239971
lehtokurppa,0.145455,0.164491,0.025513,0.247899,0.166667,0.274415,0.30622
karikukko,0.054545,0.091384,0.007745,0.569328,0.125,0.222778,0.527379
metsäviklo,0.054545,0.067885,0.005011,0.567227,0.050926,0.126235,0.393408


In [40]:
# Pairwise Euclidean distances between all rows of the processed feature table.
# Broadcasting an (n, 1, d) view against a (1, n, d) view yields every
# row-to-row difference at once, replacing the nested Python loops that
# computed each pair repeatedly.
bird_features = num_birddata_processed.to_numpy(dtype=float)
pairwise_diffs = bird_features[:, np.newaxis, :] - bird_features[np.newaxis, :, :]

# Norm over the feature axis gives a symmetric (n, n) matrix with a zero
# diagonal; label both axes with the species index.
euclidian_ds = pd.DataFrame(
    np.linalg.norm(pairwise_diffs, axis=-1),
    index=num_birddata_processed.index,
    columns=num_birddata_processed.index,
)
euclidian_ds

species,naurulokki,harmaalokki,isolokki,kalatiira,lapintiira,suokukko,taivaanvuohi,lehtokurppa,karikukko,metsäviklo,...,harmaahaikara,kattohaikara,kanahaukka,varpushaukka,hiirihaukka,mehiläishaukka,ruskosuohaukka,sinisuohaukka,haarahaukka,merikotka
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
naurulokki,0.0,0.41224,0.494961,0.537179,0.458502,0.614269,0.844823,0.798031,0.483868,0.602311,...,0.945243,1.045541,0.806295,0.720158,0.606286,0.658066,0.43106,0.557765,0.378187,1.351111
harmaalokki,0.41224,0.0,0.100755,0.607504,0.588298,0.713402,0.894036,0.788759,0.624433,0.722079,...,0.742517,0.708791,0.667645,0.790164,0.580133,0.601454,0.489592,0.597813,0.404217,1.111911
isolokki,0.494961,0.100755,0.0,0.693322,0.678988,0.793716,0.963168,0.843763,0.709864,0.80692,...,0.745752,0.656002,0.676412,0.849024,0.604778,0.625901,0.522463,0.646808,0.425812,1.036811
kalatiira,0.537179,0.607504,0.693322,0.0,0.097289,0.558724,0.747743,0.806091,0.517132,0.479969,...,0.941824,1.122554,0.982712,0.819173,0.906038,0.893173,0.843769,0.772793,0.819415,1.658097
lapintiira,0.458502,0.588298,0.678988,0.097289,0.0,0.527917,0.735403,0.788746,0.468235,0.450379,...,0.953783,1.136505,0.960636,0.781061,0.860565,0.856511,0.783774,0.72756,0.761523,1.634855
suokukko,0.614269,0.713402,0.793716,0.558724,0.527917,0.0,0.243948,0.302904,0.182761,0.112892,...,0.828375,1.069008,0.62672,0.334024,0.616545,0.584045,0.690634,0.476636,0.792848,1.488891
taivaanvuohi,0.844823,0.894036,0.963168,0.747743,0.735403,0.243948,0.0,0.226925,0.414222,0.317034,...,0.825348,1.103945,0.639994,0.32578,0.701961,0.642132,0.841146,0.566475,0.969449,1.545433
lehtokurppa,0.798031,0.788759,0.843763,0.806091,0.788746,0.302904,0.226925,0.0,0.412983,0.403661,...,0.741083,0.959467,0.423306,0.253123,0.543506,0.481634,0.72025,0.465289,0.857201,1.340114
karikukko,0.483868,0.624433,0.709864,0.517132,0.468235,0.182761,0.414222,0.412983,0.0,0.182537,...,0.905566,1.088247,0.646703,0.434527,0.607821,0.60369,0.630586,0.503395,0.708963,1.453638
metsäviklo,0.602311,0.722079,0.80692,0.479969,0.450379,0.112892,0.317034,0.403661,0.182537,0.0,...,0.889452,1.125469,0.717948,0.437411,0.699389,0.671926,0.745136,0.554984,0.826393,1.556032
