In [None]:
import numpy as np
import pandas as pd
import holoviews as hv

# Build a cleaned trait table from ./animals.csv, keep only the eight orders
# with the most species records, and write the result to animals_clean.csv.
animals = pd.read_csv('./animals.csv', sep='\t')

# Discard every column whose name contains 'GR' or 'g_EXT'.
animals = animals.drop(columns=[c for c in animals.columns if 'GR' in c or 'g_EXT' in c])

# Shorten names of the form '<prefix>_<name>...' to '<name>'; names without
# an underscore are left unchanged.
animals = animals.rename(columns=lambda c: c.split('_')[1] if '_' in c else c)

# Keep the traits of interest and turn the -999.0 placeholder into NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
animals = animals[['Order', 'Species', 'AdultBodyMass', 'Temp', 'SexualMaturityAge', 'TrophicLevel',
                   'NeonateBodyMass', 'GestationLen', 'SocialGrpSize', 'WeaningAge']].replace(-999.0, np.nan)

# Rescale columns.
# NOTE(review): presumably Temp is stored in tenths of a degree and the ages
# in days (so these become degrees and years) -- confirm against the data docs.
animals['Temp'] = animals.Temp / 10.
animals['SexualMaturityAge'] = animals.SexualMaturityAge / 365.
animals['WeaningAge'] = animals.WeaningAge / 365.

# Label a species as social ('True') when its group size exceeds one and as
# not social ('False') otherwise; rows without a finite group size get NaN.
# The labels are strings, not booleans. The original code had the two labels
# swapped, marking group-living species as 'False'.
group_size = animals['SocialGrpSize']
animals['Social'] = np.where(group_size > 1, 'True', 'False')
animals.loc[~np.isfinite(group_size), 'Social'] = np.nan

# The eight orders with the most non-null Species entries, in ascending
# order of count.
species_per_order = animals.groupby('Order')['Species'].count().sort_values()
largest_orders = list(species_per_order.index[-8:])

# Wrap the table as a HoloViews Dataset keyed on Order/Species/Social, rename
# the Temp dimension to 'Temperature', and restrict it to the largest orders.
dataset = hv.Dataset(animals, kdims=['Order', 'Species', 'Social']).redim(Temp=dict(name='Temperature'))
dataset = dataset.select(Order=set(largest_orders))

# Persist the filtered table without the index column.
dataset.data.to_csv('animals_clean.csv', index=False)