In [46]:
import numpy as np
import pandas as pd

import bokeh.io
import bokeh.plotting

import bebi103

#local .py file for some plotting functions and non-parametric bootstrapping utils
import plotting_utils

# Configure Bokeh to display plots inline in the notebook.
bokeh.io.output_notebook()

We can now read the data into a dataframe for analysis.

In [47]:
# Read the supplementary-table CSV (path relative to the working directory) into `df`.
supp_table_csv = "./20190322_supp_table_2.csv"
df = pd.read_csv(filepath_or_buffer=supp_table_csv)

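We prepare the data: average the measurements for each species and spiracle, count the species in each subfamily, and add log10-transformed columns.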

In [48]:
def _add_log_columns(frame):
    """Add the log10-transformed columns to ``frame`` in place and return it.

    Reads 'area (mm^2)', 'depth (mm)' and 'mass (g)' and writes
    'log area (mm^2)', 'log dist', 'log mass (g)', 'log area/dist' and
    'log area^2/dist' (in that order).
    """
    area = frame['area (mm^2)']
    depth = frame['depth (mm)']
    frame['log area (mm^2)'] = np.log10(area)
    frame['log dist'] = np.log10(depth)
    frame['log mass (g)'] = np.log10(frame['mass (g)'])
    frame['log area/dist'] = np.log10(area / depth)
    frame['log area^2/dist'] = np.log10(area**2 / depth)
    return frame


# Species name with spaces replaced by underscores.
df['species_underscore'] = df['species'].str.replace(" ", "_", regex=False)

group_keys = ['species', 'species_underscore', 'spiracle']
grouped = df.groupby(group_keys, as_index=False)

# Average only the numeric columns. Older pandas silently dropped columns the
# aggregation function could not handle (e.g. the string 'subfamily' column);
# pandas >= 2.0 raises TypeError instead, so the selection is made explicit.
# NOTE(review): assumes every column that should be averaged has a numeric dtype.
numeric_cols = [
    col for col in df.select_dtypes(include='number').columns
    if col not in group_keys
]
df_averages = grouped[numeric_cols].aggregate(np.average)

# Re-attach the subfamily label of each (species, spiracle) group. Both results
# come from the same groupby, so their rows line up by position.
df_averages['subfamily'] = grouped[['subfamily']].max()['subfamily']

# For each subfamily: count the species rows per spiracle, then keep the
# largest of those per-spiracle counts.
species_per_subfam = (
    df_averages
    .groupby(['subfamily', 'spiracle'], as_index=False)[['species']].count()
    .groupby('subfamily', as_index=False)[['species']].max()
    .rename(columns={'species': 'subfam_count'})
)
df_averages = df_averages.merge(species_per_subfam, on='subfamily')

# Same log10 transforms on the averaged table and on the raw table.
_add_log_columns(df_averages)
_add_log_columns(df)

We estimate how often a bootstrap replicate contains none of the smallest beetles (log10 mass of -0.5 or below).

In [53]:
# Log10-mass cutoff: a replicate whose every sampled value exceeds this is
# counted as containing none of the smallest animals.
small_log_mass_cutoff = -0.5

# Averaged log10 mass values for the rows labelled spiracle '6'.
x = df_averages.loc[(df_averages['spiracle'] == '6'), 'log mass (g)'].values
if len(x) == 0:
    # np.all() of an empty array is True, so an empty selection would make
    # every replicate count and report a meaningless 100%.
    raise ValueError("No rows with spiracle == '6' in df_averages; nothing to bootstrap.")

inds = np.arange(len(x))
count_no_small = 0
bootstrap_count = 10_000
# Draws use NumPy's global RNG; no seed is set in this cell, so the count
# varies from run to run.
for _ in range(bootstrap_count):
    # Resample the indices with replacement, same size as the data.
    bootstrap_inds = np.random.choice(inds, len(inds))
    if np.all(x[bootstrap_inds] > small_log_mass_cutoff):
        count_no_small += 1

# Scale to percent first, then round: rounding the fraction and multiplying
# by 100 afterwards can print float artefacts such as 56.99999999999999.
percent_no_small = round(100 * count_no_small / bootstrap_count, 1)
print('When bootstrapping data, ' + str(count_no_small) + ' out of ' + str(bootstrap_count) + ' (' + str(percent_no_small) + '%)' + ' bootstrap replicates contained neither of the smallest animals as data points.')

When bootstrapping data, 1152 out of 10000 (11.5%) bootstrap replicates contained neither of the smallest animals as data points.


Over 10% of the bootstrap replicates contain none of the smallest beetles. The regressions fit to these replicates will therefore influence the range of the slope and intercept statistics we generate with this non-parametric method.