# Linear regressions on spiracle data with non-parametric confidence intervals for slopes

To begin with, we need to import necessary python packages.

In [3]:
import numpy as np
import pandas as pd

import bokeh.io
import bokeh.plotting

bokeh.io.output_notebook()

We can now read in the data into a dataframe for analyis.

In [4]:
df = pd.read_csv("./20190322_supp_table_2.csv")

We take a look at the format for the data.

In [5]:
df.head()

Unnamed: 0,subfamily,species,sex,mass (g),spiracle,area (mm^2),depth (mm)
0,Cetoniinae,Goliathus goliathus,M,16.28,6,0.274408,2.512648
1,Cetoniinae,Goliathus goliathus,F,18.15,6,0.134949,1.606189
2,Cetoniinae,Coelorrhina hornimani,M,1.13,6,0.212131,0.553833
3,Cetoniinae,Dicronorrhina derbyana,M,2.12,6,0.039532,0.473369
4,Cetoniinae,Dicronorrhina derbyana,F,2.145,6,0.049701,0.49632


For some of this analysis, we will look at the per-species averages for our measurements. To get this, we use a simple aggregate function on the dataframe and take a look at the results.

In [6]:
df_averages = df.groupby(['species', 'spiracle'], as_index=False).aggregate(np.average)
df_averages.head()

Unnamed: 0,species,spiracle,area (mm^2),depth (mm),mass (g)
0,Coelorrhina hornimani,1,0.135347,0.416717,1.13
1,Coelorrhina hornimani,2,0.084207,0.451409,1.13
2,Coelorrhina hornimani,3,0.106693,0.325444,1.13
3,Coelorrhina hornimani,4,0.115574,0.481558,1.13
4,Coelorrhina hornimani,5,0.119145,0.506751,1.13


For our plots, we will log transform the data. We will add a column to the dataframe with the log transformed data.

In [7]:
df_averages['log area (mm^2)'] = np.log10(df_averages['area (mm^2)'])
df_averages['log dist'] = np.log10(df_averages['depth (mm)'])
df_averages['log mass (g)'] = np.log10(df_averages['mass (g)'])
df_averages['log area/dist'] = np.log10(df_averages['area (mm^2)']/df_averages['depth (mm)'])
df_averages['area/dist'] = df_averages['area (mm^2)']/df_averages['depth (mm)']
df_averages['log area^2/dist'] = np.log10(df_averages['area (mm^2)']**2/df_averages['depth (mm)'])
df_averages['area^2/dist'] = df_averages['area (mm^2)']**2/df_averages['depth (mm)']

df_averages.head()

Unnamed: 0,species,spiracle,area (mm^2),depth (mm),mass (g),log area (mm^2),log dist,log mass (g),log area/dist,area/dist,log area^2/dist,area^2/dist
0,Coelorrhina hornimani,1,0.135347,0.416717,1.13,-0.868551,-0.380159,0.053078,-0.488392,0.324794,-1.356943,0.04396
1,Coelorrhina hornimani,2,0.084207,0.451409,1.13,-1.074651,-0.34543,0.053078,-0.729221,0.186543,-1.803872,0.015708
2,Coelorrhina hornimani,3,0.106693,0.325444,1.13,-0.971862,-0.487524,0.053078,-0.484339,0.32784,-1.456201,0.034978
3,Coelorrhina hornimani,4,0.115574,0.481558,1.13,-0.937142,-0.317351,0.053078,-0.61979,0.239999,-1.556932,0.027738
4,Coelorrhina hornimani,5,0.119145,0.506751,1.13,-0.923923,-0.295205,0.053078,-0.628717,0.235116,-1.55264,0.028013


In addition to log transforming the species averaged data, we will do the same for the whole data set.

In [8]:
df['log area (mm^2)'] = np.log10(df['area (mm^2)'])
df['log dist'] = np.log10(df['depth (mm)'])
df['log mass (g)'] = np.log10(df['mass (g)'])
df['area/dist'] = df['area (mm^2)']/df['depth (mm)']
df['log area/dist'] = np.log10(df['area (mm^2)']/df['depth (mm)'])
df['area^2/dist'] = df['area (mm^2)']**2/df['depth (mm)']
df['log area^2/dist'] = np.log10(df['area (mm^2)']**2/df['depth (mm)'])
df.head()

Unnamed: 0,subfamily,species,sex,mass (g),spiracle,area (mm^2),depth (mm),log area (mm^2),log dist,log mass (g),area/dist,log area/dist,area^2/dist,log area^2/dist
0,Cetoniinae,Goliathus goliathus,M,16.28,6,0.274408,2.512648,-0.561603,0.400132,1.211654,0.109211,-0.961735,0.029968,-1.523338
1,Cetoniinae,Goliathus goliathus,F,18.15,6,0.134949,1.606189,-0.869831,0.205797,1.258877,0.084018,-1.075628,0.011338,-1.945459
2,Cetoniinae,Coelorrhina hornimani,M,1.13,6,0.212131,0.553833,-0.673395,-0.256621,0.053078,0.383024,-0.416774,0.081251,-1.090169
3,Cetoniinae,Dicronorrhina derbyana,M,2.12,6,0.039532,0.473369,-1.403054,-0.3248,0.326336,0.083511,-1.078254,0.003301,-2.481309
4,Cetoniinae,Dicronorrhina derbyana,F,2.145,6,0.049701,0.49632,-1.303635,-0.304238,0.331427,0.100139,-0.999397,0.004977,-2.303033


Now we can start to generate some plots for the data and see what we are dealing with. I first define a couple of functions to plot lines representing isometric scaling. 

In [9]:
def generate_line(slope, intercept, point=0, move=100):
    x1 = point-move
    x2 = point+move
    y1 = slope*x1 + intercept
    y2 = slope*x2 + intercept
    return (x1, x2), (y1, y2)

def first_intercept(slope, x_max, y_min):
    return(y_min-slope*x_max)

In order to get confidence intervals for the regressions, we need a function to do bootstrap replicates. To do this, you simply draw samples (with replacement) from the data. With this sample, you then perform the regression again. Doing this over and over again gives boostrap samples from which confidence intervals can be computed. 

In [10]:
def draw_bs_pairs_linreg(x, y, size=1):
    """Perform pairs bootstrap for linear regression."""
    # Set up array of indices to sample from
    inds = np.arange(len(x))

    # Initialize samples
    bs_slope_reps = np.empty(size)
    bs_intercept_reps = np.empty(size)

    # Take samples
    for i in range(size):
        bs_inds = np.random.choice(inds, len(inds))
        bs_x, bs_y = x[bs_inds], y[bs_inds]
        bs_slope_reps[i], bs_intercept_reps[i] = np.polyfit(bs_x, bs_y, deg=1)

    return bs_slope_reps, bs_intercept_reps

Here is a function that performs the bootstrap sampling and builds a bokeh plot for the data. 

In [19]:
def make_plot(df, to_plot, slope_comp, n_cols=4):
    
    plots = []

    for spiracle in ['S', 'T', '1', '2', '3', '4', '5', '6']:

        y_min, y_max = np.min(df[to_plot].values), np.max(df[to_plot].values)
        x_min, x_max = np.min(df['log mass (g)'].values), np.max(df['log mass (g)'].values)
        intercept1 = first_intercept(slope_comp, x_max, y_min)
        line_scale = (y_max - y_min)/10

        p = bokeh.plotting.figure(width=230, height=230,
                                  y_range=(y_min-0.2, y_max+0.2),
                                  x_range=(x_min-0.2, x_max+0.2)
                                 )
        [p.line(generate_line(intercept=i, slope=slope_comp, point=x_max)[0],
                generate_line(intercept=i, slope=slope_comp, point=x_max)[1], color='grey', alpha=0.3)
         for i in line_scale*np.array(range(30))+intercept1]

        p.scatter('log mass (g)', to_plot,
                  source = df.loc[(df['spiracle'] == spiracle)])

        #p.legend.location = 'bottom_right'
        p.xgrid.visible = False
        p.ygrid.visible = False
        
        slope, intercept = np.polyfit(df.loc[(df['spiracle'] == spiracle), 'log mass (g)'].values, 
                              df.loc[(df['spiracle'] == spiracle), to_plot].values, deg=1)
        x = np.array([x_min, x_max])
        y = slope * x + intercept

        p.line(x, y)
        
        bs_slope_reps, bs_intercept_reps = draw_bs_pairs_linreg(
                        df.loc[(df['spiracle'] == spiracle), 'log mass (g)'].values,
                        df.loc[(df['spiracle'] == spiracle), to_plot].values,
                                                        size=10000)
        
        p.title.text = spiracle + ' slope 95% CI: ' + str([round(j, 3) for j in np.percentile(bs_slope_reps, [2.5, 97.5])])

        # x-values
        x = np.linspace(x_min, x_max, 200)

        # y-values of each point
        y = np.outer(bs_slope_reps, x) + np.stack([bs_intercept_reps]*200, axis=1)

        # Compute the 2.5th and 97.5th percentiles
        low, high = np.percentile(y, [2.5, 97.5], axis=0)
        
        p1 = np.append(x, x[::-1])
        p2 = np.append(low, high[::-1])

        p.patch(p1, p2, alpha=0.5)

        plots.append(p)
        

    bokeh.io.show(bokeh.layouts.gridplot(plots,ncols=n_cols))
    
def make_CIs(df, to_plot):
    
    plots = []

    for spiracle in ['S', 'T', '1', '2', '3', '4', '5', '6']:

        bs_slope_reps, bs_intercept_reps = draw_bs_pairs_linreg(
                        df.loc[(df['spiracle'] == spiracle), 'log mass (g)'].values,
                        df.loc[(df['spiracle'] == spiracle), to_plot].values,
                                                        size=10000)
        
        #print(spiracle + ' slope 95% CI: ' + str([round(j, 3) for j in np.percentile(bs_slope_reps, [2.5, 97.5])]))
        #print(spiracle + ' intercept 95% CI: ' + str([round(10**j, 3) for j in np.percentile(bs_intercept_reps, [2.5, 97.5])]))
        

---

## Plot for species averaged mass vs species averaged spiracle area (log transformed)

In [20]:
make_plot(df_averages, 'log area (mm^2)', 2/3)
make_CIs(df_averages, 'log area (mm^2)')

---

## Species averaged mass vs species averaged spiracle depth (log transformed)

In [11]:
make_plot(df_averages, 'log dist', 1/3)

---

## Species averaged mass vs $\frac{\mathrm{species\,averaged\,area}}{\mathrm{species\,averaged\,depth}}$ (log transformed)

In [12]:
make_plot(df_averages, 'log area/dist', 1/3)

---

## Species averaged mass vs $\frac{(\mathrm{species\,averaged\,area})^2}{\mathrm{species\,averaged\,depth}}$ (log transformed)

In [13]:
make_plot(df_averages, 'log area^2/dist', 1)

---
## All data points (not species averaged) for mass vs area (log transformed)

In [14]:
make_plot(df, 'log area (mm^2)', 2/3)

---

## All data points (not species averaged) for mass vs spiracle depth (log transformed)

In [15]:
make_plot(df, 'log dist', 1/3)

---

## All data points (not species averaged) for mass vs $\frac{\mathrm{area}}{\mathrm{depth}}$ (log transformed)

In [16]:
make_plot(df, 'log area/dist', 1/3)

---

## All data points (not species averaged) for mass vs $\frac{(\mathrm{area})^2}{\mathrm{depth}}$ (log transformed)

In [17]:
make_plot(df, 'log area^2/dist', 1)

In [18]:
df.head()

Unnamed: 0,subfamily,species,sex,mass (g),spiracle,area (mm^2),depth (mm),log area (mm^2),log dist,log mass (g),area/dist,log area/dist,area^2/dist,log area^2/dist
0,Cetoniinae,Goliathus goliathus,M,16.28,6,0.274408,2.512648,-0.561603,0.400132,1.211654,0.109211,-0.961735,0.029968,-1.523338
1,Cetoniinae,Goliathus goliathus,F,18.15,6,0.134949,1.606189,-0.869831,0.205797,1.258877,0.084018,-1.075628,0.011338,-1.945459
2,Cetoniinae,Coelorrhina hornimani,M,1.13,6,0.212131,0.553833,-0.673395,-0.256621,0.053078,0.383024,-0.416774,0.081251,-1.090169
3,Cetoniinae,Dicronorrhina derbyana,M,2.12,6,0.039532,0.473369,-1.403054,-0.3248,0.326336,0.083511,-1.078254,0.003301,-2.481309
4,Cetoniinae,Dicronorrhina derbyana,F,2.145,6,0.049701,0.49632,-1.303635,-0.304238,0.331427,0.100139,-0.999397,0.004977,-2.303033


In [38]:
pd.DataFrame({'mass (g)': df.loc[df['spiracle'] == '6'].groupby(['species', 'sex']).sum()['mass (g)'].values})

Unnamed: 0,mass (g)
0,1.13
1,0.1185
2,0.0969
3,2.145
4,2.12
5,21.7
6,31.0
7,1.672
8,2.695
9,18.15


In [39]:
df_summed = pd.DataFrame({'mass (g)': df.loc[df['spiracle'] == '6'].groupby(['species', 'sex']).sum()['mass (g)'].values})
df_summed['area (mm^2)'] =  df.loc[(df['spiracle'] == 'S') | (df['spiracle'] == 'T') | (df['spiracle'] == '1') | (df['spiracle'] == '2') | (df['spiracle'] == '3')].groupby(['species', 'sex']).sum()['area (mm^2)'].values
df_summed['area/depth'] =   df.loc[(df['spiracle'] == 'S') | (df['spiracle'] == 'T') | (df['spiracle'] == '1') | (df['spiracle'] == '2') | (df['spiracle'] == '3')].groupby(['species', 'sex']).sum()['area/dist'].values
df_summed['area^2/depth'] = df.loc[(df['spiracle'] == 'S') | (df['spiracle'] == 'T') | (df['spiracle'] == '1') | (df['spiracle'] == '2') | (df['spiracle'] == '3')].groupby(['species', 'sex']).sum()['area^2/dist'].values

df_summed['log area (mm^2)'] = np.log10(df_summed['area (mm^2)'])
df_summed['log area/depth'] = np.log10(df_summed['area/depth'])
df_summed['log area^2/depth'] = np.log10(df_summed['area^2/depth'])
df_summed['log mass (g)'] = np.log10(df_summed['mass (g)'])

df_summed

Unnamed: 0,mass (g),area (mm^2),area/depth,area^2/depth,log area (mm^2),log area/depth,log area^2/depth,log mass (g)
0,1.13,0.656864,1.805238,0.295398,-0.182524,0.256535,-0.529592,0.053078
1,0.1185,0.144736,0.716759,0.026076,-0.839424,-0.144627,-1.583765,-0.926282
2,0.0969,0.109079,0.56472,0.016279,-0.96226,-0.248167,-1.788385,-1.013676
3,2.145,0.666665,1.917017,0.348459,-0.176092,0.282626,-0.457848,0.331427
4,2.12,0.601384,1.558659,0.513251,-0.220848,0.192751,-0.28967,0.326336
5,21.7,3.043267,7.112382,7.901349,0.48334,0.852015,0.897701,1.33646
6,31.0,4.925296,5.20379,5.62266,0.692432,0.71632,0.749942,1.491362
7,1.672,0.69266,1.803298,0.367176,-0.15948,0.256068,-0.435126,0.223236
8,2.695,1.151732,2.906269,1.558902,0.061351,0.463336,0.192819,0.430559
9,18.15,5.442205,5.591519,9.872123,0.735775,0.74753,0.994411,1.258877


In [44]:
df_summed = pd.DataFrame({'mass (g)': df_averages.loc[df_averages['spiracle'] == '6'].groupby(['species',]).sum()['mass (g)'].values})
df_summed['area (mm^2)'] = df_averages.loc[(df_averages['spiracle'] == 'S') | (df_averages['spiracle'] == 'T') | (df_averages['spiracle'] == '1') | (df_averages['spiracle'] == '2') | (df_averages['spiracle'] == '3')].groupby(['species',]).sum()['area (mm^2)'].values
df_summed['area/depth'] =  df_averages.loc[(df_averages['spiracle'] == 'S') | (df_averages['spiracle'] == 'T') | (df_averages['spiracle'] == '1') | (df_averages['spiracle'] == '2') | (df_averages['spiracle'] == '3')].groupby(['species', ]).sum()['area/dist'].values
df_summed['area^2/depth']= df_averages.loc[(df_averages['spiracle'] == 'S') | (df_averages['spiracle'] == 'T') | (df_averages['spiracle'] == '1') | (df_averages['spiracle'] == '2') | (df_averages['spiracle'] == '3')].groupby(['species', ]).sum()['area^2/dist'].values

df_summed['log area (mm^2)'] = np.log10(df_summed['area (mm^2)'])
df_summed['log area/depth'] = np.log10(df_summed['area/depth'])
df_summed['log area^2/depth'] = np.log10(df_summed['area^2/depth'])
df_summed['log mass (g)'] = np.log10(df_summed['mass (g)'])

df_summed

Unnamed: 0,mass (g),area (mm^2),area/depth,area^2/depth,log area (mm^2),log area/depth,log area^2/depth,log mass (g)
0,1.13,0.656864,1.805238,0.295398,-0.182524,0.256535,-0.529592,0.053078
1,0.1077,0.149267,0.763294,0.024654,-0.826035,-0.117308,-1.608116,-0.967784
2,2.1325,0.634025,1.738719,0.419825,-0.197894,0.240229,-0.376931,0.328889
3,26.35,3.984281,5.313231,5.320611,0.60035,0.725359,0.725962,1.420781
4,2.1835,0.922196,2.324676,0.835299,-0.035177,0.366362,-0.078158,0.339153
5,17.215,5.472407,4.965428,7.692255,0.738178,0.695957,0.886054,1.235907
6,6.951,2.359803,3.492303,3.62998,0.372876,0.543112,0.559904,0.842047
7,0.1312,0.061167,0.359914,0.009428,-1.213484,-0.443801,-2.025593,-0.882066
8,1.375,0.522869,2.066784,0.292933,-0.281607,0.315295,-0.533232,0.138303
9,6.4149,0.887241,1.780307,0.448001,-0.051958,0.250495,-0.348721,0.80719


In [47]:
to_plot = 'log area (mm^2)'
slope_comp = 2/3

df_new = df_summed

y_min, y_max = np.min(df_new[to_plot].values), np.max(df_new[to_plot].values)
x_min, x_max = np.min(df_new['log mass (g)'].values), np.max(df_new['log mass (g)'].values)
intercept1 = first_intercept(slope_comp, x_max, y_min)
line_scale = (y_max - y_min)/10

p = bokeh.plotting.figure(width=230, height=230,
                          y_range=(y_min-0.2, y_max+0.2),
                          x_range=(x_min-0.2, x_max+0.2)
                         )
[p.line(generate_line(intercept=i, slope=slope_comp, point=x_max)[0],
        generate_line(intercept=i, slope=slope_comp, point=x_max)[1], color='grey', alpha=0.3)
 for i in line_scale*np.array(range(30))+intercept1]

p.scatter('log mass (g)', to_plot,
          source = df_new)

#p.legend.location = 'bottom_right'
p.xgrid.visible = False
p.ygrid.visible = False

slope, intercept = np.polyfit(df_new['log mass (g)'].values, 
                      df_new[to_plot].values, deg=1)
x = np.array([x_min, x_max])
y = slope * x + intercept

p.line(x, y)

bs_slope_reps, bs_intercept_reps = draw_bs_pairs_linreg(
                df_new['log mass (g)'].values,
                df_new[to_plot].values,
                                                size=10000)

p.title.text = 'all' + ' slope 95% CI: ' + str([round(j, 3) for j in np.percentile(bs_slope_reps, [2.5, 97.5])])

# x-values
x = np.linspace(x_min, x_max, 200)

# y-values of each point
y = np.outer(bs_slope_reps, x) + np.stack([bs_intercept_reps]*200, axis=1)

# Compute the 2.5th and 97.5th percentiles
low, high = np.percentile(y, [2.5, 97.5], axis=0)

p1 = np.append(x, x[::-1])
p2 = np.append(low, high[::-1])

p.patch(p1, p2, alpha=0.5)

bokeh.io.show(p)

In [22]:
df_averages.loc[df_averages['species'] == spec].sort_values(by='spiracle', ascending=False)

NameError: name 'spec' is not defined

In [None]:
p = bokeh.plotting.figure()
dim = 'log area/dist'
for spec in df_averages['species'].unique():
    p.line([0, 1, 2, 3, 4, 5, 6, 7],    [df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == 'S'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == 'T'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '1'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '2'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '3'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '4'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '5'), dim],
                                         df_averages.loc[(df_averages['species'] == spec) & (df_averages['spiracle'] == '6'), dim]])
bokeh.io.show(p)

In [None]:
y_min, y_max = np.min(df[to_plot].values), np.max(df[to_plot].values)
x_min, x_max = np.min(df['log mass (g)'].values), np.max(df['log mass (g)'].values)
intercept1 = first_intercept(slope_comp, x_max, y_min)
line_scale = (y_max - y_min)/10

p = bokeh.plotting.figure(width=230, height=230,
                          y_range=(y_min-0.2, y_max+0.2),
                          x_range=(x_min-0.2, x_max+0.2)
                         )
[p.line(generate_line(intercept=i, slope=slope_comp, point=x_max)[0],
        generate_line(intercept=i, slope=slope_comp, point=x_max)[1], color='grey', alpha=0.3)
 for i in line_scale*np.array(range(30))+intercept1]

p.scatter('log mass (g)', to_plot,
          source = df.loc[(df['spiracle'] == spiracle)])

#p.legend.location = 'bottom_right'
p.xgrid.visible = False
p.ygrid.visible = False

slope, intercept = np.polyfit(df.loc[(df['spiracle'] == spiracle), 'log mass (g)'].values, 
                      df.loc[(df['spiracle'] == spiracle), to_plot].values, deg=1)
x = np.array([x_min, x_max])
y = slope * x + intercept

p.line(x, y)

bs_slope_reps, bs_intercept_reps = draw_bs_pairs_linreg(
                df.loc[(df['spiracle'] == spiracle), 'log mass (g)'].values,
                df.loc[(df['spiracle'] == spiracle), to_plot].values,
                                                size=10000)

p.title.text = spiracle + ' slope 95% CI: ' + str([round(j, 3) for j in np.percentile(bs_slope_reps, [2.5, 97.5])])

# x-values
x = np.linspace(x_min, x_max, 200)

# y-values of each point
y = np.outer(bs_slope_reps, x) + np.stack([bs_intercept_reps]*200, axis=1)

# Compute the 2.5th and 97.5th percentiles
low, high = np.percentile(y, [2.5, 97.5], axis=0)

p1 = np.append(x, x[::-1])
p2 = np.append(low, high[::-1])

p.patch(p1, p2, alpha=0.5)

bokeh.io.show(p)


In [30]:
df.loc[df['spiracle'] == 'S', 'area (mm^2)'].values + df.loc[df['spiracle'] == 'T', 'area (mm^2)'].values

array([3.21753758, 2.4809332 , 0.26929908, 0.54683654, 0.45655158,
       1.48514914, 0.78125412, 0.37096324, 0.28427613, 0.25664501,
       0.03059642, 0.2455807 , 0.66884565, 2.52983331, 1.39116795,
       0.07947268])

In [31]:
df.loc[df['spiracle'] == '1']

Unnamed: 0,subfamily,species,sex,mass (g),spiracle,area (mm^2),depth (mm),log area (mm^2),log dist,log mass (g),area/dist,log area/dist,area^2/dist,log area^2/dist
85,Cetoniinae,Goliathus goliathus,M,16.28,1,1.195282,1.463879,0.07747,0.165505,1.211654,0.816517,-0.088035,0.975967,-0.010565
86,Cetoniinae,Goliathus goliathus,F,18.15,1,1.299533,1.100006,0.113787,0.041395,1.258877,1.181387,0.072392,1.535252,0.18618
87,Cetoniinae,Coelorrhina hornimani,M,1.13,1,0.135347,0.416717,-0.868551,-0.380159,0.053078,0.324794,-0.488392,0.04396,-1.356943
88,Cetoniinae,Dicronorrhina derbyana,M,2.12,1,0.055406,0.556517,-1.256446,-0.254522,0.326336,0.099558,-1.001925,0.005516,-2.258371
89,Cetoniinae,Dicronorrhina derbyana,F,2.145,1,0.114516,0.46384,-0.941136,-0.333632,0.331427,0.246886,-0.607504,0.028272,-1.548639
90,Cetoniinae,Mecynorrhina torquata,M,6.951,1,0.425596,0.985361,-0.371003,-0.006405,0.842047,0.431919,-0.364598,0.183823,-0.735601
91,Cetoniinae,Eudicella euthalia,M,2.695,1,0.071135,0.695103,-1.147919,-0.157951,0.430559,0.102337,-0.989969,0.00728,-2.137888
92,Cetoniinae,Eudicella euthalia,F,1.672,1,0.119027,0.353713,-0.924356,-0.451349,0.223236,0.336506,-0.473007,0.040053,-1.397363
93,Cetoniinae,Protaetia orientalis,F,0.935,1,0.103208,0.232689,-0.986285,-0.633225,-0.029188,0.443548,-0.35306,0.045778,-1.339345
94,Cetoniinae,Protaetia orientalis,M,1.815,1,0.096145,0.177152,-1.017075,-0.751654,0.258877,0.542724,-0.265421,0.05218,-1.282496


In [35]:
df_summed

Unnamed: 0,mass (g),area (mm^2),area/depth,area^2/depth,log area (mm^2),log area/depth,log area^2/depth,log mass (g)
0,1.13,1.103714,2.663378,0.4324,0.042857,0.425433,-0.364114,0.053078
1,0.1077,0.15974,0.803726,0.024797,-0.796587,-0.094892,-1.605597,-0.967784
2,2.1325,0.760657,1.985962,0.430364,-0.118811,0.297971,-0.366164,0.328889
3,26.35,4.365984,5.523365,5.347454,0.640082,0.742204,0.728147,1.420781
4,2.1835,1.104579,2.647329,0.855655,0.043197,0.422808,-0.067701,0.339153
5,17.215,6.184834,5.379851,7.793716,0.791328,0.73077,0.891745,1.235907
6,6.951,2.573181,3.80916,3.655382,0.41047,0.580829,0.562933,0.842047
7,0.1312,0.072902,0.435884,0.009757,-1.137263,-0.360629,-2.010675,-0.882066
8,1.375,0.637364,2.41444,0.306203,-0.195613,0.382816,-0.51399,0.138303
9,6.4149,1.007904,1.904368,0.453223,0.003419,0.279751,-0.343688,0.80719
