In [51]:
import pandas as pd
import numpy as np

In [2]:
!head -20 data/frog_tongue_adhesion.csv

# These data are from the paper,
#   Kleinteich and Gorb, Sci. Rep., 4, 5225, 2014.
# It was featured in the New York Times.
#    http://www.nytimes.com/2014/08/25/science/a-frog-thats-a-living-breathing-pac-man.html
#
# The authors included the data in their supplemental information.
#
# Importantly, the ID refers to the identifites of the frogs they tested.
#   I:   adult, 63 mm snout-vent-length (SVL) and 63.1 g body weight,
#        Ceratophrys cranwelli crossed with Ceratophrys cornuta
#   II:  adult, 70 mm SVL and 72.7 g body weight,
#        Ceratophrys cranwelli crossed with Ceratophrys cornuta
#   III: juvenile, 28 mm SVL and 12.7 g body weight, Ceratophrys cranwelli
#   IV:  juvenile, 31 mm SVL and 12.7 g body weight, Ceratophrys cranwelli
date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (m

In [3]:
# Load the data, the comment argument tells pandas that lines starting with # are comments
df = pd.read_csv('data/frog_tongue_adhesion.csv', comment='#')

# Take a look
df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975


In [6]:
df.dtypes

date                                                     object
ID                                                       object
trial number                                              int64
impact force (mN)                                         int64
impact time (ms)                                          int64
impact force / body weight                              float64
adhesive force (mN)                                       int64
time frog pulls on target (ms)                            int64
adhesive force / body weight                            float64
adhesive impulse (N-s)                                  float64
total contact area (mm2)                                  int64
contact area without mucus (mm2)                          int64
contact area with mucus / contact area without mucus    float64
contact pressure (Pa)                                     int64
adhesive strength (Pa)                                    int64
dtype: object

In [17]:
df.loc[abs(df['adhesive strength (Pa)']) >= 2000]
df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975


In [27]:
df.loc[df['ID'] == 'II', ['impact force (mN)', 'adhesive force (mN)']]

Unnamed: 0,impact force (mN),adhesive force (mN)
20,1612,-655
21,605,-292
22,327,-246
23,946,-245
24,541,-553
25,1539,-664
26,529,-261
27,628,-691
28,1453,-92
29,297,-566


c) Extract the adhesive force and the time the frog pulls on the target for juvenile frogs (Frogs III and IV). Hint: We saw the & operator for Boolean indexing across more than one column. The | operator signifies OR, and works analogously. You could also approach this using the isin() method of a Pandas Series.

In [32]:
df.loc[(df['ID'] == 'III') | (df['ID'] == 'IV'), ['ID', 'adhesive force (mN)', 'time frog pulls on target (ms)']]

Unnamed: 0,ID,adhesive force (mN),time frog pulls on target (ms)
40,III,-94,683
41,III,-163,245
42,III,-172,619
43,III,-225,1823
44,III,-301,918
45,III,-93,1351
46,III,-131,1790
47,III,-289,1006
48,III,-104,883
49,III,-229,1218


Practice 2: Split-Apply-Combine of the frog data set
You'll now practice your split-apply-combine skills.

a) Compute standard deviation of the impact forces for each frog.

In [44]:
grouped_by_ID = df.groupby('ID')

In [46]:
df['ID'].describe()

count      80
unique      4
top       III
freq       20
Name: ID, dtype: object

In [47]:
grouped_by_ID['impact force (mN)'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
I,20.0,1530.2,630.207952,473.0,1231.25,1550.5,1904.75,2641.0
II,20.0,707.35,424.573256,245.0,422.0,573.0,799.75,1612.0
III,20.0,550.1,124.273849,324.0,458.75,544.0,615.75,806.0
IV,20.0,419.1,234.864328,22.0,198.0,460.5,599.0,815.0


In [49]:
grouped_by_ID['impact force (mN)'].std()

ID
I      630.207952
II     424.573256
III    124.273849
IV     234.864328
Name: impact force (mN), dtype: float64

b) Compute the coefficient of variation of the impact forces and adhesive forces for each frog.



In [64]:
#define our own function to deal with things not present in pandas
def coeff_of_var(data):
    return np.std(data) / np.mean(data)

In [65]:
coeff_of_var_values = grouped_by_ID.agg(coeff_of_var).reset_index()

In [70]:
df2 = coeff_of_var_values
df2

Unnamed: 0,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,I,0.447626,0.401419,0.190662,0.401549,-0.247435,0.442872,0.246055,-0.42032,0.387324,0.40252,0.701522,0.802871,-0.762843
1,II,0.493878,0.585033,0.387753,0.585074,-0.429701,0.520059,0.429937,-0.636107,0.406223,0.946239,0.308511,0.888197,-1.105411
2,III,0.463481,0.220191,0.710341,0.220086,-0.415435,0.630255,0.415116,-0.625851,0.414023,0.931615,0.729556,0.808729,-0.68925
3,IV,0.471458,0.546212,0.453335,0.546386,-0.308042,0.745028,0.308086,-0.563343,0.342781,0.780164,0.550932,0.627423,-0.386164


In [77]:
#df.loc[df['ID'] == 'II', ['impact force (mN)', 'adhesive force (mN)']]
df2[['impact force (mN)', 'adhesive force (mN)']]

Unnamed: 0,impact force (mN),adhesive force (mN)
0,0.401419,-0.247435
1,0.585033,-0.429701
2,0.220191,-0.415435
3,0.546212,-0.308042


c) And now, finally.... Compute a DataFrame that has the mean, median, standard deviation, and coefficient of variation of the impact forces and adhesive forces for each frog. After you make this DataFrame, you might want to explore using the pd.melt() function to make it tidy. You can read the documentation and/or ask a TA to help you.

In [88]:
grouped_by_ID['impact force (mN)', 'adhesive force (mN)'].mean().reset_index()

Unnamed: 0,ID,impact force (mN),adhesive force (mN)
0,I,1530.2,-658.4
1,II,707.35,-462.3
2,III,550.1,-206.75
3,IV,419.1,-263.6


In [89]:
grouped_by_ID['impact force (mN)', 'adhesive force (mN)'].median().reset_index()

Unnamed: 0,ID,impact force (mN),adhesive force (mN)
0,I,1550.5,-664.5
1,II,573.0,-517.0
2,III,544.0,-201.5
3,IV,460.5,-233.5


In [87]:
grouped_by_ID['impact force (mN)', 'adhesive force (mN)'].std().reset_index()

Unnamed: 0,ID,impact force (mN),adhesive force (mN)
0,I,630.207952,167.143619
1,II,424.573256,203.8116
2,III,124.273849,88.122448
3,IV,234.864328,83.309442


In [86]:
grouped_by_ID['impact force (mN)', 'adhesive force (mN)'].agg(coeff_of_var).reset_index()

Unnamed: 0,ID,impact force (mN),adhesive force (mN)
0,I,0.401419,-0.247435
1,II,0.585033,-0.429701
2,III,0.220191,-0.415435
3,IV,0.546212,-0.308042
