In [458]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uncertainties
from uncertainties import unumpy as unp

%matplotlib inline

In [459]:
# Read the raw measurement table and keep only the rows whose `units` string
# does not contain the substring 'trap'.
raw_data = pd.read_excel('data/raw_data.xlsx').loc[
    lambda frame: ~frame['units'].str.contains('trap')
]

### 1) Remove non-mass measurements

In [67]:
def dry(x):
    """Evaluate 10**(-7.761 + 0.34975*x - 3.9315e-3*x**2) for a scalar or array x.

    NOTE(review): presumably a dry-weight regression against body size;
    confirm the source of the coefficients and the units.
    """
    return 10**(-7.761+0.34975*x-3.9315e-3*x**2)


def wet(x):
    """Evaluate 10**(-6.972 + 0.3687*x - 4.1725e-3*x**2) for a scalar or array x.

    NOTE(review): presumably the wet-weight counterpart of `dry`;
    confirm the source of the coefficients and the units.
    """
    return 10**(-6.972+0.3687*x-4.1725e-3*x**2)


# Evenly spaced sample points from 2.4 to 60 (np.linspace default count).
size = np.linspace(2.4,60)

# Ratio of the two curves at every sample point; shown as the cell output.
(dry(size)/wet(size))

array([0.14686157, 0.14006354, 0.1337852 , 0.12798441, 0.12262305,
       0.1176666 , 0.11308379, 0.10884626, 0.10492832, 0.10130665,
       0.09796011, 0.09486949, 0.09201739, 0.08938802, 0.08696706,
       0.08474152, 0.08269967, 0.08083089, 0.07912559, 0.07757514,
       0.07617181, 0.07490865, 0.0737795 , 0.07277891, 0.07190206,
       0.07114481, 0.07050358, 0.06997535, 0.06955768, 0.06924862,
       0.06904674, 0.06895112, 0.0689613 , 0.06907735, 0.06929978,
       0.06962964, 0.07006844, 0.07061822, 0.07128156, 0.07206155,
       0.07296189, 0.07398685, 0.07514137, 0.07643103, 0.07786213,
       0.07944178, 0.08117787, 0.08307922, 0.08515559, 0.08741783])

In [460]:
# Keep only measurements whose unit string starts with 'mg'. The explicit
# .copy() makes mass_data an independent frame, so the column assignments
# below write to it directly instead of to a slice of raw_data (which is what
# triggered SettingWithCopyWarning).
mass_data = raw_data[raw_data.units.str.startswith('mg')].copy()

# 'norm value' starts out as the reported number; rows reported as
# 'mg/m^2 (wet weight)' are then scaled by 0.3.
# NOTE(review): 0.3 is presumably a wet-to-dry mass conversion factor; confirm its source.
mass_data['norm value'] = mass_data['numerical value']
is_wet_weight = mass_data.units=='mg/m^2 (wet weight)'
mass_data.loc[is_wet_weight,'norm value'] = mass_data.loc[is_wet_weight,'norm value']*0.3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [3]:
# mass_data = raw_data[raw_data.units=='mg/m^2 (dry weight)']

In [461]:
# Per-reference group metadata, attached to every mass record by matching the
# record's `reference` to the metadata's `Paper`. The left join keeps all
# mass records, including any without a metadata match.
metadata = pd.read_csv('data/groups_per_reference.csv')
mass_data_with_meta = pd.merge(
    mass_data,
    metadata,
    how='left',
    left_on='reference',
    right_on='Paper',
)

In [462]:
# Number of distinct sites that contribute records to each 'Standard groups' category.
mass_data_with_meta.groupby('Standard groups')['site'].nunique()

Standard groups
Acari                  9
Arthropods           174
Collembola, Acari     48
Formicidae            49
Isoptera              23
Macroarthropods       25
Microarthropods        2
Orthoptera             1
Some arthropods        4
Name: site, dtype: int64

In [529]:
# Work on a copy of the merged table. A row is kept when its standard group is
# anything other than 'Microarthropods', or when its sub-class is Acari or
# Collembola (the only 'Microarthropods' rows that survive).
valid_data = mass_data_with_meta.copy().loc[
    lambda frame: (frame['Standard groups'] != 'Microarthropods')
    | frame['sub-class'].isin(['Acari', 'Collembola'])
]

In [530]:
# Label every record with an aggregated taxon. The rules run in the order
# listed, so a later match overwrites an earlier one.
for rank_column, taxon_name in [
    ('sub-class', 'Acari'),
    ('sub-class', 'Collembola'),
    ('super-family', 'Isoptera'),
    ('family', 'Formicidae'),
]:
    valid_data.loc[valid_data[rank_column]==taxon_name,'aggregated taxon'] = taxon_name

# Whatever is still unlabelled falls into the catch-all class.
valid_data.loc[valid_data['aggregated taxon'].isna(),'aggregated taxon'] = 'Other'

In [531]:
# Split the validated records by 'aggregated environment':
# soil/litter -> soil_data, plants -> canopy_data, above ground -> surface_data.
soil_data, canopy_data, surface_data = (
    valid_data[valid_data['aggregated environment'].eq(environment)]
    for environment in ('soil/litter', 'plants', 'above ground')
)

In [532]:
# Coverage table: number of distinct sites per biome (rows) and aggregated taxon (columns).
soil_data.pivot_table(columns='aggregated taxon',values='site',index='aggregated biome', aggfunc='nunique')

aggregated taxon,Acari,Collembola,Formicidae,Isoptera,Other
aggregated biome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Boreal Forests/Taiga,20.0,19.0,,,16.0
Crops,,,6.0,10.0,6.0
Deserts and Xeric Shrublands,1.0,1.0,4.0,5.0,
"Mediterranean Forests, Woodlands and Scrub",,,,2.0,
Pasture,7.0,7.0,6.0,5.0,5.0
Shrubland/Grassland,,,14.0,,
Temperate Forests,42.0,41.0,21.0,1.0,29.0
"Temperate Grasslands, Savannas and Shrublands",15.0,17.0,9.0,4.0,15.0
Tropical and Subtropical Forests,5.0,6.0,20.0,20.0,15.0
"Tropical and Subtropical Grasslands, Savannas and Shrublands",2.0,2.0,3.0,10.0,4.0


In [546]:
# placeholder for ants
# soil_data was produced by boolean indexing on valid_data; writing into that
# slice raises SettingWithCopyWarning. Take an explicit copy before modifying it.
soil_data = soil_data.copy()

# Formicidae rows carrying the coarse 'Shrubland/Grassland' biome are
# re-labelled: the first 8 (in index order) as temperate, the rest as tropical.
# NOTE(review): the 8-row cut is positional and hard-coded; confirm it against the site list.
shrub_index = soil_data.loc[(soil_data['family'] == 'Formicidae') & (soil_data['aggregated biome'] == 'Shrubland/Grassland')].index
soil_data.loc[shrub_index[0:8],'aggregated biome'] = 'Temperate Grasslands, Savannas and Shrublands'
soil_data.loc[shrub_index[8:],'aggregated biome'] = 'Tropical and Subtropical Grasslands, Savannas and Shrublands'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [391]:
# Total normalised value per (taxon, biome, site).
soil_site_data = (
    soil_data
    .groupby(['aggregated taxon', 'aggregated biome', 'site'])['norm value']
    .sum()
)

# Across-site mean, number of sites and standard error per (taxon, biome).
soil_biome_means = (
    soil_site_data
    .groupby(['aggregated taxon', 'aggregated biome'])
    .agg(['mean', 'count', 'sem'])
)

# Persist the wide layout: biomes as rows, taxa as columns.
(
    soil_biome_means
    .reset_index()
    .pivot_table(index='aggregated biome', columns='aggregated taxon')
    .to_csv('results/biome_means.csv')
)

# Fold mean and SEM into a single value-with-uncertainty column and keep it
# next to the site count.
soil_biome_means['umean'] = unp.uarray(
    nominal_values=soil_biome_means['mean'],
    std_devs=soil_biome_means['sem'],
)
soil_biome_means = soil_biome_means.loc[:, ['count', 'umean']]
soil_biome_means

Unnamed: 0_level_0,Unnamed: 1_level_0,count,umean
aggregated taxon,aggregated biome,Unnamed: 2_level_1,Unnamed: 3_level_1
Acari,Boreal Forests/Taiga,20,(5.1+/-1.0)e+02
Acari,Pasture,6,2.3+/-1.0
Acari,Temperate Forests,42,(7.8+/-1.7)e+02
Acari,"Temperate Grasslands, Savannas and Shrublands",15,(3.2+/-0.8)e+02
Acari,Tropical and Subtropical Forests,5,(1.2+/-1.1)e+03
Acari,"Tropical and Subtropical Grasslands, Savannas and Shrublands",2,(7+/-5)e+01
Acari,Tundra,47,(2.0+/-0.4)e+02
Collembola,Boreal Forests/Taiga,19,(6.2+/-2.7)e+02
Collembola,Pasture,6,0.0+/-0
Collembola,Temperate Forests,41,(6.5+/-3.2)e+03


In [547]:
def calc_uval(x):
    """Return the mean of ``x`` with its standard error attached as uncertainty.

    ``x`` must provide ``.sem()`` (e.g. a pandas Series). The result is built
    with ``unp.uarray`` from the scalar mean and the scalar SEM.
    """
    centre = np.mean(x)
    spread = x.sem()
    return unp.uarray(nominal_values=centre, std_devs=spread)

# Average records that share taxon, biome, site and 'taxon' entry ...
soil_site_taxa_mean = soil_data.groupby(['aggregated taxon','aggregated biome','site','taxon'])['norm value'].mean().reset_index()
# ... then add those averages up within each (aggregated taxon, biome, site).
soil_site_data = soil_site_taxa_mean.groupby(['aggregated taxon','aggregated biome','site'])['norm value'].sum()
# Per taxon and biome: across-site mean with SEM (calc_uval) and the number of sites ('count').
soil_biome_means = soil_site_data.reset_index().pivot_table(index='aggregated taxon',columns='aggregated biome',values='norm value', aggfunc=[calc_uval,'count'])

# Reshape so that each biome carries two adjacent sub-columns (statistic, count).
# The aggregation is given as the string 'sum' rather than the builtin, which
# pandas deprecates as an aggfunc.
soil_biome_means = soil_biome_means.unstack().reset_index().pivot_table(index='aggregated taxon', columns=['aggregated biome','level_0'],values=0,aggfunc='sum')
# `level` must be passed by keyword: it is keyword-only in pandas 2.x.
# NOTE(review): this relabels the second column level in its stored order,
# presumably 'calc_uval' before 'count'; confirm that 'mean'/'N' land on the right columns.
soil_biome_means.columns = soil_biome_means.columns.set_levels(['mean','N'],level=1)
def print_u(x):
    """Format a table cell as a whole number, or as 'value±error' if it is uncertain.

    Any object exposing both ``nominal_value`` and ``std_dev`` is treated as a
    value with uncertainty. The previous exact-type test against
    ``uncertainties.core.Variable`` missed derived quantities such as column
    sums, which were then rendered through plain ``format`` as '1093+/-288'.

    Parameters
    ----------
    x : number or uncertain value
        Plain numbers (e.g. site counts, zero-filled cells) or objects with
        ``nominal_value`` / ``std_dev`` attributes.

    Returns
    -------
    str
        '<value>' for plain numbers, '<value>±<std_dev>' for uncertain values,
        both rounded to zero decimals.
    """
    if hasattr(x, 'nominal_value') and hasattr(x, 'std_dev'):
        return '{:.0f}'.format(x.nominal_value)+'±'+'{:.0f}'.format(x.std_dev)
    return '{:.0f}'.format(x)

# Render every cell of the table as text with print_u and save the result as CSV.
soil_biome_means_print = soil_biome_means.applymap(print_u)
soil_biome_means_print.to_csv('results/table_1.csv')

  outputs = ufunc(*inputs)


In [549]:
# Display the formatted table that was written to results/table_1.csv.
soil_biome_means_print

aggregated biome,Boreal Forests/Taiga,Boreal Forests/Taiga,Crops,Crops,Deserts and Xeric Shrublands,Deserts and Xeric Shrublands,"Mediterranean Forests, Woodlands and Scrub","Mediterranean Forests, Woodlands and Scrub",Pasture,Pasture,Temperate Forests,Temperate Forests,"Temperate Grasslands, Savannas and Shrublands","Temperate Grasslands, Savannas and Shrublands",Tropical and Subtropical Forests,Tropical and Subtropical Forests,"Tropical and Subtropical Grasslands, Savannas and Shrublands","Tropical and Subtropical Grasslands, Savannas and Shrublands",Tundra,Tundra
level_0,mean,N,mean,N,mean,N,mean,N,mean,N,mean,N,mean,N,mean,N,mean,N,mean,N
aggregated taxon,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Acari,349±65,20,0,0,9±nan,1,0,0,43±41,7,753±174,42,285±82,15,162±83,5,74±47,2,194±39,47
Collembola,479±274,19,0,0,1±nan,1,0,0,4±4,7,311±98,41,189±47,17,49±28,6,12±1,2,394±150,34
Formicidae,0,0,49±17,6,72±37,4,0,0,113±33,6,81±26,21,198±99,17,128±76,20,232±68,9,0±nan,1
Isoptera,0,0,747±215,10,1291±514,5,1590±0,2,559±186,5,900±nan,1,395±388,4,1657±301,20,1861±769,10,0±nan,1
Other,264±62,16,549±197,6,0,0,0,0,707±265,5,1047±192,29,578±237,15,842±286,15,206±176,4,64±36,14


In [550]:
# Column-wise totals over all aggregated taxa, formatted with print_u.
soil_biome_means.sum().apply(print_u)

aggregated biome                                              level_0
Boreal Forests/Taiga                                          mean       1093+/-288
                                                              N                  55
Crops                                                         mean       1345+/-292
                                                              N                  22
Deserts and Xeric Shrublands                                  mean       1374+/-nan
                                                              N                  11
Mediterranean Forests, Woodlands and Scrub                    mean         1590+/-0
                                                              N                   2
Pasture                                                       mean       1426+/-329
                                                              N                  30
Temperate Forests                                             mean       3091+/-nan
      

In [496]:
#t= soil_site_data.reset_index().pivot_table(index='aggregated taxon',columns='aggregated biome',values='norm value', aggfunc='median').T.reset_index()
# Median site value per (taxon, biome), joined to the per-biome areas.
# NOTE(review): biome_area1 is only assigned in a cell that appears further
# down in this notebook, so this cell fails on a fresh top-to-bottom run.
t = (
    soil_site_data
    .groupby(['aggregated taxon', 'aggregated biome'])
    .median()
    .reset_index()
    .merge(pd.DataFrame(biome_area1), left_on='aggregated biome', right_index=True)
)

# Median value times biome area, per row.
t['Total'] = (t['norm value'] * t['area']).values

# Sum over biomes for each taxon, scaled by 1e18.
(
    t.pivot_table(index='aggregated taxon', columns='aggregated biome', values='Total', aggfunc=np.nansum)
    .div(1e18)
    .sum(axis=1)
)


aggregated taxon
Acari         0.014586
Collembola    0.005194
Formicidae    0.005101
Isoptera      0.083413
Other         0.011279
dtype: float64

In [497]:
# Across-site mean ± SEM for every (taxon, biome) pair, as a value with
# uncertainty; after reset_index() it sits in the 'norm value' column.
soil_biome_means_unp = soil_site_data.groupby(['aggregated taxon','aggregated biome']).apply(lambda x: unp.uarray(nominal_values= x.mean(),std_devs=x.sem()))
soil_biome_means_unp = soil_biome_means_unp.reset_index()

# Total area per biome under the two aggregation schemes stored in the CSV.
biome_area = pd.read_csv('data/aggregated biomes data.csv')
biome_area1 = biome_area.groupby('aggregated biome 1')['area'].sum()
biome_area2 = biome_area.groupby('aggregated biome 2')['area'].sum()
# .pivot_table(columns='aggregated taxon',index='aggregated biome')

# Scheme 1: every taxon, matched to areas by its biome name as is.
soil_biome_means_unp1 = soil_biome_means_unp.merge(pd.DataFrame(biome_area1),left_on='aggregated biome',right_index=True)

# Scheme 2: ants and termites only. The explicit .copy() makes this an
# independent frame so the biome relabelling below does not write into a slice
# of soil_biome_means_unp (which is what triggered SettingWithCopyWarning).
soil_biome_means_unp2 = soil_biome_means_unp[soil_biome_means_unp['aggregated taxon'].isin(['Formicidae','Isoptera'])].copy()
# Collapse every biome whose name contains 'Forest' / 'Grass' into the coarser scheme-2 labels.
soil_biome_means_unp2.loc[soil_biome_means_unp2.loc[:,'aggregated biome'].str.contains('Forest'),'aggregated biome'] = 'Forests'
soil_biome_means_unp2.loc[soil_biome_means_unp2.loc[:,'aggregated biome'].str.contains('Grass'),'aggregated biome'] = 'Shrubland/Grassland'
soil_biome_means_unp2 = soil_biome_means_unp2.merge(pd.DataFrame(biome_area2),left_on='aggregated biome',right_index=True)

# part1 keeps all five aggregated taxa; part2 keeps ants and termites.
part1 = soil_biome_means_unp1[soil_biome_means_unp1['aggregated taxon'].isin(['Acari','Collembola','Formicidae','Isoptera','Other'])]
part2 = soil_biome_means_unp2[soil_biome_means_unp2['aggregated taxon'].isin(['Formicidae','Isoptera'])]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [488]:
# Inspect the per-site values behind one cell (Acari in tropical/subtropical forests), smallest first.
soil_site_data.loc[('Acari','Tropical and Subtropical Forests')].sort_values()

site
E09 – Borneo – Sepilok – tropical rain forest                                                               29.271223
E17 – Zaire – Haut-Shaba – miombo, open forest                                                              31.336878
E18 – Zaire – Haut-Shaba – river forest                                                                    124.222020
E16 – Zaire – Haut-Shaba – dry dense evergreen forest                                                      147.580460
Teaching and Research Farm of the Obafemi Awolowo University, Ile-Ife, Oyo State, Nigeria - Forest Plot    479.833333
Name: norm value, dtype: float64

In [498]:
# Per-row total: the biome mean (with uncertainty) multiplied by the biome area.
part1.loc[:,'Total'] = (part1.loc[:,'norm value']*part1.loc[:,'area']).values
# Taxon x biome table of those totals, scaled by 1e18.
part1.pivot_table(values='Total',index='aggregated taxon',columns='aggregated biome',aggfunc=np.nansum)/1e18

aggregated biome,Boreal Forests/Taiga,Crops,Deserts and Xeric Shrublands,Pasture,Temperate Forests,"Temperate Grasslands, Savannas and Shrublands",Tropical and Subtropical Forests,"Tropical and Subtropical Grasslands, Savannas and Shrublands",Tundra
aggregated taxon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Acari,0.0055+/-0.0010,,,(6.4+/-2.6)e-05,0.0081+/-0.0019,0.0015+/-0.0004,0.0029+/-0.0015,0.0008+/-0.0005,0.00146+/-0.00029
Collembola,0.008+/-0.004,,,0.0+/-0,0.0033+/-0.0011,0.00103+/-0.00026,0.0009+/-0.0005,0.000128+/-0.000010,0.0029+/-0.0011
Formicidae,,,0.0014+/-0.0007,0.001338319258872+/-nan,0.00087+/-0.00028,0.0011+/-0.0005,0.00058+/-0.00010,0.0023+/-0.0008,0.0+/-nan
Isoptera,,0.018+/-0.004,0.025+/-0.010,,0.009684+/-nan,0.0021+/-0.0021,0.029+/-0.007,0.020+/-0.009,0.0+/-nan
Other,0.0037+/-0.0012,,,,0.0113+/-0.0021,0.0030+/-0.0016,0.012+/-0.006,0.00032+/-0.00014,0.00048+/-0.00027


In [499]:
# Total per aggregated taxon over all biomes in part1, scaled by 1e18.
(part1.groupby('aggregated taxon').Total.sum()/1e18)

aggregated taxon
Acari                    0.0203+/-0.0027
Collembola                 0.016+/-0.005
Formicidae    0.007604425541258772+/-nan
Isoptera       0.10314895879464489+/-nan
Other                      0.030+/-0.006
Name: Total, dtype: object

In [500]:
# Grand soil total: the per-taxon part1 totals (scaled by 1e18) added together.
# NOTE(review): total_soil is also assigned in another cell of this notebook
# from part1 + part2; which value is in effect depends on execution order.
total_soil = (part1.groupby('aggregated taxon').Total.sum()/1e18).sum()
total_soil

0.17729753128969183+/-nan

In [385]:
# Drop the Tundra rows, then compute mean-times-area totals per row.
part2 = part2.loc[part2['aggregated biome'].ne('Tundra')]
part2.loc[:,'Total'] = (part2.loc[:,'norm value']*part2.loc[:,'area']).values

# Taxon x biome table of the totals, scaled by 1e18.
part2.pivot_table(
    index='aggregated taxon',
    columns='aggregated biome',
    values='Total',
    aggfunc=sum,
).div(1e18)

aggregated biome,Forests,Shrubland/Grassland
aggregated taxon,Unnamed: 1_level_1,Unnamed: 2_level_1
Formicidae,0.0050+/-0.0012,0.031+/-0.007
Isoptera,0.1875468772814954+/-nan,0.10+/-0.04


In [386]:
# Total per aggregated taxon over all biomes in part2, scaled by 1e18.
part2.groupby('aggregated taxon').Total.sum()/1e18

aggregated taxon
Formicidae                0.036+/-0.007
Isoptera      0.28914764157651207+/-nan
Name: Total, dtype: object

In [117]:
# Soil total as the sum of all part1 and part2 row totals, scaled by 1e18.
# NOTE(review): where part1 is built it keeps Formicidae and Isoptera rows, and
# part2 consists of those same two taxa under the second biome scheme, so this
# sum may count ants and termites twice. Confirm the intended definition.
total_soil = (part1.Total.sum()+part2.Total.sum())/1e18

In [118]:
# Sum the values per (biome, site), take the across-site mean and SEM per
# biome, and attach the biome areas by index.
# NOTE(review): this uses 'numerical value', not the 'norm value' column used
# for soil; confirm that this is intended.
canopy_biome_mean = (
    canopy_data
    .groupby(['aggregated biome', 'site'])['numerical value']
    .sum()
    .groupby('aggregated biome')
    .agg(['mean', 'sem'])
    .merge(pd.DataFrame(biome_area1), left_index=True, right_index=True)
)

# Mean ± SEM as a value with uncertainty, then scaled up by biome area.
canopy_biome_mean['unp'] = unp.uarray(
    nominal_values=canopy_biome_mean['mean'],
    std_devs=canopy_biome_mean['sem'],
)
canopy_biome_mean['Total'] = canopy_biome_mean['unp'] * canopy_biome_mean['area']
canopy_biome_mean['Total'] / 1e18

Boreal Forests/Taiga                0.0121+/-0.0018
Temperate Forests                   0.0027+/-0.0007
Tropical and Subtropical Forests      0.010+/-0.009
Name: Total, dtype: object

In [119]:
# Canopy total over all biomes, scaled by 1e18.
total_canopy = canopy_biome_mean.Total.sum()/1e18

In [120]:
# Sum the values per (biome, site), take the across-site mean and SEM per
# biome, and attach the biome areas by index.
# NOTE(review): this uses 'numerical value', not the 'norm value' column used
# for soil; confirm that this is intended.
surface_biome_mean = (
    surface_data
    .groupby(['aggregated biome', 'site'])['numerical value']
    .sum()
    .groupby('aggregated biome')
    .agg(['mean', 'sem'])
    .merge(pd.DataFrame(biome_area1), left_index=True, right_index=True)
)

# Mean ± SEM as a value with uncertainty, then scaled up by biome area.
surface_biome_mean['unp'] = unp.uarray(
    nominal_values=surface_biome_mean['mean'],
    std_devs=surface_biome_mean['sem'],
)
surface_biome_mean['Total'] = surface_biome_mean['unp'] * surface_biome_mean['area']
surface_biome_mean['Total'] / 1e18

Crops                                             0.0079+/-0.0016
Pasture                                           0.0004+/-0.0004
Temperate Grasslands, Savannas and Shrublands    0.00284532+/-nan
Name: Total, dtype: object

In [121]:
# Above-ground (surface) total over all biomes, scaled by 1e18.
total_surface = surface_biome_mean.Total.sum()/1e18

In [493]:
# Grand total across the three environments (surface + canopy + soil).
total_surface+total_canopy+total_soil

0.21376374854241784+/-nan

In [24]:
# Number of distinct sites in the canopy and surface subsets, one per line.
print(
    canopy_data['site'].nunique(),
    surface_data['site'].nunique(),
    sep='\n',
)

14
18
