In [46]:
import baltic as bt
import numpy as np
import pandas as pd
import matplotlib as mpl
%matplotlib inline
import matplotlib.pyplot as plt
import re
from matplotlib import gridspec
import scipy.stats as stats
from scipy.stats import gaussian_kde
from matplotlib import rcParams 
# from matplotlib import style
from matplotlib.patches import Patch
from matplotlib.patches import Rectangle
import matplotlib.patheffects as path_effects

# 1. Basic leaf shape stats

In [47]:
# Load the curated palm trait table; the file is read as tab-separated.
palms=pd.read_csv(filepath_or_buffer='../data_files/palms_alltraits_curated_20220620.csv',sep='\t')
# Count the non-null `species` entries within each leaf-shape category.
palms.groupby('shape')['species'].count().reset_index()

Unnamed: 0,shape,species
0,bipinnate,14
1,cospalmate,442
2,entire,105
3,pinnate,1298
4,variable,139


## 1.1. Using the 2550 species phylogeny to quantify data

In [48]:
# Path to the local NEXUS file holding the phylogeny.
treeFileNexus='../data_files/Clean_1_1_MCCT_nexus.nex'
# The tip regex is a raw string: '\-' is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent Python versions.
# The pattern text handed to baltic is unchanged.
cc=bt.loadNexus(treeFileNexus,absoluteTime=False,tip_regex=r'_([0-9\-]+)$')

# Print summary statistics of the loaded tree (height, length, object counts).
cc.treeStats()
# NOTE(review): drawTree/sortBranches presumably compute the plotting layout and
# order the branches -- confirm against the baltic documentation.
cc.drawTree()
cc.sortBranches()
# The tree was loaded with absoluteTime=False; the absolute time is set here
# with a reference value of 0.
cc.setAbsoluteTime(0)


Tree height: 108.328870
Tree length: 18144.538310
strictly bifurcating tree
annotations present

Numbers of objects in tree: 5099 (2549 nodes and 2550 leaves)



In [49]:
# Names of all external nodes (tips) of the tree; treated as the species list.
sppintree=[tip.name for tip in cc.getExternal()]
print('Number of species in the tree: %s'%len(sppintree))

# Keep only the trait-table rows whose `tip_name` appears among the tree tips.
intree=palms.loc[palms['tip_name'].isin(sppintree)]
print('Number of species remaining in the dataset: %s'%len(intree))
# Difference between the number of tips and the number of matched rows.
print('Number of species not considered in the stats*: %s'%(len(sppintree)-len(intree)))
print('*These are climbing species or species with no information (few)')

Number of species in the tree: 2550
Number of species remaining in the dataset: 1978
Number of species not considered in the stats*: 572
*These are climbing species or species with no information (few)


In [50]:
# Non-null `species` count per leaf shape among the species present in the tree.
shapeperc=intree.groupby('shape')['species'].count().reset_index()
# Share of each shape relative to all rows of `intree`, formatted with two decimals.
shapeperc['percentage']=['%.2f'%pct for pct in shapeperc['species']*100/len(intree)]
shapeperc

Unnamed: 0,shape,species,percentage
0,bipinnate,14,0.71
1,cospalmate,432,21.84
2,entire,105,5.31
3,pinnate,1288,65.12
4,variable,139,7.03


# 2. Within-species leaf shape variation

In [52]:
# Show the rows whose leaf shape is recorded as 'variable'.
# (A preceding bare `intree['shape'].unique()` was dropped: only the last
# expression of a cell is displayed, so its value was computed and discarded.)
intree[intree['shape']=='variable']

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,index,species,coordinateUncertaintyInMeters,decimalLatitude,decimalLongitude,gbifID,...,CHELSA_vpd_min_stand,Simard_Pinto_3DGlobalVeg_JGR_stand,MaxStemHeight_m_stand,MaxStemDia_cm_stand,MaxLeafNumber_stand,Max_Blade_Length_m_stand,Max_Rachis_Length_m_stand,Max_Petiole_length_m_stand,StemHeightBladeLength_stand,HeightOverCanopy_stand
188,188,188,195,195,195,Bactris_acanthocarpa,100.0,-3.950000,-68.847224,1.260222e+09,...,-0.085226,1.639349,-0.411648,-0.088756,0.374731,1.112888,0.319509,1.810132,0.274911,-1.572486
203,203,203,211,211,211,Bactris_coloradonis,3615.0,8.631950,-78.959858,1.260244e+09,...,-0.784575,-0.538311,0.792754,0.019137,-1.219380,0.881575,0.319509,1.059462,0.816274,0.690442
205,205,205,213,213,213,Bactris_corossilla,100.0,-0.973717,-73.830278,1.258443e+09,...,-0.317656,2.024892,0.373776,-0.519291,-0.734762,0.634625,-0.072151,-0.748554,0.369627,-1.582306
210,210,210,218,218,218,Bactris_faucium,,-17.080000,-65.366660,1.259842e+09,...,0.532956,-0.538311,0.373776,-1.154232,-0.347803,-0.807886,-1.252286,-0.748554,0.125445,0.618597
215,215,215,223,223,223,Bactris_glandulosa,437.0,8.790000,-83.418789,1.259766e+09,...,-0.467876,-0.538311,0.230882,-0.431984,-1.219380,0.634625,0.058299,0.872526,0.361580,0.643807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,1681,1681,2351,2351,443,Hydriastele_boumae,,,,,...,,,1.811139,1.090952,,0.836817,0.628320,0.087606,1.647940,
1723,1723,1723,2400,2400,492,Iguanura_piahensis,,,,,...,,,-0.411648,,,-0.205191,,-0.748554,-0.451160,
1725,1725,1725,2402,2402,494,Iguanura_sanderiana,,,,,...,,,-0.580655,-0.615804,,0.063328,-0.164402,0.087606,-0.384713,
1728,1728,1728,2405,2405,497,Iguanura_wallichiana,,,,,...,,,0.061875,-0.615804,-0.347803,0.401210,0.257511,-0.748554,0.079015,


In [53]:
# Write subfamily, tribe and tip name of the 'variable'-shape species to a
# tab-separated file (the DataFrame index is written as well).
intree.loc[intree['shape']=='variable',['PalmSubfamily','PalmTribe','tip_name']].to_csv(
    '../data_files/Polymorphic_species_20220706.csv',
    sep='\t',
)

# 3. Percentage of annotated species per variable

In [54]:
# Non-null count for every column whose name contains 'stand', taken from the
# `count` row of describe(). (A preceding bare `list(intree.columns)` was
# dropped: it was not the last expression of the cell, so it was never shown.)
stand_columns=[col for col in intree.columns if 'stand' in col]
annotated=intree[stand_columns].describe().T['count'].reset_index().copy(deep=True)
# Percentage of the in-tree species that have a value for each variable,
# formatted with two decimals.
annotated['percent']=['%.2f'%(s) for s in annotated['count']*100/len(intree)]
annotated

Unnamed: 0,index,count,percent
0,CHELSA_ai_stand,1508.0,76.24
1,CHELSA_bio10_stand,1508.0,76.24
2,CHELSA_bio11_stand,1508.0,76.24
3,CHELSA_bio12_stand,1508.0,76.24
4,CHELSA_bio13_stand,1508.0,76.24
5,CHELSA_bio14_stand,1508.0,76.24
6,CHELSA_bio15_stand,1508.0,76.24
7,CHELSA_bio16_stand,1508.0,76.24
8,CHELSA_bio17_stand,1508.0,76.24
9,CHELSA_bio18_stand,1508.0,76.24


In [58]:
# Save the per-variable annotation counts and percentages as a tab-separated file.
annotated.to_csv(
    path_or_buf='../data_files/variables_sppannotated_20220620.txt',
    sep='\t',
)

# 4. Number of climbing species

In [55]:
# Climbing-species table; the list was made in the 02_Variables_database notebook.
climbing=pd.read_csv(filepath_or_buffer='../data_files/climbing_spp_20220620.txt',sep='\t')
# Summary statistics before filtering out species not in the
# Faurby et al. (2016) tree.
climbing.describe()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,index,coordinateUncertaintyInMeters,decimalLatitude,decimalLongitude,gbifID,individualCount,year,CHELSA_ai,...,Climbing,Acaulescent,MaxStemHeight_m,MaxStemDia_cm,MaxLeafNumber,Max_Blade_Length_m,Max_Rachis_Length_m,Max_Petiole_length_m,StemHeightBladeLength,HeightOverCanopy
count,722.0,722.0,722.0,113.0,400.0,400.0,400.0,78.0,391.0,400.0,...,525.0,525.0,722.0,377.0,0.0,722.0,296.0,722.0,197.0,400.0
mean,1369.404432,1369.404432,518.468144,2080.159867,3.103748,85.773303,1532262000.0,1.916667,1980.337596,-79998.93125,...,1.028571,0.007619,12.122576,3.497215,,0.866891,1.620304,0.058289,0.0,130686000000.0
std,801.189738,801.189738,402.691788,7761.648856,9.891852,64.276672,719998300.0,1.531947,31.932278,39578.102259,...,0.166758,0.123325,19.141451,5.273225,,1.240022,1.132431,0.186288,0.0,207940500000.0
min,14.0,14.0,4.0,0.01,-29.368611,-96.033315,94123780.0,1.0,1845.0,-99999.0,...,1.0,0.0,0.0,0.3,,0.0,0.19,0.0,0.0,0.0
25%,530.25,530.25,240.0,1.0,-1.41625,98.15295,912430100.0,1.0,1974.75,-99999.0,...,1.0,0.0,0.0,1.3,,0.0,0.8,0.0,0.0,0.0
50%,1335.0,1335.0,430.5,5.0,3.94333,109.569942,1420575000.0,1.0,1987.0,-99999.0,...,1.0,0.0,0.0,2.2,,0.0,1.3,0.0,0.0,4.11
75%,2115.75,2115.75,605.0,301.0,8.829722,118.708834,1993734000.0,2.5,2002.0,-99999.0,...,1.0,0.0,20.0,4.0,,1.5,2.1625,0.0,0.0,214325000000.0
max,2716.0,2716.0,1891.0,65886.0,27.206512,153.193425,3462293000.0,10.0,2020.0,3.135691,...,2.0,2.0,170.0,70.0,,7.0,6.4,2.1,0.0,1534000000000.0


In [56]:
# Names of all external nodes (tips) of the tree; treated as the species list.
sppintree=[tip.name for tip in cc.getExternal()]
print('Number of species in the tree: %s'%len(sppintree))

# Climbing-species rows whose `tip_name` appears among the tree tips.
climbintree=climbing.loc[climbing['tip_name'].isin(sppintree)]
print('Number of Climbing species in the tree: %s'%len(climbintree))

Number of species in the tree: 2550
Number of Climbing species in the tree: 572


In [57]:
# Write subfamily, tribe and tip name of the in-tree climbing species to a
# tab-separated file (the DataFrame index is written as well).
climbintree.loc[:,['PalmSubfamily','PalmTribe','tip_name']].to_csv(
    '../data_files/Climbingintree_species_20220706.csv',
    sep='\t',
)