This notebook formats the data presented in Supplementary Material Tables 1 and 2.

In [1]:
import sys
import os
import numpy as np
import pandas as pd
import scipy
from scipy import stats
from pathlib import Path

#custom
sys.path.append('./../../lib')
import paths as paths
import utils as utils

# Widen the pandas display limits so the wide summary tables built below are shown in full.
for _opt_name, _opt_value in (('display.max_columns', 500), ('display.width', 1000)):
    pd.set_option(_opt_name, _opt_value)

In [2]:
# Project root, as configured in the local `paths` module.
dir_base = Path(paths.dir_main)

# Despite the name, this folder also supplies this notebook's inputs: every file read
# below was written to 'outputs' by another analysis script.
dir_out_figures = dir_base.joinpath("outputs")

# Trend statistics: suffix 1 = GFED files, suffix 2 = TTB files.
fp_vpd1 = dir_out_figures.joinpath("s4_timeseries_gfed_vpd_flip.csv")
fp_fw1 = dir_out_figures.joinpath("s5_timeseries_gfed_fwsl_flip.csv")
fp_vpd2 = dir_out_figures.joinpath("s4_timeseries_TTB_vpd_flip.csv")
fp_fw2 = dir_out_figures.joinpath("s5_timeseries_TTB_fwsl_flip.csv")
# Fire-season length and peak-value trend statistics.
fp_sl = dir_out_figures.joinpath("s6_timeseries_season_length.csv")
fp_pi = dir_out_figures.joinpath("s6_timeseries_peak_value.csv")

# Read everything in one pass. Note that both the vpd and the fw frames also carry the
# avhrr rows; the duplication is dealt with in the next cell.
vpd1, vpd2, fw1, fw2, sl, pi = (
    pd.read_csv(str(csv_path))
    for csv_path in (fp_vpd1, fp_vpd2, fp_fw1, fp_fw2, fp_sl, fp_pi)
)

In [3]:
# The vpd and fw files both include the avhrr rows, so avhrr is taken from the fw files
# only. In the vpd files the VPD series itself is labelled 'fireweather'; keep just
# those rows and relabel them so the metric name is unambiguous downstream.
vpd1, vpd2 = (
    frame[frame['metric'].eq('fireweather')].assign(metric='vpd')
    for frame in (vpd1, vpd2)
)

# Split the fw files into their avhrr and fire-weather parts. The avhrr rows must be
# extracted first, because fw1/fw2 are overwritten by the fire-weather subset afterwards.
av1, av2 = (frame[frame['metric'].eq('avhrr')].copy() for frame in (fw1, fw2))
fw1, fw2 = (frame[frame['metric'].eq('fireweather')].copy() for frame in (fw1, fw2))

# Season length and peak value come from separate files; tag each with its metric name,
# which later becomes the column suffix in the combined table.
sl['metric'] = 'season_length'
pi['metric'] = 'peak_value'

### Make Table 1

First, combine the trend statistics for the GFED regions.

In [4]:
# Build the GFED-region half of Table 1.
# Each input frame holds the trend statistics (tau, p, slope, lower, upper) of one metric
# per region and period. Every metric contributes two formatted columns to the merged
# table: "tau (p + significance stars)" and "trend [lower CI, upper CI]".
for i, df in enumerate([av1, sl, pi, fw1, vpd1]):
    # Work on a copy so the loaded frames keep their unrounded statistics and this cell
    # gives the same result no matter how often it is re-run.
    df = df.copy()

    # Keep the unrounded p-values. The significance classes below are decided on these:
    # deciding on the rounded values misclassifies borderline cases, e.g. p = 0.0996
    # rounds to 0.100 and would be reported as 'NS', p = 0.0497 would get '*' not '**'.
    p_raw = df['p'].copy()

    # first apply rounding (display precision only)
    df['tau'] = np.round(df['tau'],2)
    df['p'] = np.round(p_raw,3)
    df['slope'] = np.round(df['slope'],3)
    df['lower'] = np.round(df['lower'],3)
    df['upper'] = np.round(df['upper'],3)

    # combine dates into a single "start-end" label
    df['period'] = df.year_start.astype(int).astype(str) + '-' + df.year_end.astype(int).astype(str)

    ## format tau and p
    # tau is rounded again while formatting: float precision issues otherwise leak
    # into the string even though the column is already rounded
    df['tau_str'] = df['tau'].map(lambda x :f'{np.round(x,2)}')
    # right-pad with zeros so every p-value is shown with three decimals (0.05 -> 0.050)
    df['p_str'] = df['p'].map(lambda x :f'{np.round(x,3):0<5}')
    df['stars'] = ''

    # significance classes, evaluated on the unrounded p-values; the star assignments
    # run from the loosest to the strictest threshold so the strictest match wins
    df.loc[p_raw >= 0.1, 'tau_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'

    df['tau_p'] = df.tau_str + " (" + df.p_str + df.stars + ")"

    # Trend (slope*100) and CI, one decimal each
    df['slope_str'] = df['slope'].map(lambda x :f'{np.round(x*100, 1)}')
    df['lower_str'] = df['lower'].map(lambda x :f'{np.round(x*100, 1)}')
    df['upper_str'] = df['upper'].map(lambda x :f'{np.round(x*100, 1)}')
    df['trend_ci'] = df['slope_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"
    # no trend is reported for non-significant results
    df.loc[p_raw >= 0.1, 'trend_ci'] = ''

    # combine dfs: suffix the value columns with the metric name and merge on the keys
    prefix = df.metric.values[0]
    df = df[['region', 'period', 'tau_p', 'trend_ci']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['region', 'period']})
    if i == 0:
        df_out = df.copy()
    else:
        # outer merge keeps region/period pairs that exist for only some of the metrics
        df_out = df_out.merge(df,on=['region','period'], how='outer')

# rows labelled 'global' (lower case) are left out of this half of the table
df_out1 = df_out[df_out.region!='global'].copy()

Now do the same for the biomes.

In [5]:
# Build the biome half of Table 1, using the same formatting as the GFED-region half:
# per metric, a "tau (p + significance stars)" column and a "trend [lower CI, upper CI]"
# column, merged on region and period.
for i, df in enumerate([av2, fw2, vpd2]):
    # Work on a copy so the loaded frames keep their unrounded statistics and this cell
    # gives the same result no matter how often it is re-run.
    df = df.copy()

    # Keep the unrounded p-values. The significance classes below are decided on these
    # rather than on the rounded column, so a value such as p = 0.09996 (which rounds to
    # 0.1) is not misreported as 'NS'.
    p_raw = df['p'].copy()

    # first apply rounding (display precision only)
    df['tau'] = np.round(df['tau'],2)
    df['p'] = np.round(p_raw,4)
    df['slope'] = np.round(df['slope'],3)
    df['lower'] = np.round(df['lower'],3)
    df['upper'] = np.round(df['upper'],3)

    # combine dates into a single "start-end" label
    df['period'] = df.year_start.astype(int).astype(str) + '-' + df.year_end.astype(int).astype(str)

    ## format tau and p
    # tau is rounded again while formatting: float precision issues otherwise leak
    # into the string even though the column is already rounded
    df['tau_str'] = df['tau'].map(lambda x :f'{np.round(x,2)}')
    # right-pad with zeros so every p-value is shown with three decimals (0.05 -> 0.050)
    df['p_str'] = df['p'].map(lambda x :f'{np.round(x,3):0<5}')
    df['stars'] = ''

    # significance classes, evaluated on the unrounded p-values; the star assignments
    # run from the loosest to the strictest threshold so the strictest match wins
    df.loc[p_raw >= 0.1, 'tau_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'

    df['tau_p'] = df.tau_str + " (" + df.p_str + df.stars + ")"

    # Trend (slope*100) and CI, one decimal each
    df['slope_str'] = df['slope'].map(lambda x :f'{np.round(x*100, 1)}')
    df['lower_str'] = df['lower'].map(lambda x :f'{np.round(x*100, 1)}')
    df['upper_str'] = df['upper'].map(lambda x :f'{np.round(x*100, 1)}')
    df['trend_ci'] = df['slope_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"
    # no trend is reported for non-significant results
    df.loc[p_raw >= 0.1, 'trend_ci'] = ''

    # combine dfs: suffix the value columns with the metric name and merge on the keys
    prefix = df.metric.values[0]
    df = df[['region', 'period', 'tau_p', 'trend_ci']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['region', 'period']})
    if i == 0:
        df_out = df.copy()
    else:
        # outer merge keeps region/period pairs that exist for only some of the metrics
        df_out = df_out.merge(df,on=['region','period'], how='outer')

df_out2 = df_out.copy()

In [6]:
# Preview the biome half of Table 1 (one row per region and period) before it is
# stacked with the GFED-region half.
df_out2

Unnamed: 0,region,period,tau_p_avhrr,trend_ci_avhrr,tau_p_fireweather,trend_ci_fireweather,tau_p_vpd,trend_ci_vpd
0,Boreal,1986-2016,NS (0.638),,NS (0.327),,0.56 (< 0.001***),"0.4 [0.3, 0.6]"
1,Global,1986-2002,0.83 (< 0.001***),"3.4 [2.2, 5.9]",,,,
2,Global,1986-2016,NS (0.373),,0.48 (< 0.001***),"0.7 [0.4, 1.1]",0.63 (< 0.001***),"0.3 [0.3, 0.4]"
3,Global,2003-2016,-0.47 (0.019**),"-2.8 [-5.0, -0.5]",,,,
4,Temperate,1986-2016,NS (0.835),,0.22 (0.089*),"0.4 [-0.1, 0.8]",0.57 (< 0.001***),"0.3 [0.2, 0.4]"
5,Tropics,1986-2016,NS (0.248),,0.44 (< 0.001***),"0.9 [0.4, 1.4]",0.66 (< 0.001***),"0.3 [0.3, 0.4]"


In [7]:
# Stack the final Table 1: the biome-level 'Global' rows first, then the remaining
# biomes, then the GFED regions; renumber the rows from zero.
is_global_row = df_out2['region'].eq('Global')
table1 = pd.concat(
    [df_out2.loc[is_global_row], df_out2.loc[~is_global_row], df_out1],
    ignore_index=True,
)

# Write the table next to the other outputs, without the index column.
fp_table1 = dir_out_figures / "table1.csv"
table1.to_csv(str(fp_table1), index=False)

### Make Table 2

In [8]:
# Summary-statistic files for Table 2, written to the outputs folder by earlier scripts.
# NOTE: fp_pi, fp_sl, pi and sl reuse names from the Table 1 cells above, so the Table 1
# cells cannot be re-run after this one without reloading their inputs.
fp_anf = dir_out_figures.joinpath('04_stats_for_table2_anf.csv')
fp_pi = dir_out_figures.joinpath('04_stats_for_table2_pfam.csv')
fp_sl = dir_out_figures.joinpath('04_stats_for_table2_season_length.csv')
fp_vpd = dir_out_figures.joinpath('04_stats_for_table2_vpd.csv')
fp_fwsl = dir_out_figures.joinpath('04_stats_for_table2_fwsl.csv')

# Load each file and tag it with the variable name that becomes its column suffix
# in the combined table.
anf = pd.read_csv(str(fp_anf)).assign(var='anf')
pi = pd.read_csv(str(fp_pi)).assign(var='pfam')
sl = pd.read_csv(str(fp_sl)).assign(var='season_length')
vpd = pd.read_csv(str(fp_vpd)).assign(var='vpd_mean')
fdi = pd.read_csv(str(fp_fwsl)).assign(var='fwsl_mean')

# Table 2 is regional only, so drop the global rows. The label is spelled 'global' in
# the first three files and 'Global' (capital 'G') in the vpd and fwsl files.
anf, pi, sl = (frame[frame['gfed_name'] != 'global'] for frame in (anf, pi, sl))
vpd, fdi = (frame[frame['gfed_name'] != 'Global'] for frame in (vpd, fdi))

# in case of residual region name typo
anf.loc[anf['gfed_name'].eq('SEAS'), 'gfed_name'] = 'SOAS'

In [9]:
# Build Table 2: for every variable, the per-region median [IQR] of each era, the
# Mann-Whitney U statistic with its p-value, and the percentage change of the median and
# of the IQR from era 1 to era 2.
for i, df in enumerate([anf, sl, pi, fdi, vpd]):
    # Work on a copy: the kPa -> Pa scaling below is not idempotent, so mutating the
    # loaded frame would multiply VPD by 1000 again every time this cell is re-run.
    df = df.copy()

    if df['var'].iloc[0] == 'vpd_mean': #need to convert kPa -> Pa or everything rounded to 0 or 1
        df['50%'] = df['50%'] * 1000
        df['25%'] = df['25%'] * 1000
        df['75%'] = df['75%'] * 1000

    # Keep the unrounded p-values; the significance classes below are decided on these
    # so that borderline values (e.g. p = 0.0996, which rounds to 0.100) are classified
    # correctly.
    p_raw = df['mwu_2side_p'].copy()

    # first apply rounding
    df['50%'] = np.round(df['50%'],0)
    df['25%'] = np.round(df['25%'],0)
    df['75%'] = np.round(df['75%'],0)
    df['mwu_2side'] = np.round(df['mwu_2side'],0)
    df['mwu_2side_p'] = np.round(p_raw,3)

    ## format median and IQR
    df['med_str'] = df['50%'].map(lambda x :f'{int(x)}')
    df['lower_str'] = df['25%'].map(lambda x :f'{int(x)}')
    df['upper_str'] = df['75%'].map(lambda x :f'{int(x)}')
    df['miqr'] = df['med_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"

    # get MWU statistic and p value
    df['mwu_str'] = df['mwu_2side'].map(lambda x :f'{int(x)}')
    # right-pad with zeros so every p-value is shown with three decimals (0.05 -> 0.050)
    df['p_str'] = df['mwu_2side_p'].map(lambda x :f'{x:0<5}')
    df['stars'] = ''

    # significance classes, evaluated on the unrounded p-values; the star assignments
    # run from the loosest to the strictest threshold so the strictest match wins
    df.loc[p_raw >= 0.1, 'mwu_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'
    df['mwu_p'] = df.mwu_str + " (" + df.p_str + df.stars + ")"

    # add iqr and calc % changes (era 2 relative to era 1)
    # NOTE(review): these are computed from the already-rounded quartiles, which is
    # coarse for small-valued variables, and an era-1 IQR of 0 yields inf -- confirm
    # this is the intended definition.
    df['iqr'] = df['75%']-df['25%']
    df1 = df.loc[df.era==1]
    df2 = df.loc[df.era==2]
    df3 = df1[['gfed','gfed_name','50%','iqr']].merge(df2[['gfed','gfed_name','50%','iqr']], on=['gfed','gfed_name'])
    df3['med_pc'] = np.round((df3['50%_y'] - df3['50%_x'])/df3['50%_x']*100,0)
    df3['iqr_pc'] = np.round((df3['iqr_y'] - df3['iqr_x'])/df3['iqr_x']*100,0)
    df = df.merge(df3[['gfed', 'gfed_name', 'med_pc', 'iqr_pc']], on=['gfed','gfed_name'])

    # no need for duplication of some stats: they are shown on the era-1 row only.
    # The percentage columns are float, so cast them to object before blanking the era-2
    # rows; writing '' straight into a float column is a deprecated silent upcast.
    df = df.astype({'med_pc': object, 'iqr_pc': object})
    df.loc[df.era==2,'mwu_p'] = ''
    df.loc[df.era==2,'med_pc'] = ''
    df.loc[df.era==2,'iqr_pc'] = ''

    # combine dfs: suffix the value columns with the variable name and merge on the keys
    prefix = df['var'].values[0]
    df = df[['gfed', 'gfed_name', 'era', 'miqr', 'mwu_p', '50%', 'iqr', 'med_pc', 'iqr_pc']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['gfed', 'gfed_name', 'era']})
    if i == 0:
        df_out = df.copy()
    else:
        df_out = df_out.merge(df,on=['gfed', 'gfed_name', 'era'], how='outer')

# relabel: sort on the numeric era codes first, then swap them for year ranges. Mapping
# the whole column avoids writing strings into an integer column; codes other than
# 1 and 2 are left unchanged.
df_out=df_out.sort_values(['gfed', 'era'], ascending=True)
era_labels = {1: '1986-2000', 2: '2001-2016'}
df_out['era'] = df_out['era'].map(lambda era: era_labels.get(era, era))
df_out.to_csv(str(dir_out_figures / "table2.csv"), index=False)
df_out

  df.loc[df.era==2,'med_pc'] = ''
  df.loc[df.era==2,'iqr_pc'] = ''
  df.loc[df.era==2,'med_pc'] = ''
  df.loc[df.era==2,'iqr_pc'] = ''
  df.loc[df.era==2,'med_pc'] = ''
  df.loc[df.era==2,'iqr_pc'] = ''
  df.loc[df.era==2,'med_pc'] = ''
  df.loc[df.era==2,'iqr_pc'] = ''
  df.loc[df.era==2,'med_pc'] = ''
  df.loc[df.era==2,'iqr_pc'] = ''
  df_out.loc[df_out.era==1,'era'] = '1986-2000'


Unnamed: 0,gfed,gfed_name,era,miqr_anf,mwu_p_anf,50%_anf,iqr_anf,med_pc_anf,iqr_pc_anf,miqr_season_length,mwu_p_season_length,50%_season_length,iqr_season_length,med_pc_season_length,iqr_pc_season_length,miqr_pfam,mwu_p_pfam,50%_pfam,iqr_pfam,med_pc_pfam,iqr_pc_pfam,miqr_fwsl_mean,mwu_p_fwsl_mean,50%_fwsl_mean,iqr_fwsl_mean,med_pc_fwsl_mean,iqr_pc_fwsl_mean,miqr_vpd_mean,mwu_p_vpd_mean,50%_vpd_mean,iqr_vpd_mean,med_pc_vpd_mean,iqr_pc_vpd_mean
0,1,BONA,1986-2000,"428 [283, 614]",NS (0.738),428.0,331.0,23.0,46.0,"48 [48, 72]",NS (1.000),48.0,24.0,17.0,-25.0,"152 [58, 184]",NS (0.820),152.0,126.0,3.0,60.0,"6 [5, 7]",NS (0.579),6.0,2.0,-17.0,0.0,"555 [520, 564]",NS (0.308),555.0,44.0,2.0,-23.0
1,1,BONA,2001-2016,"525 [334, 817]",,525.0,483.0,,,"56 [48, 66]",,56.0,18.0,,,"157 [83, 285]",,157.0,202.0,,,"5 [4, 6]",,5.0,2.0,,,"565 [545, 579]",,565.0,34.0,,
2,2,TENA,1986-2000,"479 [422, 652]",21 (0.018**),479.0,230.0,98.0,166.0,"56 [40, 68]",18 (0.012**),56.0,28.0,79.0,100.0,"48 [42, 92]",15 (0.007***),48.0,50.0,233.0,56.0,"12 [12, 13]",5 (< 0.001***),12.0,1.0,33.0,400.0,"1432 [1407, 1465]",11 (0.001***),1432.0,58.0,8.0,112.0
3,2,TENA,2001-2016,"948 [652, 1263]",,948.0,611.0,,,"100 [72, 128]",,100.0,56.0,,,"160 [98, 176]",,160.0,78.0,,,"16 [14, 19]",,16.0,5.0,,,"1541 [1483, 1606]",,1541.0,123.0,,
4,3,CEAM,1986-2000,"594 [353, 714]",NS (0.413),594.0,361.0,-31.0,-26.0,"104 [68, 108]",NS (0.439),104.0,40.0,-19.0,-55.0,"85 [50, 106]",NS (0.664),85.0,56.0,-31.0,-11.0,"31 [26, 33]",NS (0.922),31.0,7.0,-6.0,0.0,"2037 [1948, 2078]",23 (0.027**),2037.0,130.0,4.0,-51.0
5,3,CEAM,2001-2016,"410 [271, 537]",,410.0,266.0,,,"84 [70, 88]",,84.0,18.0,,,"59 [41, 91]",,59.0,50.0,,,"29 [26, 33]",,29.0,7.0,,,"2122 [2085, 2149]",,2122.0,64.0,,
6,4,NHSA,1986-2000,"327 [260, 410]",NS (0.664),327.0,150.0,16.0,11.0,"96 [72, 100]",NS (1.000),96.0,28.0,-8.0,14.0,"50 [38, 67]",NS (0.688),50.0,29.0,14.0,114.0,"16 [13, 21]",21 (0.018**),16.0,8.0,50.0,-12.0,"1511 [1461, 1577]",15 (0.005***),1511.0,116.0,9.0,-51.0
7,4,NHSA,2001-2016,"380 [275, 442]",,380.0,167.0,,,"88 [72, 104]",,88.0,32.0,,,"57 [39, 101]",,57.0,62.0,,,"24 [20, 27]",,24.0,7.0,,,"1647 [1627, 1684]",,1647.0,57.0,,
8,5,SHSA,1986-2000,"5683 [2896, 5934]",NS (0.413),5683.0,3038.0,-7.0,-18.0,"80 [60, 96]",NS (0.140),80.0,36.0,20.0,17.0,"778 [488, 1001]",NS (0.974),778.0,513.0,-11.0,-7.0,"20 [16, 24]",14 (0.004***),20.0,8.0,30.0,0.0,"1608 [1583, 1669]",2 (< 0.001***),1608.0,86.0,10.0,-19.0
9,5,SHSA,2001-2016,"5290 [4041, 6533]",,5290.0,2492.0,,,"96 [78, 120]",,96.0,42.0,,,"696 [483, 960]",,696.0,477.0,,,"26 [24, 32]",,26.0,8.0,,,"1776 [1751, 1821]",,1776.0,70.0,,
