This script formats the data presented in Supplementary Material Tables 1 & 2.

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import scipy
from scipy import stats
from pathlib import Path

#custom
sys.path.append('./../../lib')
import paths as paths
import utils as utils

# Raise the pandas display limits (up to 500 columns, 1000 characters wide)
# so the wide summary tables built below can be inspected in the notebook.
pd.set_option('display.max_columns', 500, 'display.width', 1000)

In [None]:
# Project root, taken from the local `paths` module.
dir_base = Path(paths.dir_main)

# Despite its name this folder also holds the inputs: every file read in this
# cell was written to 'outputs' by another analysis script.
dir_out_figures = dir_base.joinpath("outputs")

# Time-series trend statistics (suffix 1 = gfed files, suffix 2 = TTB files).
fp_vpd1 = dir_out_figures.joinpath("s4_timeseries_gfed_vpd_flip.csv")
fp_fw1 = dir_out_figures.joinpath("s5_timeseries_gfed_fwsl_flip.csv")
fp_vpd2 = dir_out_figures.joinpath("s4_timeseries_TTB_vpd_flip.csv")
fp_fw2 = dir_out_figures.joinpath("s5_timeseries_TTB_fwsl_flip.csv")
fp_sl = dir_out_figures.joinpath("s6_timeseries_season_length.csv")
fp_pi = dir_out_figures.joinpath("s6_timeseries_peak_value.csv")

# Load everything in one pass. Note that both the vpd and the fw files also
# contain the avhrr rows.
vpd1, vpd2, fw1, fw2, sl, pi = (
    pd.read_csv(str(csv_path))
    for csv_path in (fp_vpd1, fp_vpd2, fp_fw1, fp_fw2, fp_sl, fp_pi)
)

In [None]:
# The avhrr rows are present in both the vpd and the fw files; they are only
# needed once, so drop them here. The rows kept from the vpd files are tagged
# 'fireweather' on disk and are relabelled 'vpd'.
vpd1 = vpd1[vpd1['metric'] == 'fireweather'].assign(metric='vpd')
vpd2 = vpd2[vpd2['metric'] == 'fireweather'].assign(metric='vpd')

# Split the fw files into their avhrr and fireweather parts. The avhrr subsets
# have to be taken first, because fw1/fw2 are overwritten straight afterwards.
av1 = fw1[fw1['metric'] == 'avhrr'].copy()
av2 = fw2[fw2['metric'] == 'avhrr'].copy()
fw1 = fw1[fw1['metric'] == 'fireweather'].copy()
fw2 = fw2[fw2['metric'] == 'fireweather'].copy()

# Label the season-length and peak-value frames so they can be told apart
# once everything is combined.
sl['metric'] = 'season_length'
pi['metric'] = 'peak_value'

### Make Table 1

First, combine the GFED data.

In [None]:
# Build the GFED part of Table 1: one row per (region, period) and, for every
# metric, a 'tau (p + stars)' column and a 'trend [lower, upper]' column.
for i, df in enumerate([av1, sl, pi, fw1, vpd1]):
    # Work on a private copy so the source frames are left untouched and the
    # cell gives the same result when it is re-run.
    df = df.copy()

    # Significance is classified on the p-values as loaded, i.e. before the
    # display rounding below. Rounding first moves values across the
    # thresholds (e.g. 0.0996 -> 0.1 -> 'NS', 0.0497 -> 0.05 -> one star
    # instead of two, 0.0006 -> 0.001 -> not shown as '< 0.001').
    p_raw = df['p'].copy()

    # first apply rounding (display precision only)
    df['tau'] = np.round(df['tau'], 2)
    df['p'] = np.round(df['p'], 3)
    df['slope'] = np.round(df['slope'], 3)
    df['lower'] = np.round(df['lower'], 3)
    df['upper'] = np.round(df['upper'], 3)

    # combine dates into a 'start-end' label
    df['period'] = df.year_start.astype(int).astype(str) + '-' + df.year_end.astype(int).astype(str)

    ## format tau and p
    # tau is rounded again while formatting: even though it is already
    # rounded, float precision can otherwise leak into the string
    df['tau_str'] = df['tau'].map(lambda x: f'{np.round(x, 2)}')
    # fixed three decimals (0.05 -> '0.050'); unlike zero-padding the string,
    # this also leaves a missing value as 'nan' rather than 'nan00'
    df['p_str'] = df['p'].map(lambda x: f'{x:.3f}')
    df['stars'] = ''

    # later assignments overwrite earlier ones, so the strictest level wins
    df.loc[p_raw >= 0.1, 'tau_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'

    df['tau_p'] = df.tau_str + " (" + df.p_str + df.stars + ")"

    # Trend (slope*100) and CI, blanked for non-significant rows
    df['slope_str'] = df['slope'].map(lambda x: f'{np.round(x*100, 1)}')
    df['lower_str'] = df['lower'].map(lambda x: f'{np.round(x*100, 1)}')
    df['upper_str'] = df['upper'].map(lambda x: f'{np.round(x*100, 1)}')
    df['trend_ci'] = df['slope_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"
    df.loc[p_raw >= 0.1, 'trend_ci'] = ''

    # combine dfs: suffix the value columns with the metric name, then join
    # on the shared keys
    prefix = df.metric.values[0]
    df = df[['region', 'period', 'tau_p', 'trend_ci']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['region', 'period']})
    if i == 0:
        df_out = df.copy()
    else:
        df_out = df_out.merge(df, on=['region', 'period'], how='outer')

# keep the result under its own name and drop the rows labelled 'global'
df_out1 = df_out.copy()
df_out1 = df_out1[df_out1.region!='global']

Now do the same for the biomes.

In [None]:
# Build the biome part of Table 1: one row per (region, period) and, for every
# metric, a 'tau (p + stars)' column and a 'trend [lower, upper]' column.
for i, df in enumerate([av2, fw2, vpd2]):
    # Work on a private copy so the source frames are left untouched and the
    # cell gives the same result when it is re-run.
    df = df.copy()

    # Significance is classified on the p-values as loaded, i.e. before the
    # display rounding below. Rounding first moves values across the
    # thresholds (e.g. 0.09996 -> 0.1 -> 'NS').
    p_raw = df['p'].copy()

    # first apply rounding (display precision only). p is rounded once, to the
    # three decimals that are printed, instead of to four and then again to
    # three while formatting.
    df['tau'] = np.round(df['tau'], 2)
    df['p'] = np.round(df['p'], 3)
    df['slope'] = np.round(df['slope'], 3)
    df['lower'] = np.round(df['lower'], 3)
    df['upper'] = np.round(df['upper'], 3)

    # combine dates into a 'start-end' label
    df['period'] = df.year_start.astype(int).astype(str) + '-' + df.year_end.astype(int).astype(str)

    ## format tau and p
    # tau is rounded again while formatting: even though it is already
    # rounded, float precision can otherwise leak into the string
    df['tau_str'] = df['tau'].map(lambda x: f'{np.round(x, 2)}')
    # fixed three decimals (0.05 -> '0.050'); unlike zero-padding the string,
    # this also leaves a missing value as 'nan' rather than 'nan00'
    df['p_str'] = df['p'].map(lambda x: f'{x:.3f}')
    df['stars'] = ''

    # later assignments overwrite earlier ones, so the strictest level wins
    df.loc[p_raw >= 0.1, 'tau_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'

    df['tau_p'] = df.tau_str + " (" + df.p_str + df.stars + ")"

    # Trend (slope*100) and CI, blanked for non-significant rows
    df['slope_str'] = df['slope'].map(lambda x: f'{np.round(x*100, 1)}')
    df['lower_str'] = df['lower'].map(lambda x: f'{np.round(x*100, 1)}')
    df['upper_str'] = df['upper'].map(lambda x: f'{np.round(x*100, 1)}')
    df['trend_ci'] = df['slope_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"
    df.loc[p_raw >= 0.1, 'trend_ci'] = ''

    # combine dfs: suffix the value columns with the metric name, then join
    # on the shared keys
    prefix = df.metric.values[0]
    df = df[['region', 'period', 'tau_p', 'trend_ci']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['region', 'period']})
    if i == 0:
        df_out = df.copy()
    else:
        df_out = df_out.merge(df, on=['region', 'period'], how='outer')

# keep the result under its own name
df_out2 = df_out.copy()

In [None]:
# Show the combined biome table as the cell output.
df_out2

In [None]:
# Assemble Table 1: the 'Global' row(s) of the biome table first, then the
# remaining biome rows, then the GFED rows, renumbered from zero.
is_global_row = df_out2['region'] == 'Global'
table1_parts = [df_out2.loc[is_global_row], df_out2.loc[~is_global_row], df_out1]
table1 = pd.concat(table1_parts, ignore_index=True)

fp_table1 = dir_out_figures / "table1.csv"
table1.to_csv(str(fp_table1), index=False)

### Make Table 2

In [None]:
# Per-region, per-era summary statistics written by the other analysis
# scripts. Note that fp_pi/fp_sl and pi/sl are re-used names: from here on
# they refer to the Table 2 inputs.
fp_anf = dir_out_figures / '04_stats_for_table2_anf.csv'
fp_pi = dir_out_figures / '04_stats_for_table2_pfam.csv'
fp_sl = dir_out_figures / '04_stats_for_table2_season_length.csv'
fp_vpd = dir_out_figures / '04_stats_for_table2_vpd.csv'
fp_fwsl = dir_out_figures / '04_stats_for_table2_fwsl.csv'

anf = pd.read_csv(str(fp_anf))
pi = pd.read_csv(str(fp_pi))
sl = pd.read_csv(str(fp_sl))
vpd = pd.read_csv(str(fp_vpd))
fdi = pd.read_csv(str(fp_fwsl))

# tag each frame with the variable it describes (used later as column suffix)
anf['var'] = 'anf'
sl['var'] = 'season_length'
pi['var'] = 'pfam'
vpd['var'] = 'vpd_mean'
fdi['var'] = 'fwsl_mean'

# if global, remove. The files do not agree on capitalisation ('global' vs
# 'Global'), so compare case-insensitively. `.copy()` makes each result an
# independent frame: they are written to below and in the following cell, and
# writing to a filtered slice triggers pandas' SettingWithCopyWarning.
anf = anf[anf['gfed_name'].str.lower() != 'global'].copy()
pi = pi[pi['gfed_name'].str.lower() != 'global'].copy()
sl = sl[sl['gfed_name'].str.lower() != 'global'].copy()
vpd = vpd[vpd['gfed_name'].str.lower() != 'global'].copy()
fdi = fdi[fdi['gfed_name'].str.lower() != 'global'].copy()

anf.loc[anf.gfed_name=='SEAS', 'gfed_name'] = 'SOAS' # in case of residual region name typo

In [None]:
# Build Table 2: one row per (gfed, gfed_name, era) and, for every variable,
# the median [IQR] string, the Mann-Whitney U result and the percentage change
# of median and IQR between era 1 and era 2.
for i, df in enumerate([anf, sl, pi, fdi, vpd]):
    # Work on a private copy: the unit conversion below would otherwise be
    # applied again to the same frame every time this cell is re-run.
    df = df.copy()

    if df['var'].iloc[0] == 'vpd_mean': #need to convert kPa -> Pa or everything rounded to 0 or 1
        df['50%'] = df['50%'] * 1000
        df['25%'] = df['25%'] * 1000
        df['75%'] = df['75%'] * 1000

    # Significance is classified on the p-values as loaded, before the display
    # rounding below, so that rounding cannot move a value across a threshold
    # (e.g. 0.0996 -> 0.1 -> 'NS').
    p_raw = df['mwu_2side_p'].copy()

    # first apply rounding
    df['50%'] = np.round(df['50%'], 0)
    df['25%'] = np.round(df['25%'], 0)
    df['75%'] = np.round(df['75%'], 0)
    df['mwu_2side'] = np.round(df['mwu_2side'], 0)
    df['mwu_2side_p'] = np.round(df['mwu_2side_p'], 3)

    ## format median and IQR
    df['med_str'] = df['50%'].map(lambda x: f'{int(x)}')
    df['lower_str'] = df['25%'].map(lambda x: f'{int(x)}')
    df['upper_str'] = df['75%'].map(lambda x: f'{int(x)}')
    df['miqr'] = df['med_str'] + " [" + df['lower_str'] + ", " + df['upper_str'] + "]"

    # get MWU statistic and p value
    df['mwu_str'] = df['mwu_2side'].map(lambda x: f'{int(x)}')
    # fixed three decimals (0.05 -> '0.050'); unlike zero-padding the string,
    # this also leaves a missing value as 'nan' rather than 'nan00'
    df['p_str'] = df['mwu_2side_p'].map(lambda x: f'{x:.3f}')
    df['stars'] = ''

    # later assignments overwrite earlier ones, so the strictest level wins
    df.loc[p_raw >= 0.1, 'mwu_str'] = 'NS'
    df.loc[p_raw < 0.001, 'p_str'] = '< 0.001'
    df.loc[p_raw < 0.1, 'stars'] = '*'
    df.loc[p_raw < 0.05, 'stars'] = '**'
    df.loc[p_raw < 0.01, 'stars'] = '***'
    df['mwu_p'] = df.mwu_str + " (" + df.p_str + df.stars + ")"

    # add iqr and calc % changes (era 2 relative to era 1, from the rounded
    # quartiles); after the merge '_x' is era 1 and '_y' is era 2
    df['iqr'] = df['75%'] - df['25%']
    df1 = df.loc[df.era==1]
    df2 = df.loc[df.era==2]
    df3 = df1[['gfed','gfed_name','50%','iqr']].merge(df2[['gfed','gfed_name','50%','iqr']], on=['gfed','gfed_name'])
    df3['med_pc'] = np.round((df3['50%_y'] - df3['50%_x'])/df3['50%_x']*100, 0)
    df3['iqr_pc'] = np.round((df3['iqr_y'] - df3['iqr_x'])/df3['iqr_x']*100, 0)
    # inner join: only regions that have both eras keep their rows
    df = df.merge(df3[['gfed', 'gfed_name', 'med_pc', 'iqr_pc']], on=['gfed','gfed_name'])

    # no need for duplication of some stats: they are shown on the era-1 row
    # only. The percentage columns are numeric, so convert them to object
    # dtype explicitly before writing '' into them; assigning a string into a
    # float column relies on an implicit upcast that pandas has deprecated.
    df['med_pc'] = df['med_pc'].astype(object)
    df['iqr_pc'] = df['iqr_pc'].astype(object)
    second_era = df.era == 2
    df.loc[second_era, 'mwu_p'] = ''
    df.loc[second_era, 'med_pc'] = ''
    df.loc[second_era, 'iqr_pc'] = ''

    # combine dfs: suffix the value columns with the variable name, then join
    # on the shared keys
    prefix = df['var'].values[0]
    df = df[['gfed', 'gfed_name', 'era', 'miqr', 'mwu_p', '50%', 'iqr', 'med_pc', 'iqr_pc']]
    df = df.rename(columns={col: col + '_' + prefix
                            for col in df.columns if col not in ['gfed', 'gfed_name', 'era']})
    if i == 0:
        df_out = df.copy()
    else:
        df_out = df_out.merge(df, on=['gfed', 'gfed_name', 'era'], how='outer')

# Sort while 'era' is still numeric, then swap the era codes for their year
# ranges. The column is rebuilt with an explicit mapping rather than by
# writing strings into the numeric column through .loc, which relies on an
# implicit dtype upcast that pandas has deprecated. Codes other than 1 and 2
# are passed through unchanged.
era_labels = {1: '1986-2000', 2: '2001-2016'}
df_out = df_out.sort_values(['gfed', 'era'], ascending=True)
df_out['era'] = df_out['era'].map(lambda era: era_labels.get(era, era))
df_out.to_csv(str(dir_out_figures / "table2.csv"), index=False)
df_out