In [26]:
import numpy as np
import pandas as pd 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import utils
import plot_utils
import random

In [27]:
# Input locations (relative to the notebook's working directory).
data_file = './data/paleo_meso_ceno_data_period.csv'
time_file = './data/timeScale.xlsx'

# Sample table, ordered from largest to smallest mean_ma.
data = pd.read_csv(data_file).sort_values(by='mean_ma', ascending=False)

# Time-scale table and the subset used for the time bar under each figure.
# NOTE(review): the meaning of the arguments 3 and 541 is defined by
# utils.get_timesubset, which is not visible here -- confirm.
time = pd.read_excel(time_file, sheet_name='Sheet1')
time_period = utils.get_timesubset(time, 3, 541)

# Width of every time-scale interval: max_ma - min_ma, row by row.
width_period = [
    upper - lower
    for upper, lower in zip(time_period['max_ma'].values,
                            time_period['min_ma'].values)
]


In [28]:
# Per-period statistics of 'skeletal_total' for the complete data set,
# ordered by increasing 'time'.
data_skeletal = (
    utils.get_stats(data, 'skeletal_total', 'period')
    .sort_values(by='time')
)

In [29]:
# Sorted array of the distinct period labels present in the data.
periods = np.unique(data['period'])

In [30]:
# Publication key: publication year (as text) immediately followed by the
# reference string. Used further down to drop whole publications.
data['publication'] = (
    data['publication_year'].astype(str)
    + data['reference']
)

## Figure S7: Test influence of paleocontinents

In [31]:
# Empty accumulator for the per-period statistics of every
# leave-one-paleocontinent-out run.
sampling_test = pd.DataFrame(
    columns=[
        'time', 'period', 'no_of_samp',
        'mean', 'median', 'min', 'max',
        'removed_paleocontinent',
    ]
)

In [32]:
# Distinct paleocontinent labels, in order of first appearance.
paleocontinents = data['Paleocontinents1'].unique()

In [33]:
# Leave-one-out test: for every paleocontinent, drop its rows and recompute
# the per-period statistics of 'skeletal_total' on what remains.
#
# The per-run results are collected in a list and concatenated once at the
# end, instead of growing `sampling_test` with pd.concat on every iteration
# (quadratic copying, and concatenation with an empty frame). Assigning the
# result also makes the cell idempotent: re-running it no longer appends a
# second copy of every run.
sample_cols = ['period', 'mean_ma', 'numberOfSamples', 'skeletal_total']

stats_frames = []
for i in paleocontinents:
    # NOTE: if `i` is NaN the comparison is True for every row, so nothing
    # is removed for that entry.
    data_samples = (
        data.loc[data.Paleocontinents1 != i, sample_cols]
        .reset_index(drop=True)
    )
    stats = utils.get_stats(data_samples, 'skeletal_total', 'period')
    stats['removed_paleocontinent'] = i  # tag the run with what was removed
    stats_frames.append(stats)

sampling_test = pd.concat(stats_frames, ignore_index=True)

In [34]:
# Order all runs by increasing 'time'.
sampling_test = sampling_test.sort_values(by='time')

In [35]:
# Summarise, per period, the spread of the run-level 'mean' values:
# their mean, median (0.5 quantile), minimum and maximum, together with the
# smallest 'time' recorded for that period.
mean, median = [], []
min_value, max_value = [], []
period, time = [], []

for p in periods:
    temp = sampling_test.loc[sampling_test['period'] == p]
    run_means = temp['mean']

    period.append(p)
    time.append(temp['time'].min())

    mean.append(run_means.mean())
    median.append(run_means.quantile(0.5))
    min_value.append(run_means.min())
    max_value.append(run_means.max())

In [36]:
# One row per period with the summary values gathered above,
# ordered by increasing 'time'.
sampling_stats = pd.DataFrame(
    {
        'time': time,
        'min': min_value,
        'max': max_value,
        'median': median,
        'mean': mean,
        'period': period,
    }
).sort_values(by='time')

In [37]:
# Figure S7 layout: a tall data panel (row 1) over a thin time-scale bar (row 2).
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    row_heights=[0.85, 0.15],
    vertical_spacing=0.001,
)

# Traces for the data panel, in drawing order. The first two form the shaded
# min-max envelope: the second one fills down to the first ('tonexty').
band_time = sampling_stats['time']
figure_traces = [
    go.Scatter(
        x=band_time, y=sampling_stats['min'],
        mode='lines', fill=None,
        line=dict(color='white'),
        showlegend=False,
    ),
    go.Scatter(
        x=band_time, y=sampling_stats['max'],
        mode='lines', fill='tonexty',
        fillcolor='rgba(174, 215, 234,0.5)',
        line=dict(color='white'),
        showlegend=False,
    ),
    # Per-period mean of the complete data set, for reference.
    go.Scatter(
        x=data_skeletal['time'], y=data_skeletal['mean'],
        mode='lines+markers',
        name='All data mean',
        line=dict(color='red', dash='dot'),
        marker=dict(color='red', size=5),
        showlegend=True,
    ),
]
for trace in figure_traces:
    fig.add_trace(trace, row=1, col=1)

# Data panel x-axis: reversed range (541 on the left), boxed, no tick labels.
fig.update_xaxes(
    row=1, col=1,
    range=(541, -2),
    showticklabels=False,
    showline=True, mirror=True, linecolor='black',
)

# Time-scale bar in row 2 (drawn by the project helper).
plot_utils.plot_time(fig, time_period, width_period, 2, 1)

fig.update_xaxes(
    row=2, col=1,
    range=(541, -2),
    ticks='outside',
    title='Time (Ma)',
)

fig.update_yaxes(
    row=1, col=1,
    title='Skeletal biomass %',
    range=(0, 89),
    ticks='outside',
    showline=True, mirror=True, linecolor='black',
)

# White canvas, legend pinned to the top-left corner.
fig.update_layout(
    height=600, width=1000,
    font_color='black',
    paper_bgcolor='white',
    plot_bgcolor='white',
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()
fig.write_image('./figures/Figure_S7.png', scale=6)

## Figure S8: Influence of publications

In [38]:
# Fresh, empty accumulator for the publication-removal runs.
sampling_test = pd.DataFrame(
    columns=[
        'time', 'period', 'no_of_samp',
        'mean', 'median', 'min', 'max',
    ]
)

In [39]:
# Publication-influence test: in each run, one randomly chosen publication is
# removed from every period and the per-period statistics of 'skeletal_total'
# are recomputed on the remaining rows.
#
# Changes relative to the previous version of this cell:
#   * a seeded RandomState is used so the figure is reproducible
#     (Restart & Run All gives the same envelope every time);
#   * the per-period subsets and their publication lists do not depend on the
#     run, so they are computed once instead of 1000 times;
#   * frames are collected in lists and concatenated once, instead of growing
#     a DataFrame with pd.concat inside the loops;
#   * the result is assigned (not appended), so re-running the cell does not
#     duplicate runs.
N_RUNS = 1000
PUB_SEED = 0  # arbitrary; fixed only for reproducibility
pub_rng = np.random.RandomState(PUB_SEED)

sample_cols = ['period', 'mean_ma', 'numberOfSamples', 'skeletal_total']

# Loop-invariant: rows of each period and the distinct publications in it.
rows_by_period = {
    p: data.loc[data.period == p, sample_cols + ['publication']]
    for p in periods
}
pubs_by_period = {
    p: np.unique(rows['publication'])
    for p, rows in rows_by_period.items()
}

stats_frames = []
for run in range(N_RUNS):
    kept = []
    for p in periods:
        rows = rows_by_period[p]
        # Randomly select one publication to remove from this period.
        # NOTE(review): a period with a single publication is left with no
        # rows for that run -- confirm this is acceptable for utils.get_stats.
        dropped_pub = pub_rng.choice(pubs_by_period[p])
        kept.append(rows.loc[rows['publication'] != dropped_pub, sample_cols])
    data_samples = pd.concat(kept, ignore_index=True)
    stats = utils.get_stats(data_samples, 'skeletal_total', 'period')
    stats_frames.append(stats)

sampling_test = pd.concat(stats_frames, ignore_index=True)

In [40]:
# Order all runs by increasing 'time'.
sampling_test = sampling_test.sort_values(by='time')

In [41]:
# Summarise, per period, the spread of the run-level 'mean' values:
# their mean, median (0.5 quantile), minimum and maximum, together with the
# smallest 'time' recorded for that period.
mean, median = [], []
min_value, max_value = [], []
period, time = [], []

for p in periods:
    temp = sampling_test.loc[sampling_test['period'] == p]
    run_means = temp['mean']

    period.append(p)
    time.append(temp['time'].min())

    mean.append(run_means.mean())
    median.append(run_means.quantile(0.5))
    min_value.append(run_means.min())
    max_value.append(run_means.max())

In [42]:
# One row per period with the summary values gathered above,
# ordered by increasing 'time'.
sampling_stats = pd.DataFrame(
    {
        'time': time,
        'min': min_value,
        'max': max_value,
        'median': median,
        'mean': mean,
        'period': period,
    }
).sort_values(by='time')

In [43]:
# Figure S8 layout: a tall data panel (row 1) over a thin time-scale bar (row 2).
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    row_heights=[0.85, 0.15],
    vertical_spacing=0.001,
)

# Traces for the data panel, in drawing order. The first two form the shaded
# min-max envelope: the second one fills down to the first ('tonexty').
band_time = sampling_stats['time']
figure_traces = [
    go.Scatter(
        x=band_time, y=sampling_stats['min'],
        mode='lines', fill=None,
        line=dict(color='white'),
        showlegend=False,
    ),
    go.Scatter(
        x=band_time, y=sampling_stats['max'],
        mode='lines', fill='tonexty',
        fillcolor='rgba(174, 215, 234,0.5)',
        line=dict(color='white'),
        showlegend=False,
    ),
    # Mean over all runs of the per-period mean.
    go.Scatter(
        x=band_time, y=sampling_stats['mean'],
        mode='lines+markers',
        name='Mean',
        line=dict(color='black'),
        marker=dict(color='black', size=8),
        showlegend=True,
    ),
    # Per-period mean of the complete data set, for reference.
    go.Scatter(
        x=data_skeletal['time'], y=data_skeletal['mean'],
        mode='lines+markers',
        name='All data mean',
        line=dict(color='red', dash='dot'),
        marker=dict(color='red', size=5),
        showlegend=True,
    ),
]
for trace in figure_traces:
    fig.add_trace(trace, row=1, col=1)

# Data panel x-axis: reversed range (541 on the left), boxed, no tick labels.
fig.update_xaxes(
    row=1, col=1,
    range=(541, -2),
    showticklabels=False,
    showline=True, mirror=True, linecolor='black',
)

# Time-scale bar in row 2 (drawn by the project helper).
plot_utils.plot_time(fig, time_period, width_period, 2, 1)

fig.update_xaxes(
    row=2, col=1,
    range=(541, -2),
    ticks='outside',
    title='Time (Ma)',
)

fig.update_yaxes(
    row=1, col=1,
    title='Skeletal biomass %',
    range=(0, 89),
    ticks='outside',
    showline=True, mirror=True, linecolor='black',
)

# White canvas, legend pinned to the top-left corner.
fig.update_layout(
    height=600, width=1000,
    font_color='black',
    paper_bgcolor='white',
    plot_bgcolor='white',
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)

fig.show()
fig.write_image('./figures/Figure_S8.png', scale=6)

## Figure S9: Effect of uneven sampling

In [44]:
# Fresh, empty accumulator for the even-subsampling runs.
sampling_test = pd.DataFrame(
    columns=[
        'time', 'period', 'no_of_samp',
        'mean', 'median', 'min', 'max',
    ]
)

In [45]:
# Uneven-sampling test: in each run, the same number of rows is drawn at
# random (without replacement) from every period and the per-period
# statistics of 'skeletal_total' are recomputed on that subsample.
#
# Changes relative to the previous version of this cell:
#   * a seeded RandomState drives DataFrame.sample so the figure is
#     reproducible;
#   * a period with fewer than SAMPLES_PER_PERIOD rows contributes all of its
#     rows instead of making DataFrame.sample raise ValueError;
#   * the per-period subsets do not depend on the run, so they are computed
#     once instead of 1000 times;
#   * frames are collected in lists and concatenated once, instead of growing
#     a DataFrame with pd.concat inside the loops;
#   * the result is assigned (not appended), so re-running the cell does not
#     duplicate runs.
N_RUNS = 1000
SAMPLES_PER_PERIOD = 10
SAMPLE_SEED = 0  # arbitrary; fixed only for reproducibility
sample_rng = np.random.RandomState(SAMPLE_SEED)

sample_cols = ['period', 'mean_ma', 'numberOfSamples', 'skeletal_total']

# Loop-invariant: the rows belonging to each period.
period_rows = [data.loc[data.period == p, sample_cols] for p in periods]

stats_frames = []
for run in range(N_RUNS):
    draws = [
        # Passing the RandomState object (not an int) advances its state, so
        # every period and every run gets a different draw.
        rows.sample(min(SAMPLES_PER_PERIOD, len(rows)), random_state=sample_rng)
        for rows in period_rows
    ]
    data_samples = pd.concat(draws, ignore_index=True)
    stats = utils.get_stats(data_samples, 'skeletal_total', 'period')
    stats_frames.append(stats)

sampling_test = pd.concat(stats_frames, ignore_index=True)

In [46]:
# Order all runs by increasing 'time'.
sampling_test = sampling_test.sort_values(by='time')

In [47]:
# Summarise, per period, the spread of the run-level 'mean' values:
# their mean, median (0.5 quantile), minimum and maximum, together with the
# smallest 'time' recorded for that period.
mean, median = [], []
min_value, max_value = [], []
period, time = [], []

for p in periods:
    temp = sampling_test.loc[sampling_test['period'] == p]
    run_means = temp['mean']

    period.append(p)
    time.append(temp['time'].min())

    mean.append(run_means.mean())
    median.append(run_means.quantile(0.5))
    min_value.append(run_means.min())
    max_value.append(run_means.max())

In [48]:
# One row per period with the summary values gathered above,
# ordered by increasing 'time'.
sampling_stats = pd.DataFrame(
    {
        'time': time,
        'min': min_value,
        'max': max_value,
        'median': median,
        'mean': mean,
        'period': period,
    }
).sort_values(by='time')

In [49]:
# Figure S9 layout: a tall data panel (row 1) over a thin time-scale bar (row 2).
fig = make_subplots(rows = 2,cols = 1,shared_xaxes = True,row_heights = [0.85,0.15],
                 vertical_spacing = 0.001)


# Shaded envelope of the subsampled runs: lower edge first, then the upper
# edge filled down to it.
fig.add_trace(go.Scatter(x=sampling_stats['time'], y=sampling_stats['min'],showlegend = False,
                             fill=None,mode='lines',line_color='white',
                        ),row=1,col=1)

fig.add_trace(go.Scatter(x=sampling_stats['time'],y=sampling_stats['max'],showlegend = False,
                         fill='tonexty', # fill area between trace0 and trace1
                         mode='lines', fillcolor='rgba(174, 215, 234,0.5)',
                         line_color = 'white'),row=1,col=1)


# Reference curves computed from the complete data set.
fig.add_trace(go.Scatter(x =data_skeletal['time'] ,y = data_skeletal['mean'],
                         mode = 'lines+markers',
                         name = 'All data mean', line = dict(color = 'red',dash = 'solid'),
                         showlegend = True,
                         marker = dict(color = 'red',size = 8),
                         ),row=1,col=1)

# NOTE(review): the legend calls the 'min' / 'max' columns of data_skeletal
# the 25th / 75th percentiles. That is only correct if utils.get_stats stores
# quartiles under those names -- confirm against utils.get_stats.
fig.add_trace(go.Scatter(x =data_skeletal['time'] ,y = data_skeletal['min'],
                         mode = 'lines+markers',
                         name = 'All data 25 percentile', line = dict(color = 'black',dash = 'dashdot'),
                         showlegend = True,
                         # Marker colour now matches the black line (it was red,
                         # copied from the mean trace), consistent with the
                         # 75 percentile trace below.
                         marker = dict(color = 'black',size = 8),
                         ),row=1,col=1)

fig.add_trace(go.Scatter(x =data_skeletal['time'] ,y = data_skeletal['max'],
                         mode = 'lines+markers',
                         name = 'All data 75 percentile', line = dict(color = 'black',dash = 'dash'),
                         showlegend = True,
                         marker = dict(color = 'black',size = 8),
                         ),row=1,col=1)


# Data panel x-axis: reversed range (541 on the left), boxed, no tick labels.
fig.update_xaxes(range=(541,-2),showticklabels = False,showline = True, 
                     mirror = True,linecolor = 'black',row=1,col=1)

# Time-scale bar in row 2 (drawn by the project helper).
plot_utils.plot_time(fig,time_period,width_period,2,1)

fig.update_xaxes(range=(541,-2),ticks = 'outside',title = 'Time (Ma)',row=2,col=1)

fig.update_yaxes(title='Skeletal biomass %',showline = True, ticks = 'outside',
                     range=(0,89),mirror = True,linecolor = 'black',row=1,col=1)

# White canvas, legend pinned to the top-left corner.
fig.update_layout(height = 600, width = 1000,font_color = 'black',
                 paper_bgcolor = 'white',
                 plot_bgcolor = 'white',
                 legend=dict(
                yanchor="top",
                    y=0.99,
                    xanchor="left",
                    x=0.01))
fig.show()
fig.write_image('./figures/Figure_S9.png',scale = 6)