In [1]:
import numpy as np
import scipy 
import pandas as pd 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.formula.api import ols
import utils
import plot_utils
import random
from scipy import stats



# Read data

In [2]:
data_file = './data/paleo_meso_ceno_data_june_paleogeography.csv'

# Load the sample table and order its rows by descending mean_ma.
data = pd.read_csv(data_file).sort_values(by=['mean_ma'], ascending=False)

In [3]:
# Coerce the skeletal_total column to a numeric dtype (raises on unparseable values).
data['skeletal_total'] = pd.to_numeric(data['skeletal_total'])

In [4]:
# Label every sample's water depth from its depositional environment:
# 'distalRamp' and 'outerShelf' become 'Deep'; any other value (including a
# missing one) becomes 'Shallow'.
# Vectorised with isin/np.where rather than a Python-level iterrows() loop;
# the labels produced are the same, and water_depth is still a plain list.
water_depth = np.where(
    data['depo_env1'].isin(['distalRamp', 'outerShelf']),
    'Deep',
    'Shallow',
).tolist()

data['water_depth'] = water_depth

# ANOVA tests

## time and depositional setting

In [16]:
# Two-way ANOVA (anova_lm with typ=2): skeletal_total against epoch and framework.
# framework: ramp vs platform
model = ols(
    formula='skeletal_total ~ C(epoch) + C(framework) ',
    data=data,
).fit()
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1895514.0,32.0,157.431503,0.0
C(framework),37011.1,2.0,49.183199,6.092872e-22
Residual,2704916.0,7189.0,,


In [17]:
# Share of the ANOVA table's summed sum_sq carried by each factor: (epoch, framework).
(
    an.loc['C(epoch)', 'sum_sq'] / an['sum_sq'].sum(),
    an.loc['C(framework)', 'sum_sq'] / an['sum_sq'].sum(),
)

(0.4087413203453251, 0.007980933453386423)

In [18]:
# Two-way ANOVA (anova_lm with typ=2): skeletal_total against epoch and depo_env1.
# depo_env1: location on ramp vs location on platform
model = ols(
    formula='skeletal_total ~ C(epoch) + C(depo_env1) ',
    data=data,
).fit()
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1709715.0,32.0,150.470772,0.0
C(depo_env1),190704.5,6.0,89.513414,1.0606969999999999e-108
Residual,2551222.0,7185.0,,


In [19]:
# Share of the ANOVA table's summed sum_sq carried by each factor: (epoch, depo_env1).
(
    an.loc['C(epoch)', 'sum_sq'] / an['sum_sq'].sum(),
    an.loc['C(depo_env1)', 'sum_sq'] / an['sum_sq'].sum(),
)

(0.38406388656956514, 0.04283913746179643)

In [27]:
# Two-way ANOVA (anova_lm with typ=2): skeletal_total against epoch and water_depth.
# water_depth: deep or shallow
model = ols(
    formula='skeletal_total ~ C(epoch) + C(water_depth)',
    data=data,
).fit()
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1986406.0,32.0,162.780553,0.0
C(water_depth),72.48376,1.0,0.190075,0.662867
Residual,2741854.0,7190.0,,


In [26]:
# Share of the ANOVA table's summed sum_sq carried by each factor: (epoch, water_depth).
(
    an.loc['C(epoch)', 'sum_sq'] / an['sum_sq'].sum(),
    an.loc['C(water_depth)', 'sum_sq'] / an['sum_sq'].sum(),
)

(0.42010704244914077, 1.5329664072471083e-05)

## time and paleolatitude setting

In [23]:
# Two-way ANOVA (anova_lm with typ=2): skeletal_total against epoch and paleo_geography.
model = ols(
    formula='skeletal_total ~ C(epoch) + C(paleo_geography)',
    data=data,
).fit()
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),2002128.0,32.0,164.901029,0.0
C(paleo_geography),2172.027,1.0,5.724619,0.016754
Residual,2724984.0,7182.0,,


In [24]:
# Share of the ANOVA table's summed sum_sq carried by each factor: (epoch, paleo_geography).
(
    an.loc['C(epoch)', 'sum_sq'] / an['sum_sq'].sum(),
    an.loc['C(paleo_geography)', 'sum_sq'] / an['sum_sq'].sum(),
)

(0.42334699011773474, 0.0004592717538047158)

## time and Chemistry [Aragonite and calcite]

In [32]:
# Load the per-epoch chemistry labels and index them by epoch name.
chemical_composition = pd.read_excel('./data/chemistry_data.xlsx',sheet_name='chemistry')
epoch_chemical_unique = np.unique(chemical_composition.Epoch)
chemical_composition = chemical_composition.set_index('Epoch')

# Keep only the samples whose epoch appears in the chemistry sheet, then look
# up each sample's chemistry label by epoch. This replaces a row-by-row
# iterrows() loop with chained indexing; row order is preserved.
# NOTE(review): the lookup assumes each Epoch occurs once in the sheet — confirm.
matched = data[data['epoch'].isin(epoch_chemical_unique)]

skeletal_chemical = matched['skeletal_total'].tolist()
epoch_chemical = matched['epoch'].tolist()
chemistry = matched['epoch'].map(chemical_composition['chemistry']).tolist()
chemistry_df = pd.DataFrame({'skeletal_total':skeletal_chemical,'chemistry':chemistry,'epoch':epoch_chemical})

In [33]:
# One-way ANOVA (anova_lm with typ=1): skeletal_total against chemistry only.
model = ols(
    formula='skeletal_total ~ C(chemistry)',
    data=chemistry_df,
).fit()
an = sm.stats.anova_lm(model, typ=1)
an



Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(chemistry),1.0,233.429,233.428985,0.35274,0.552586
Residual,6863.0,4541658.0,661.759874,,


In [31]:
# Share of the ANOVA table's summed sum_sq carried by the chemistry factor.
an.loc['C(chemistry)', 'sum_sq'] / an['sum_sq'].sum()

5.1394663975783375e-05

# Spearman correlation

In [34]:
## Prepare data
# Per-epoch summary tables from utils.get_stats, each ordered by ascending 'time'.
data_skeletal = utils.get_stats(data, 'skeletal_total', 'epoch').sort_values(by=['time'], ascending=True)

# Rows containing any missing value are removed from the alga and protist tables.
# NOTE(review): data_animal is not NaN-filtered here — confirm this is intentional.
data_algamicro = (
    utils.get_stats(data, 'total_alga_new', 'epoch')
    .sort_values(by=['time'], ascending=True)
    .dropna()
)
data_animal = utils.get_stats(data, 'total_animals_new', 'epoch').sort_values(by=['time'], ascending=True)
data_protist = (
    utils.get_stats(data, 'total_protists_new', 'epoch')
    .sort_values(by=['time'], ascending=True)
    .dropna()
)

## diversity data
# Carbonate-only tables are used exactly as read.
# NOTE(review): unlike the all-lithology tables below, these are not filtered on
# SQS_diversity — confirm this is intentional.
animal_diversity_carb = pd.read_csv('./data/SQS_diversity_data/animal_diversity_carbonates_only.csv')
alga_diversity_carb = pd.read_csv('./data/SQS_diversity_data/alga_diversity_carbonates_only.csv')
protist_diversity_carb = pd.read_csv('./data/SQS_diversity_data/protist_diversity_carbonates_only.csv')

# All-lithology tables: rows without an SQS_diversity value are discarded.
animal_diversity = pd.read_csv('./data/SQS_diversity_data/animal_diversity.csv').dropna(subset=['SQS_diversity'])
alga_diversity = pd.read_csv('./data/SQS_diversity_data/alga_diversity.csv').dropna(subset=['SQS_diversity'])
protist_diversity = pd.read_csv('./data/SQS_diversity_data/protist_diversity.csv').dropna(subset=['SQS_diversity'])

## Sepkoski diversity data
sep_diversity = utils.load_sepkoski_diversity_data()

## Sepkoski diversity and biomass

In [35]:
# Sepkoski diversity vs. total skeletal abundance (Spearman rank correlation)

common_time = list(set(sep_diversity.epoch).intersection(set(data_skeletal.epoch)))

div_corr = sep_diversity[sep_diversity.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_skeletal[data_skeletal.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['diversity'], paired['mean'])
print(f'Total: {res}')

Total: SignificanceResult(statistic=0.785539488320356, pvalue=2.7187140432533034e-07)


## All-lithology diversity and biomass

In [36]:
## animal diversity and animal biomass

common_time = list(set(animal_diversity.epoch).intersection(set(data_animal.epoch)))
div_corr = animal_diversity[animal_diversity.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_animal[data_animal.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Animal: {res}')




In [37]:
## alga diversity and alga biomass

common_time = list(set(alga_diversity.epoch).intersection(set(data_algamicro.epoch)))
div_corr = alga_diversity[alga_diversity.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_algamicro[data_algamicro.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Alga: {res}')





In [38]:
## protist diversity and protist biomass

common_time = list(set(protist_diversity.epoch).intersection(set(data_protist.epoch)))
div_corr = protist_diversity[protist_diversity.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_protist[data_protist.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Protist: {res}')

Protist: SignificanceResult(statistic=0.8470859937423051, pvalue=9.280414984522052e-08)


## Carbonate only diversity and biomass

In [9]:
## animal diversity and animal biomass

common_time = list(set(animal_diversity_carb.epoch).intersection(set(data_animal.epoch)))
div_corr = animal_diversity_carb[animal_diversity_carb.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_animal[data_animal.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Animal: {res}')




In [10]:
## alga diversity and alga biomass

common_time = list(set(alga_diversity_carb.epoch).intersection(set(data_algamicro.epoch)))
div_corr = alga_diversity_carb[alga_diversity_carb.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_algamicro[data_algamicro.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Alga: {res}')


Alga: SignificanceResult(statistic=0.5392374843728079, pvalue=0.0044732057389988665)


In [11]:
## protist diversity and protist biomass

common_time = list(set(protist_diversity_carb.epoch).intersection(set(data_protist.epoch)))
div_corr = protist_diversity_carb[protist_diversity_carb.epoch.isin(common_time)]
div_corr = div_corr.sort_values(by='mean_ma',ascending=True)
abundance_corr = data_protist[data_protist.epoch.isin(common_time)]

# Pair the two series on the shared epoch key instead of by row position:
# div_corr is re-sorted here while abundance_corr keeps whatever order it
# arrived in, so positional pairing fails (length mismatch) or silently pairs
# the wrong epochs whenever row order, duplicates or NaN rows differ.
# validate='one_to_one' raises on duplicated epochs; incomplete pairs are dropped.
paired = pd.merge(
    div_corr[['epoch', 'SQS_diversity']].reset_index(drop=True),
    abundance_corr[['epoch', 'mean']].reset_index(drop=True),
    on='epoch',
    validate='one_to_one',
).dropna()

res = stats.spearmanr(paired['SQS_diversity'], paired['mean'])
print(f'Protist: {res}')

Protist: SignificanceResult(statistic=0.7552905522524335, pvalue=1.2748535120745491e-05)
