In [1]:
import numpy as np
import pandas as pd 
import statsmodels.api as sm
from statsmodels.formula.api import ols
import utils
from scipy import stats



# ANOVA tests

## Period level

In [2]:
# Period-level dataset used by the reef vs non-reef ANOVA below.
data_file = './data/paleo_meso_ceno_data_period.csv'

# Read the CSV and order rows by descending mean_ma in one chained expression.
data = pd.read_csv(data_file).sort_values(by=['mean_ma'], ascending=False)
# Make sure the response variable is numeric before it is used in a model formula.
data['skeletal_total'] = pd.to_numeric(data['skeletal_total'])

# Reef vs non-reef

In [3]:
# depo_env2 values that are counted as reef settings
reefs = [
    'patchReefs', 'reef', 'interReef', 'mudMound',
    'foreReef', 'bioherm', 'patchReef', 'backReef',
]

# One boolean mask, reused for both groups: True where depo_env2 is a listed reef setting
is_reef = data['depo_env2'].isin(reefs).to_numpy()
reef_index = data.index[is_reef]
non_reef_index = data.index[~is_reef]

# Every row gets a label; anything not in `reefs` (missing values included) is 'non reef'
data.loc[reef_index, 'reef_classification'] = 'reef'
data.loc[non_reef_index, 'reef_classification'] = 'non reef'

data = data.sort_values(by=['mean_ma'], ascending=False)

In [4]:
# Two-way ANOVA: skeletal_total explained by period and reef classification
# reef_classification: reef vs non reef
anova_formula = 'skeletal_total ~ C(period) + C(reef_classification) '
model = ols(anova_formula, data=data).fit()
# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(period),1571363.0,11.0,331.17606,0.0
C(reef_classification),118476.0,1.0,274.666423,1.201921e-60
Residual,3336886.0,7736.0,,


In [5]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (period, reef classification)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(period)'], ss_share['C(reef_classification)']

(0.3126017448046417, 0.023569227548949565)

# Read epoch-level data for the following ANOVA tests

In [6]:
# Epoch-level dataset; note that this rebinds `data`, replacing the period-level frame.
data_file = './data/paleo_meso_ceno_data_paleogeography_epoch.csv'

# Read the CSV and order rows by descending mean_ma in one chained expression.
data = pd.read_csv(data_file).sort_values(by=['mean_ma'], ascending=False)

In [7]:
# Make sure the response variable is numeric before it is used in a model formula.
data['skeletal_total'] = pd.to_numeric(data['skeletal_total'])

In [8]:
# Bin every sample by water depth from its depo_env1 value: 'distalRamp' and
# 'outerShelf' are labelled 'Deep'; any other value (missing ones included) is 'Shallow'.
# Vectorized with isin + np.where instead of a Python-level iterrows loop; the
# resulting labels are the same, row for row.
deep_settings = ['distalRamp', 'outerShelf']
water_depth = np.where(data['depo_env1'].isin(deep_settings), 'Deep', 'Shallow')

data['water_depth'] = water_depth

## Time and depositional setting

In [11]:
# Two-way ANOVA: skeletal_total explained by epoch and framework
# framework: ramp vs platform
# NOTE(review): the printed table reports df = 2 for C(framework), which suggests
# a third framework level besides ramp and platform - confirm.
anova_formula = 'skeletal_total ~ C(epoch) + C(framework) '
model = ols(anova_formula, data=data).fit()
# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1842727.0,32.0,150.727601,0.0
C(framework),26056.96,2.0,34.101653,1.798577e-15
Residual,2947121.0,7714.0,,


In [12]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (epoch, framework)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(epoch)'], ss_share['C(framework)']

(0.3826336434455368, 0.0054106047238039095)

In [60]:
# Two-way ANOVA: skeletal_total explained by epoch and depo_env1
# depo_env1: location on ramp vs location on platform
anova_formula = 'skeletal_total ~ C(epoch) + C(depo_env1) '
model = ols(anova_formula, data=data).fit()
# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1700846.0,32.0,148.274564,0.0
C(depo_env1),209402.6,6.0,97.360441,1.978378e-118
Residual,2763775.0,7710.0,,


In [61]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (epoch, depo_env1)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(epoch)'], ss_share['C(depo_env1)']

(0.36389329864840875, 0.04480136073007238)

In [18]:
# Two-way ANOVA: skeletal_total explained by epoch and water depth
# water_depth: deep or shallow
anova_formula = 'skeletal_total ~ C(epoch) + C(water_depth)'
model = ols(anova_formula, data=data).fit()
# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1941334.0,32.0,157.46658,0.0
C(water_depth),842.7288,1.0,2.187389,0.139186
Residual,2972335.0,7715.0,,


In [19]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (epoch, water depth)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(epoch)'], ss_share['C(water_depth)']

(0.39502069381942045, 0.0001714776407615819)

## Time and paleolatitude (paleogeography) setting

In [20]:
# Two-way ANOVA: skeletal_total explained by epoch and paleo_geography
anova_formula = 'skeletal_total ~ C(epoch) + C(paleo_geography)'
model = ols(anova_formula, data=data).fit()
# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1947804.0,32.0,158.357823,0.0
C(paleo_geography),7720.35,1.0,20.085436,8e-06
Residual,2965457.0,7715.0,,


In [21]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (epoch, paleo_geography)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(epoch)'], ss_share['C(paleo_geography)']

(0.39581614391791947, 0.0015688638796896692)

## Time and chemistry (aragonite and calcite)

In [24]:
# Lookup table read from the 'chemistry' sheet; after set_index it maps Epoch -> row.
chemical_composition = pd.read_excel('./data/chemistry_data.xlsx', sheet_name='chemistry')
epoch_chemical_unique = np.unique(chemical_composition.Epoch)
# Reassign instead of inplace=True so the cell reads as a plain pipeline.
chemical_composition = chemical_composition.set_index('Epoch')

# Keep only the samples whose epoch appears in the chemistry table, then attach
# the chemistry label by mapping each sample's epoch through the lookup.
# Vectorized replacement for the former iterrows loop with three parallel lists.
# NOTE(review): assumes each Epoch appears once in the sheet - Series.map raises
# on a lookup with a duplicated index. Confirm against the workbook.
has_chemistry = data['epoch'].isin(epoch_chemical_unique)
matched = data.loc[has_chemistry, ['skeletal_total', 'epoch']]

# .to_numpy() drops the original row labels so chemistry_df gets a fresh 0..n-1 index.
chemistry_df = pd.DataFrame({
    'skeletal_total': matched['skeletal_total'].to_numpy(),
    'chemistry': matched['epoch'].map(chemical_composition['chemistry']).to_numpy(),
    'epoch': matched['epoch'].to_numpy(),
})

In [25]:
# One-way ANOVA: skeletal_total explained by chemistry alone,
# fitted on the chemistry-matched subset (chemistry_df)
anova_formula = 'skeletal_total ~ C(chemistry)'
model = ols(anova_formula, data=chemistry_df).fit()
# typ=1 requests the Type I table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=1)
an



Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(chemistry),1.0,2.368333,2.368333,0.003698,0.951513
Residual,7369.0,4719631.0,640.471012,,


In [26]:
# Chemistry's sum_sq divided by the sum of the table's sum_sq column (Residual row included)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(chemistry)']

5.018044204354113e-07

## Time and number of samples

In [None]:
# Fit skeletal_total on epoch (categorical) plus numberOfSamples.
# numberOfSamples is not wrapped in C(), so it enters the formula as given rather
# than as an explicitly categorical factor.
anova_formula = 'skeletal_total ~ C(epoch) + numberOfSamples'
model = ols(anova_formula, data=data).fit()

# typ=2 requests the Type II table; it is kept in `an` for the next cell
an = sm.stats.anova_lm(model, typ=2)
an



Unnamed: 0,sum_sq,df,F,PR(>F)
C(epoch),1881857.0,32.0,152.598283,0.0
numberOfSamples,13.81017,1.0,0.035835,0.849861
Residual,2973192.0,7715.0,,


In [28]:
# Each term's sum_sq divided by the sum of the table's sum_sq column
# (Residual row included): (epoch, numberOfSamples)
ss_share = an['sum_sq'] / an['sum_sq'].sum()
ss_share['C(epoch)'], ss_share['numberOfSamples']

(0.38760715503576676, 2.844488849162627e-06)