# Compositional ANOVA teste between seasons and trap type

In [18]:
import numpy as np
import pandas as pd
from scipy.stats import f_oneway
import statsmodels.api as sm
from statsmodels.formula.api import ols

spring_fine = pd.read_csv('data/spring_2023_basket_LISST.csv')
spring_combined = pd.read_csv('data/spring_2023_basket_combined.csv')
summer_fine = pd.read_csv('data/summer_2023_basket_LISST.csv')
summer_combined = pd.read_csv('data/summer_2023_basket_combined.csv')
grain_sizes = spring_fine['sample']
combined_grain_sizes = spring_combined['sample']

Defining function

In [19]:
def clr_manual(data):
    # compute geometric mean
    geometric_mean = np.exp(np.mean(np.log(data + 1e-20), axis=1)) # add a small number to avoid log(0)
    clr_transformed = np.log((data.T + 1e-20) / geometric_mean).T
    return clr_transformed

## Spring Fine Sediment 

Separating by flux direction and basket type

In [3]:
# separate by flux
upwelling = spring_fine[['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T5-A', 'T5-B', 'T5-C', 'T5-D']]
upwelling.index = grain_sizes
downwelling = spring_fine[['T2-A', 'T2-B', 'T3-C', 'T3-D', 'T6-A', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']]
downwelling.index = grain_sizes

# transpose and make the first row the column names
upwelling = upwelling.T
upwelling.columns = grain_sizes
downwelling = downwelling.T
downwelling.columns = grain_sizes

# add trap type column 
# if the index ends in A or D its closed, if it ends in B or C its open
upwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in upwelling.index]
downwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in downwelling.index]

Upwelling

In [4]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = upwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/spring_fine_upwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Fine Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  6.446294e-35  1.0  0.000956  0.976333
Residual      4.044373e-31  6.0       NaN       NaN


Downwelling

In [12]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = downwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/spring_fine_downwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Fine Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F    PR(>F)
C(trap_type)  6.512407e-34   1.0  0.002521  0.960722
Residual      3.358759e-30  13.0       NaN       NaN


## Summer Fine Sediment 

Separating by flux direction and basket type

In [6]:
# separate by flux
upwelling = summer_fine[['T5-A', 'T5-B', 'T5-C', 'T5-D']]
upwelling.index = grain_sizes
downwelling = summer_fine[['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T2-A', 'T2-B', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']]
downwelling.index = grain_sizes

# transpose and make the first row the column names
upwelling = upwelling.T
upwelling.columns = grain_sizes
downwelling = downwelling.T
downwelling.columns = grain_sizes

# add trap type column 
# if the index ends in A or D its closed, if it ends in B or C its open
upwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in upwelling.index]
downwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in downwelling.index]

Upwelling

In [9]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = upwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/summer_fine_upwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Fine Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  7.230610e-31  1.0  0.378028  0.601293
Residual      3.825438e-30  2.0       NaN       NaN


Downwelling

In [11]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = downwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/summer_fine_downwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Fine Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F    PR(>F)
C(trap_type)  6.512407e-34   1.0  0.002521  0.960722
Residual      3.358759e-30  13.0       NaN       NaN


## Spring Combined Sediment 

Separating by flux direction and basket type

In [27]:
# separate by flux
upwelling = spring_combined[['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T5-A', 'T5-B', 'T5-C', 'T5-D']]
upwelling.index = combined_grain_sizes
downwelling = spring_combined[['T2-A', 'T2-B', 'T3-C', 'T3-D', 'T6-A', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']]
downwelling.index = combined_grain_sizes

# transpose and make the first row the column names
upwelling = upwelling.T
upwelling.columns = combined_grain_sizes
downwelling = downwelling.T
downwelling.columns = combined_grain_sizes

# add trap type column 
# if the index ends in A or D its closed, if it ends in B or C its open
upwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in upwelling.index]
downwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in downwelling.index]

Upwelling

In [28]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = upwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/spring_combined_upwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Combined Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  9.705324e-31  1.0  5.405405  0.059047
Residual      1.077291e-30  6.0       NaN       NaN


Downwelling

In [29]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = downwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/spring_combined_downwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Combined Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df        F    PR(>F)
C(trap_type)  1.386475e-31   1.0  0.27959  0.606612
Residual      5.950750e-30  12.0      NaN       NaN


## Summer Combined Sediment 

Separating by flux direction and basket type

In [30]:
# separate by flux
upwelling = summer_combined[['T5-A', 'T5-B', 'T5-C', 'T5-D']]
upwelling.index = combined_grain_sizes
downwelling = summer_combined[['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T2-A', 'T2-B', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']]
downwelling.index = combined_grain_sizes

# transpose and make the first row the column names
upwelling = upwelling.T
upwelling.columns = combined_grain_sizes
downwelling = downwelling.T
downwelling.columns = combined_grain_sizes

# add trap type column 
# if the index ends in A or D its closed, if it ends in B or C its open
upwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in upwelling.index]
downwelling['trap_type'] = ['closed' if i[-1] in ['A', 'D'] else 'open' for i in downwelling.index]

Upwelling

In [31]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = upwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/summer_combined_upwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Combined Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  6.987834e-31  1.0  2.769231  0.237999
Residual      5.046769e-31  2.0       NaN       NaN


Downwelling

In [32]:
# apply CLR transformation to the data (excluding the 'trap_type' column)
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
# convert CLR data to DataFrame and add the TrapType column back
clr_df = pd.DataFrame(clr_transformed_data)
clr_df['trap_type'] = downwelling['trap_type']
# rename columns to not have a dot for the decimals in the name
clr_df.columns = ['size_' + str(col).replace('.', '') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]

# conduct ANOVA on each sizes CLR transformed data
anova_results = {}
for column in clr_df.columns[:-1]:  # exclude 'trap_type' column
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    anova_results[column] = anova_table

# display ANOVA results for each element
results = []
for element, result in anova_results.items():
    #print(f"ANOVA results for {element}:")
    #print(result, "\n")
    results.append(result)
# concatenate all results into a single DataFrame
results_df = pd.concat(results)
# save the results to a CSV file
results_df.to_csv('results/summer_combined_downwelling_anova_results.csv', index=False)

# overall ANOVA using the mean clr scores across elements
clr_df['mean_clr'] = clr_df.drop('trap_type', axis=1).mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Combined Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F   PR(>F)
C(trap_type)  5.841912e-31   1.0  1.878285  0.19373
Residual      4.043308e-30  13.0       NaN      NaN
