# D50 compositional ANOVA tests between seasons and trap types

In [2]:
import numpy as np
import pandas as pd
from scipy.stats import f_oneway
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Read the spring and summer 2023 LISST tables in one pass.
spring_fine, summer_fine = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/D50/spring_2023_d50_LISST.csv',
        'data/D50/summer_2023_d50_LISST.csv',
    )
)
# The 'sample' column of the spring table supplies the labels reused by the later cells.
grain_sizes = spring_fine.loc[:, 'sample']

Defining the centred log-ratio (CLR) transform function

In [3]:
def clr_manual(data, pseudocount=1e-20):
    """Centred log-ratio (CLR) transform, applied independently to every row.

    Each row is treated as one composition: every value is divided by the
    geometric mean of its row and the natural log of that ratio is returned,
    so each transformed row sums to zero (up to floating-point rounding).

    Parameters
    ----------
    data : pandas.DataFrame, numpy.ndarray or nested sequence
        2-D table with one composition per row and one part per column.
        Anything that is not a DataFrame/ndarray is converted with
        ``np.asarray(..., dtype=float)``.
    pseudocount : float, default 1e-20
        Constant added to every value before taking logs so that zeros do
        not produce ``-inf``. The default reproduces the original behaviour;
        note that such a tiny value turns zeros into very large negative CLR
        scores.

    Returns
    -------
    Same type and shape as ``data`` (ndarray for sequence input) holding the
    CLR scores.
    """
    if not isinstance(data, (pd.DataFrame, np.ndarray)):
        data = np.asarray(data, dtype=float)
    # shift once so the geometric mean and the ratio use exactly the same values
    shifted = data + pseudocount
    # geometric mean of each row = exp(mean(log(row)))
    geometric_mean = np.exp(np.mean(np.log(shifted), axis=1))
    # transpose so the per-row geometric means line up with the columns, then transpose back
    clr_transformed = np.log(shifted.T / geometric_mean).T
    return clr_transformed

## Spring Fine Sediment 

Separating by flux direction and basket type

In [4]:
# Trap columns belonging to each flux direction (spring, fine sediment)
upwelling_traps = ['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T5-A', 'T5-B', 'T5-C', 'T5-D']
downwelling_traps = ['T2-A', 'T2-B', 'T3-C', 'T3-D', 'T6-A', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']

# One row per trap, one column per entry of grain_sizes
upwelling = spring_fine[upwelling_traps].T
upwelling.columns = grain_sizes
downwelling = spring_fine[downwelling_traps].T
downwelling.columns = grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [5]:
# Closed-vs-open trap ANOVA for every size class of the spring fine upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/spring_fine_upwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Fine Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  1.386670e-32  1.0  0.771429  0.413561
Residual      1.078521e-31  6.0       NaN       NaN


Downwelling

In [6]:
# Closed-vs-open trap ANOVA for every size class of the spring fine downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/spring_fine_downwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Fine Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F    PR(>F)
C(trap_type)  1.408680e-32   1.0  0.424779  0.526848
Residual      3.979522e-31  12.0       NaN       NaN


## Summer Fine Sediment 

Separating by flux direction and basket type

In [7]:
# Trap columns belonging to each flux direction (summer, fine sediment)
upwelling_traps = ['T5-A', 'T5-B', 'T5-C', 'T5-D']
downwelling_traps = ['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T2-A', 'T2-B', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']

# One row per trap, one column per entry of grain_sizes.
# NOTE(review): grain_sizes was read from the spring table; this assumes the summer table
# lists the same rows in the same order — confirm.
upwelling = summer_fine[upwelling_traps].T
upwelling.columns = grain_sizes
downwelling = summer_fine[downwelling_traps].T
downwelling.columns = grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [8]:
# Closed-vs-open trap ANOVA for every size class of the summer fine upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/summer_fine_upwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Fine Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df    F   PR(>F)
C(trap_type)  2.773339e-32  1.0  1.0  0.42265
Residual      5.546678e-32  2.0  NaN      NaN


Downwelling

In [9]:
# Closed-vs-open trap ANOVA for every size class of the summer fine downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/summer_fine_downwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Fine Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F    PR(>F)
C(trap_type)  5.869501e-35   1.0  0.002299  0.962488
Residual      3.319203e-31  13.0       NaN       NaN


## Spring Combined Sediment 

Separating by flux direction and basket type

In [10]:
# NOTE(review): spring_combined and combined_grain_sizes are not defined in any cell visible
# in this notebook; on a fresh kernel this cell raises NameError unless they are loaded first.

# Trap columns belonging to each flux direction (spring, combined sediment)
upwelling_traps = ['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T5-A', 'T5-B', 'T5-C', 'T5-D']
downwelling_traps = ['T2-A', 'T2-B', 'T3-C', 'T3-D', 'T6-A', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']

# One row per trap, one column per entry of combined_grain_sizes
upwelling = spring_combined[upwelling_traps].T
upwelling.columns = combined_grain_sizes
downwelling = spring_combined[downwelling_traps].T
downwelling.columns = combined_grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [11]:
# Closed-vs-open trap ANOVA for every size class of the spring combined upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/sizeclass/spring_combined_upwelling_anova_sizeclass.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Combined Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  2.465190e-32  1.0  0.888889  0.382175
Residual      1.664003e-31  6.0       NaN       NaN


Downwelling

In [12]:
# Closed-vs-open trap ANOVA for every size class of the spring combined downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/sizeclass/spring_combined_downwelling_anova_sizeclass.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Spring Combined Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df     F    PR(>F)
C(trap_type)  3.169530e-32   1.0  0.75  0.403459
Residual      5.071249e-31  12.0   NaN       NaN


## Summer Combined Sediment 

Separating by flux direction and basket type

In [13]:
# NOTE(review): summer_combined and combined_grain_sizes are not defined in any cell visible
# in this notebook; on a fresh kernel this cell raises NameError unless they are loaded first.

# Trap columns belonging to each flux direction (summer, combined sediment)
upwelling_traps = ['T5-A', 'T5-B', 'T5-C', 'T5-D']
downwelling_traps = ['T1-A', 'T1-B', 'T1-C', 'T1-D', 'T2-A', 'T2-B', 'T6-B', 'T6-C', 'T6-D', 'T7-A', 'T7-B', 'T7-C', 'T7-D','T8-C', 'T8-D']

# One row per trap, one column per entry of combined_grain_sizes
upwelling = summer_combined[upwelling_traps].T
upwelling.columns = combined_grain_sizes
downwelling = summer_combined[downwelling_traps].T
downwelling.columns = combined_grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [14]:
# Closed-vs-open trap ANOVA for every size class of the summer combined upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/sizeclass/summer_combined_upwelling_anova_sizeclass.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Combined Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df     F    PR(>F)
C(trap_type)  1.301929e-31  1.0  13.0  0.069051
Residual      2.002967e-32  2.0   NaN       NaN


Downwelling

In [15]:
# Closed-vs-open trap ANOVA for every size class of the summer combined downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/sizeclass/summer_combined_downwelling_anova_sizeclass.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
print("Summer Combined Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq    df         F    PR(>F)
C(trap_type)  6.166644e-33   1.0  0.401894  0.537108
Residual      1.994713e-31  13.0       NaN       NaN


# Compositional ANOVA with extreme baskets only 

## Spring Fine Sediment

Separating by flux direction and basket type

In [14]:
# Trap columns kept for the extremes-only comparison (spring, fine sediment)
upwelling_traps = ['T1-A', 'T1-B', 'T1-C', 'T1-D']
downwelling_traps = ['T2-A', 'T2-B', 'T3-C', 'T3-D']

# One row per trap, one column per entry of grain_sizes
upwelling = spring_fine[upwelling_traps].T
upwelling.columns = grain_sizes
downwelling = spring_fine[downwelling_traps].T
downwelling.columns = grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [15]:
# Closed-vs-open trap ANOVA for every size class of the extremes-only spring fine upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/extremes_spring_fine_upwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
# label corrected: this cell analyses the fine-sediment table, not the combined one
print("Extremes Spring Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Extremes Spring Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df             F  PR(>F)
C(trap_type)  1.367362e-63  1.0  5.546678e-32     1.0
Residual      4.930381e-32  2.0           NaN     NaN


Downwelling

In [16]:
# Closed-vs-open trap ANOVA for every size class of the extremes-only spring fine downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/extremes_spring_fine_downwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
# label corrected: this cell analyses the extremes-only fine-sediment subset
print("Extremes Spring Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Spring Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq   df             F  PR(>F)
C(trap_type)  6.077163e-64  1.0  6.162976e-33     1.0
Residual      1.972152e-31  2.0           NaN     NaN


## Summer Fine Sediment

Separating by flux direction and basket type

In [17]:
# Trap columns kept for the extremes-only comparison (summer, fine sediment)
upwelling_traps = ['T5-A', 'T5-B', 'T5-C', 'T5-D']
downwelling_traps = ['T2-A', 'T2-B', 'T7-A', 'T7-B', 'T7-C', 'T7-D']

# One row per trap, one column per entry of grain_sizes.
# NOTE(review): grain_sizes was read from the spring table; this assumes the summer table
# lists the same rows in the same order — confirm.
upwelling = summer_fine[upwelling_traps].T
upwelling.columns = grain_sizes
downwelling = summer_fine[downwelling_traps].T
downwelling.columns = grain_sizes

# Trap IDs ending in A or D are closed baskets; those ending in B or C are open
closed_suffixes = ('A', 'D')
for frame in (upwelling, downwelling):
    frame['trap_type'] = ['closed' if trap.endswith(closed_suffixes) else 'open' for trap in frame.index]

Upwelling

In [20]:
# Closed-vs-open trap ANOVA for every size class of the extremes-only summer fine upwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(upwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = upwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/extremes_summer_fine_upwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
# label corrected: this cell analyses the fine-sediment table, not the combined one
print("Summer Extremes Fine Upwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Extremes Combined Upwelling ANOVA results for mean CLR scores:
                    sum_sq   df    F   PR(>F)
C(trap_type)  2.773339e-32  1.0  1.0  0.42265
Residual      5.546678e-32  2.0  NaN      NaN


Downwelling

In [21]:
# Closed-vs-open trap ANOVA for every size class of the extremes-only summer fine downwelling samples.
# CLR-transform the size columns only; 'trap_type' is a label, not part of the composition.
clr_transformed_data = clr_manual(downwelling.drop('trap_type', axis=1))
clr_df = pd.DataFrame(clr_transformed_data).copy()
# Make the column names usable in a formula: prefix numeric labels with 'size_' and turn the
# decimal point into '_' (simply deleting it would give e.g. 1.25 and 12.5 the same name).
clr_df.columns = ['size_' + str(col).replace('.', '_') if str(col)[0].isdigit() else str(col) for col in clr_df.columns]
size_columns = list(clr_df.columns)
# re-attach the trap type (aligned on the trap IDs in the index)
clr_df['trap_type'] = downwelling['trap_type']

# one-way ANOVA (Type II table) of each size class against trap type, keyed by column name
anova_results = {}
for column in size_columns:
    model = ols(f'{column} ~ C(trap_type)', data=clr_df).fit()
    anova_results[column] = sm.stats.anova_lm(model, typ=2)

# Stack the tables with the size class and the model term as index levels, and keep that
# index in the CSV so every row can be traced back to its size class.
results_df = pd.concat(anova_results, names=['size_class', 'term'])
results_df.to_csv('results/d50/extremes_summer_fine_downwelling_anova_d50.csv')

# overall ANOVA using the mean clr scores across elements
# NOTE(review): CLR values sum to zero within each sample by construction, so mean_clr is
# zero up to floating-point rounding (the recorded sum_sq is ~1e-31 or smaller). This table
# therefore compares rounding noise, not composition; an overall closed-vs-open test needs a
# multivariate method instead.
clr_df['mean_clr'] = clr_df[size_columns].mean(axis=1)
model_mean_clr = ols('mean_clr ~ C(trap_type)', data=clr_df).fit()
anova_mean_clr = sm.stats.anova_lm(model_mean_clr, typ=2)
# label corrected: this cell analyses the fine-sediment table, not the combined one
print("Summer Extremes Fine Downwelling ANOVA results for mean CLR scores:")
print(anova_mean_clr)

Summer Extremes Combined Downwelling ANOVA results for mean CLR scores:
                    sum_sq   df         F    PR(>F)
C(trap_type)  1.848893e-32  1.0  0.529412  0.507158
Residual      1.396941e-31  4.0       NaN       NaN
