## Plotting GSDs for all basket trap pairs

Importing Libraries

In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from scipy.stats import wasserstein_distance
import seaborn as sns
import os

Defining Functions

In [2]:
# removing outlier GSDs
def remove_outlier_gsd(matrix, percentiles=(16, 50, 84), iqr_factor=1.5):
    """Return *matrix* without the columns flagged as outliers by an IQR rule.

    For every column, the requested percentiles of that column's values are
    computed (by default the 16th, 50th and 84th). Each percentile is then
    treated separately: across all columns its first and third quartiles
    (Q1, Q3) are taken, and a column is flagged when its value lies strictly
    below ``Q1 - iqr_factor * IQR`` or strictly above
    ``Q3 + iqr_factor * IQR``. A column flagged for any percentile is
    removed.

    Parameters
    ----------
    matrix : 2-D array_like
        Data with one distribution per column.
        NOTE(review): presumably one GSD per column - confirm with callers.
    percentiles : sequence of float, optional
        Percentiles (0-100) evaluated down each column. The default
        ``(16, 50, 84)`` matches the previously hard-coded values.
    iqr_factor : float, optional
        Multiple of the inter-quartile range used for the fences. The
        default ``1.5`` matches the previously hard-coded value.

    Returns
    -------
    numpy.ndarray
        A new array holding the columns of *matrix* that were not flagged;
        the input is not modified.
    """
    # representative sizes: one row per requested percentile, one column per
    # column of `matrix`
    representative = np.percentile(matrix, percentiles, axis=0)

    # quartiles of each representative size across all columns;
    # keepdims lets the fences broadcast against `representative`
    quartile_1 = np.percentile(representative, 25, axis=1, keepdims=True)
    quartile_3 = np.percentile(representative, 75, axis=1, keepdims=True)

    # IQR and lower/upper fences
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - iqr_factor * iqr
    upper_bound = quartile_3 + iqr_factor * iqr

    # a column is an outlier as soon as any representative size is out of range
    out_of_range = (representative < lower_bound) | (representative > upper_bound)
    outlier_columns = np.flatnonzero(out_of_range.any(axis=0))

    # deleting outlier columns (np.delete returns a new array)
    return np.delete(matrix, outlier_columns, axis=1)

Importing Data

In [3]:
# Move into the project folder so that the relative paths below resolve.
os.getcwd()
working_directory = "C:/Users/huck4481/Documents/GitHub/sediment_trap_paper/trap_GSDs/"
os.chdir(working_directory)

# Output folder and input CSV files (relative to working_directory).
results_directory = "./plots"
filename1, filename2 = (
    "./fines/SM23_percentages.csv",
    "./combined/SM23_percentages_combined.csv",
)
filename3, filename4 = (
    "./fines/SP23_percentages.csv",
    "./combined/SP23_percentages_combined.csv",
)

# Load the four CSV files in the same order as the filenames.
dataframe1, dataframe2, dataframe3, dataframe4 = (
    pd.read_csv(csv_path)
    for csv_path in (filename1, filename2, filename3, filename4)
)

# The last row of every table is dropped. The first column of dataframe1
# provides grain_sizes; the remaining columns of each table are kept as a
# plain array:
#   proportion1 - summer fines      proportion2 - summer combined
#   proportion3 - spring fines      proportion4 - spring combined
grain_sizes = np.array(dataframe1.iloc[:-1, 0])
proportion1, proportion2, proportion3, proportion4 = (
    np.array(frame.iloc[:-1, 1:])
    for frame in (dataframe1, dataframe2, dataframe3, dataframe4)
)

Calculations

In [None]:
# calculating cumulative sum down the rows of each proportion table
summer_fine_gsd = np.cumsum(proportion1, axis=0)
summer_combined_gsd = np.cumsum(proportion2, axis=0)
spring_fine_gsd = np.cumsum(proportion3, axis=0)
spring_combined_gsd = np.cumsum(proportion4, axis=0)

# calculating max, avg and min GSDs of the baskets
# (row-wise across all columns; computed before any extra column is added,
# so the statistics cover the original columns only)
# summer
sm_fine_gsd_max = pd.DataFrame(np.max(summer_fine_gsd, axis=1))
sm_fine_gsd_avg = pd.DataFrame(np.mean(summer_fine_gsd, axis=1))
sm_fine_gsd_min = pd.DataFrame(np.min(summer_fine_gsd, axis=1))
sm_combined_gsd_max = pd.DataFrame(np.max(summer_combined_gsd, axis=1))
sm_combined_gsd_avg = pd.DataFrame(np.mean(summer_combined_gsd, axis=1))
sm_combined_gsd_min = pd.DataFrame(np.min(summer_combined_gsd, axis=1))
# spring
sp_fine_gsd_max = pd.DataFrame(np.max(spring_fine_gsd, axis=1))
sp_fine_gsd_avg = pd.DataFrame(np.mean(spring_fine_gsd, axis=1))
sp_fine_gsd_min = pd.DataFrame(np.min(spring_fine_gsd, axis=1))
sp_combined_gsd_max = pd.DataFrame(np.max(spring_combined_gsd, axis=1))
sp_combined_gsd_avg = pd.DataFrame(np.mean(spring_combined_gsd, axis=1))
sp_combined_gsd_min = pd.DataFrame(np.min(spring_combined_gsd, axis=1))

# convert to pandas dataframe
summer_fine_gsd = pd.DataFrame(summer_fine_gsd)
summer_combined_gsd = pd.DataFrame(summer_combined_gsd)
spring_fine_gsd = pd.DataFrame(spring_fine_gsd)
spring_combined_gsd = pd.DataFrame(spring_combined_gsd)

# column names of dataframes are the same as the ones from dataframe1, 2, 3 and 4
# (each GSD table takes the names from the file it was computed from)
summer_fine_gsd.columns = dataframe1.columns[1:]
summer_combined_gsd.columns = dataframe2.columns[1:]
spring_fine_gsd.columns = dataframe3.columns[1:]
spring_combined_gsd.columns = dataframe4.columns[1:]

# add max, avg and min GSDs to the dataframes as new columns
# (done while every frame still has its default integer index, so the
# index-aligned assignment matches the rows one-to-one)
summer_fine_gsd["max"] = sm_fine_gsd_max.iloc[:, 0]
summer_fine_gsd["avg"] = sm_fine_gsd_avg.iloc[:, 0]
summer_fine_gsd["min"] = sm_fine_gsd_min.iloc[:, 0]
summer_combined_gsd["max"] = sm_combined_gsd_max.iloc[:, 0]
summer_combined_gsd["avg"] = sm_combined_gsd_avg.iloc[:, 0]
summer_combined_gsd["min"] = sm_combined_gsd_min.iloc[:, 0]
spring_fine_gsd["max"] = sp_fine_gsd_max.iloc[:, 0]
spring_fine_gsd["avg"] = sp_fine_gsd_avg.iloc[:, 0]
spring_fine_gsd["min"] = sp_fine_gsd_min.iloc[:, 0]
spring_combined_gsd["max"] = sp_combined_gsd_max.iloc[:, 0]
spring_combined_gsd["avg"] = sp_combined_gsd_avg.iloc[:, 0]
spring_combined_gsd["min"] = sp_combined_gsd_min.iloc[:, 0]

# make the index of the dataframes the values in grain_sizes array
# NOTE(review): grain_sizes is read from dataframe1 only; this assumes the
# other three files list the same grain-size classes in the same order -
# confirm (a differing row count raises a length-mismatch ValueError here).
summer_fine_gsd.index = grain_sizes
summer_combined_gsd.index = grain_sizes
spring_fine_gsd.index = grain_sizes
spring_combined_gsd.index = grain_sizes