This script runs on pre-processed, measured chlorophyll data.
The script "process_chlorophyll_data.R" generates these processed data from the raw data files.

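This script depth-integrates the measured chlorophyll for each station where a chl measurement was made
and writes the csv file '../processed_data/depth_integrated_chl_by_station.csv'.
It also depth-integrates chlorophyll from the surface down to each trap
and writes the csv file '../processed_data/depth_integrated_chl_above_traps.csv'.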

In [1]:
import numpy as np
import pandas as pd

Note: as read in, chlMean is in units of ug/L.

In [2]:
# Load the pre-processed chlorophyll measurements.
ctd = pd.read_csv('../data_measured_chl/ctd_chl_processed.csv')

# Unique rows of the columns used further down, ordered shallow to deep.
sample_columns = ['sampleNum', 'station', 'depth_m', 'chlMean', 'phaeoMean']
ctd_trimmed = ctd[sample_columns].drop_duplicates().sort_values(by='depth_m')

# Average each pigment over every (station, depth) pair, then join the two
# tables back together so there is a single row per station and depth.
station_depth = ['station', 'depth_m']
chl_mean = ctd.groupby(station_depth, as_index=False)['chlMean'].mean()
phaeo_mean = ctd.groupby(station_depth, as_index=False)['phaeoMean'].mean()
ctd_merge_casts = chl_mean.merge(phaeo_mean, on=station_depth)

print(len(ctd), len(ctd_trimmed), len(ctd_merge_casts)) # merged casts at 5 stations


# ctd_merge_casts[ctd_merge_casts['station']==15]



2808 937 932


Unit conversions to mg/m^2 are done below, as part of the depth integration.

In [4]:
## Note: there is no CTD cast at the deployment station for T026 (station 174)
## Data from station 167 (same day, just hours earlier) will be used for T026

# Load the trap metadata and keep only the columns the depth integration needs.
traps = pd.read_csv('../data_station_meta/trap_metadata.csv')
trap_columns = ['Sample_Number', 'Deployment', 'Section_Name', 'Deploy_Station', 'Trap_Depth']
# .copy() makes traps_trimmed an independent frame, so the assignment below
# edits it directly instead of a possible view of `traps`
# (which is what triggers pandas' SettingWithCopyWarning).
traps_trimmed = traps[trap_columns].copy()

# Replace the deployment station for trap 26 with station 167
traps_trimmed.loc[traps_trimmed['Deployment'] == 'T026', 'Deploy_Station'] = 167

# traps_trimmed

In [5]:
def _layer_mg_m2(thickness_m, chl_ug_per_l):
    """Return the chlorophyll (mg/m^2) held in one slice of the water column.

    The slice is treated as a 1 m x 1 m column that is ``thickness_m`` metres
    thick and has a uniform chlorophyll concentration of ``chl_ug_per_l`` (ug/L).
    """
    litres_per_m2 = thickness_m * 1000  # 1 m^3 = 1000 L, so each metre of depth is 1000 L per m^2
    chl_ug_per_m2 = litres_per_m2 * chl_ug_per_l
    return chl_ug_per_m2 / 1000  # 1000 ug per mg


def _integrate_profile_mg_m2(depths_m, chl_ug_per_l):
    """Depth-integrate chlorophyll from the surface to the deepest measurement.

    ``depths_m`` must be non-empty and sorted shallow to deep; ``chl_ug_per_l``
    holds the matching concentrations. The shallowest measurement represents
    the water between the surface and itself, and every pair of adjacent
    measurements is averaged (ignoring a NaN partner) over the slice between
    them. Returns the total in mg/m^2.
    """
    total = _layer_mg_m2(depths_m[0], chl_ug_per_l[0])  # surface -> first measurement
    adjacent = zip(depths_m[:-1], depths_m[1:], chl_ug_per_l[:-1], chl_ug_per_l[1:])
    for top_m, bottom_m, chl_top, chl_bottom in adjacent:
        # Every adjacent pair is included, starting with the first and second
        # measurement, so no part of the water column is left out.
        total += _layer_mg_m2(bottom_m - top_m, np.nanmean([chl_top, chl_bottom]))
    return total


# Depth-integrate chlorophyll from the surface down to each trap, using the
# CTD profile measured at the trap's deployment station.
diChl_above_trap_list = []

for trap in traps_trimmed['Sample_Number']:

    meta = traps_trimmed.loc[traps_trimmed['Sample_Number'] == trap]
    trap_depth = meta['Trap_Depth'].iloc[0]

    profile = ctd_merge_casts.loc[
        ctd_merge_casts['station'] == meta['Deploy_Station'].iloc[0]
    ].sort_values(by='depth_m')
    depths = profile['depth_m'].to_numpy(dtype=float)
    chl = profile['chlMean'].to_numpy(dtype=float)

    if len(depths) == 0 or pd.isna(trap_depth):
        # No CTD data for this station (or no trap depth): record a missing
        # value rather than a misleading 0 mg/m^2.
        print(trap, 'Something else is wrong')
        diChl_mg_m2 = np.nan

    elif trap_depth < depths[0]:
        # The shallowest measurement represents the surface waters, so it is
        # applied from the surface down to the trap.
        print(trap, 'Trap is shallower than first measurement, likely ice-tethered.')
        diChl_mg_m2 = _layer_mg_m2(trap_depth, chl[0])

    else:
        above_trap = depths <= trap_depth
        depths, chl = depths[above_trap], chl[above_trap]
        diChl_mg_m2 = _integrate_profile_mg_m2(depths, chl)

        if depths[-1] == trap_depth:
            print(trap, 'measurement at trap depth') # the integration already ends at the trap
        else:
            # we assume last measurement above the trap represents the waters
            # between that measurement and the trap
            print(trap, 'no measurement at trap depth')
            diChl_mg_m2 += _layer_mg_m2(trap_depth - depths[-1], chl[-1])

    diChl_above_trap_list.append(pd.DataFrame({'trap': trap,
                                               'deployment': meta['Deployment'],
                                               'station': meta['Deploy_Station'],
                                               'section': meta['Section_Name'],
                                               'trap_depth': meta['Trap_Depth'],
                                               'diChl_mg_m2': [diChl_mg_m2]}))

diChl_above_trap_df = pd.concat(diChl_above_trap_list).sort_values(by='trap')
print(diChl_above_trap_df)
diChl_above_trap_df.to_csv('../processed_data/depth_integrated_chl_above_traps.csv', index = False)

T001S Trap is shallower than first measurement, likely ice-tethered.
T001D no measurement at trap depth
T002S measurement at trap depth
T002D no measurement at trap depth
T003S no measurement at trap depth
T003D no measurement at trap depth
T004S no measurement at trap depth
T004D no measurement at trap depth
T005S no measurement at trap depth
T005D no measurement at trap depth
T006S Trap is shallower than first measurement, likely ice-tethered.
T006D no measurement at trap depth
T007S Trap is shallower than first measurement, likely ice-tethered.
T007D no measurement at trap depth
T008S Trap is shallower than first measurement, likely ice-tethered.
T008D no measurement at trap depth
T009S no measurement at trap depth
T009D no measurement at trap depth
T010S no measurement at trap depth
T010D measurement at trap depth
T011S Trap is shallower than first measurement, likely ice-tethered.
T011D measurement at trap depth
T012S Trap is shallower than first measurement, likely ice-tethered.


In [6]:
# Depth-integrate the full chlorophyll profile of every station:
#   - the first (shallowest) measurement represents 0 -> first depth
#   - take the mean of each pair of adjacent measurements, starting with the
#     first and second, and multiply by the difference in their depths
#   - repeat until the final (deepest) measurement

stations = ctd_trimmed['station'].drop_duplicates().to_list()

dichl_list = []
for i in stations:
    # ctd_trimmed was sorted by depth_m when it was built, so each station's
    # rows already run shallow to deep.
    # NOTE(review): ctd_trimmed keeps one row per sample, so a station with
    # repeat casts can hold the same depth twice (a zero-thickness slice);
    # confirm whether ctd_merge_casts should be used here instead.
    df = ctd_trimmed.loc[ctd_trimmed['station'] == i]
    depths = df['depth_m'].to_numpy(dtype=float)
    chl = df['chlMean'].to_numpy(dtype=float)

    if len(depths) == 0:
        sum_of_rectangles = np.nan  # nothing to integrate for this station
    else:
        # (thickness in m, chl in ug/L) for every slice of the water column
        slices = [(depths[0], chl[0])]  # surface -> first measurement
        slices.extend(
            (bottom_m - top_m, np.nanmean([chl_top, chl_bottom]))
            for top_m, bottom_m, chl_top, chl_bottom
            in zip(depths[:-1], depths[1:], chl[:-1], chl[1:])
        )
        # thickness (m) * 1000 gives L/m2 for a 1m by 1m column; times chl (ug/L)
        # gives ug/m2; dividing by 1000 ug/mg gives mg/m2
        sum_of_rectangles = sum(
            thickness_m * 1000 * chl_ug_per_l / 1000
            for thickness_m, chl_ug_per_l in slices
        )

    dichl_list.append(pd.DataFrame({'station': i,
                                    'diChl': [sum_of_rectangles]}))
dichl_df = pd.concat(dichl_list).sort_values(by='station')
dichl_df.to_csv('../processed_data/depth_integrated_chl_by_station.csv', index = False)