In [37]:
import pandas as pd
import numpy as np
from datetime import date

In [38]:
# Name of the sampling outing to process; the input and output file names
# below are built from this value.
sample_outing_name = "nov23"

In [39]:
# Input CSV path: the combined 'all_results' file is named after the outing
# as-is, while every other outing carries a '_w_df_results' suffix.
input_results_path = (
    f"/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{sample_outing_name}.csv"
    if sample_outing_name == 'all_results'
    else f"/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{sample_outing_name}_w_df_results.csv"
)

# Output file stem (no directory, no extension), stamped with today's date as YYYYMMDD.
output_results_path = f"{sample_outing_name}_results_joined_SL_{date.today():%Y%m%d}"

In [40]:
# Lookup tables used by the joins below:
#   - PCB isomer -> aroclor name lookup (CSV)
#   - master screening levels workbook (Excel)
pcb_arc_lookup_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/PCB_aroclor_lookup.csv"
sl_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/Master_Screening_Levels.xlsx"

In [41]:
# Load the sampling results for the selected outing.
results_df = pd.read_csv(filepath_or_buffer=input_results_path)

JOIN TABLES OF RESULTS TO MASTER SCREENING LEVELS FROM F&B

In [42]:
# Read the 'Soil' and 'Water' sheets of the screening-level workbook into
# separate data frames.
sl_soil_df, sl_water_df = (
    pd.read_excel(sl_path, sheet_name=sheet) for sheet in ('Soil', 'Water')
)

# Stack both media into one table holding all screening levels.
sl = pd.concat([sl_soil_df, sl_water_df])

In [43]:
# Remove commas from the chemical names of the 'Dioxin Furans' and 'PCB'
# groups so they match the names used in the results spreadsheet; all other
# groups keep their names unchanged.
sl['Chemical'] = np.where(
    sl['Chemical Group'].isin(['Dioxin Furans', 'PCB']),
    sl['Chemical'].str.replace(',', ''),
    sl['Chemical'],
)

JOIN SCREENING LEVELS TO RESULTS

In [44]:
# Load the PCB isomer -> aroclor lookup and strip commas from the isomer
# names so they line up with the comma-free screening-level chemical names.
pcb_arc_lookup = pd.read_csv(pcb_arc_lookup_path).assign(
    **{'PCB Isomer': lambda lookup: lookup['PCB Isomer'].str.replace(",", "")}
)

In [45]:
# Attach the aroclor lookup to the screening levels (outer join, so unmatched
# rows from either side are kept).
sl_arc_join = sl.merge(
    pcb_arc_lookup,
    how='outer',
    left_on='Chemical',
    right_on='PCB Isomer',
)

# Where the lookup supplied an aroclor name, use it as the chemical name so it
# matches the F&B results; otherwise keep the original chemical name.
# NOTE(review): the 'aroclor' match is case-sensitive — confirm the lookup
# spells aroclor names in lowercase.
sl_arc_join['Chemical'] = np.where(
    sl_arc_join['Aroclor Name'].str.contains('aroclor', na=False),
    sl_arc_join['Aroclor Name'],
    sl_arc_join['Chemical'],
)

In [46]:
# Outer-join the screening levels to the results on (chemical, medium), so
# results without a screening level and screening levels without a result are
# both retained at this stage.
sl_results_join = sl_arc_join.merge(
    results_df,
    how='outer',
    left_on=['Chemical', 'Medium'],
    right_on=['Result Parameter Name', 'Sample Matrix_clean'],
)

In [47]:
# Drop rows whose screening level is a placeholder ('na', 'TBD' or 'PQL')
# rather than a value. The filter runs on the joined table, so result rows
# matched to such a placeholder are removed along with it; rows with a
# missing screening level are kept.
sl_results_join = sl_results_join[
    ~sl_results_join['Screening Level Measurement'].isin(['na', 'TBD', 'PQL'])
]

In [48]:
# calculate whether the screening levels have been exceeded:
# 'Y' when the result value is strictly greater than the screening level,
# 'N' otherwise (comparisons involving a missing value evaluate to False, so
# rows with a missing screening level or result are 'N' at this point).
# NOTE(review): assumes both columns hold numeric values in the same units —
# 'SL Unit' and 'Result Value Units' are not compared here; TODO confirm.
sl_results_join['SL_exceeded'] = np.where(sl_results_join['Screening Level Measurement'] < sl_results_join['Result Value'],'Y','N')

In [49]:
# difference between the result and its screening level (positive when the
# result is above the screening level); missing where either value is missing.
# NOTE(review): assumes both columns are numeric and share units — TODO confirm.
sl_results_join['SL_diff'] = sl_results_join['Result Value'] - sl_results_join['Screening Level Measurement']

In [50]:
# Where the screening level is blank, label both the measurement and the
# exceedance flag as "No Screening Level Identified".
#
# The filled column is assigned back to the DataFrame rather than calling
# fillna(..., inplace=True) on the column selection. That form is chained
# assignment: pandas warns about it, and under Copy-on-Write it no longer
# updates the DataFrame, which would leave the exceedance flag at 'N' for
# results that have no screening level.
sl_results_join['Screening Level Measurement'] = (
    sl_results_join['Screening Level Measurement'].fillna('No Screening Level Identified')
)
sl_results_join['SL_exceeded'] = np.where(
    sl_results_join['Screening Level Measurement'] == 'No Screening Level Identified',
    'No Screening Level Identified',
    sl_results_join['SL_exceeded'],
)

In [51]:
# Keep only rows that have a 'Sample ID' and expose the collection start date
# under the shorter column name 'DATE'.
# NOTE(review): presumably 'Sample ID' comes from the results file, so this
# drops screening levels with no matching result — confirm.
sl_results_join = (
    sl_results_join
    .dropna(subset=['Sample ID'])
    .rename(columns={'Field Collection Start Date': 'DATE'})
)

In [52]:
# For rows where the screening-level side of the join is empty, fall back to
# the values from the results side: the sample matrix for 'Medium' and the
# result parameter name for 'Chemical'.
sl_results_join['Medium'] = np.where(
    sl_results_join['Medium'].notna(),
    sl_results_join['Medium'],
    sl_results_join['Sample Matrix_clean'],
)
sl_results_join['Chemical'] = np.where(
    sl_results_join['Chemical'].notna(),
    sl_results_join['Chemical'],
    sl_results_join['Result Parameter Name'],
)

In [53]:
# Columns written to the output file, in output order.
columns = [
    'DATE',
    'Sample ID',
    'Medium',
    'Chemical Group',
    'Chemical',
    'Scenario',
    'Screening Level Type',
    'Screening Level Measurement',
    'SL Unit',
    'Source',
    'Parameter in Spreadsheet',
    'Result Value',
    'Result Value Units',
    'SL_exceeded',
    'SL_diff',
]

In [54]:
# Write the selected columns to a CSV in the sampling results folder, without
# the DataFrame index.
sl_results_join[columns].to_csv(
    path_or_buf=f'/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{output_results_path}.csv',
    index=False,
)