In [647]:
import pandas as pd
import numpy as np
from datetime import date
import os

In [648]:
## identifier of the sampling outing to process; it is interpolated into the
## input and output results file names built from it in this notebook
sample_outing_name = 's4_s6'

In [649]:
# Resolve which results CSV to load for this outing: use the "_w_df_results" export
# when that file exists on disk, otherwise fall back to the plain "_results" export.
with_df_results_path = f"/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{sample_outing_name}_w_df_results.csv"
plain_results_path = f"/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{sample_outing_name}_results.csv"
input_results_path = with_df_results_path if os.path.exists(with_df_results_path) else plain_results_path

# File-name stem only (no directory, no extension) for the joined export.
output_results_path = f"{sample_outing_name}_results_joined_SL"

In [650]:
# look up table file paths
#   sl_path: Excel workbook of screening levels; its 'Soil' and 'Water' sheets are read
#   pcb_arc_lookup_path: CSV whose 'PCB Isomer' and 'Aroclor Name' columns are used to
#                        replace PCB names with Aroclor names
sl_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/Master_Screening_Levels.xlsx"
pcb_arc_lookup_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/PCB_aroclor_lookup.csv"

In [651]:
# Load the sampling results for the selected outing.
# Later cells read these columns from it: 'Result Value', 'Result Value Units',
# 'Result Parameter Name' and 'Sample Matrix_clean'.
# NOTE(review): 'Sample ID' and 'Field Collection Start Date' are presumably supplied by
# this file as well - confirm against the CSV.
results_df = pd.read_csv(input_results_path)

In [652]:
# Normalise reported concentrations to ug/L.
#   pg/L -> ug/L: divide by 1,000,000  (1 ug = 1e6 pg)
#   mg/L -> ug/L: multiply by 1,000    (1 mg = 1e3 ug)
# The mg/L branch previously divided by 1,000, which understated those results by a
# factor of one million.
# Both unit masks are taken once, before either column is rewritten, so the value
# conversion and the unit relabel always apply to exactly the same rows. Rows reported
# in any other unit are left untouched, and re-running the cell is a no-op.
reported_units = results_df['Result Value Units']
is_pg_per_l = reported_units == 'pg/L'
is_mg_per_l = reported_units == 'mg/L'

results_df['Result Value'] = np.where(is_pg_per_l, results_df['Result Value'] / 1_000_000, results_df['Result Value'])
results_df['Result Value'] = np.where(is_mg_per_l, results_df['Result Value'] * 1_000, results_df['Result Value'])
results_df['Result Value Units'] = np.where(is_pg_per_l | is_mg_per_l, 'ug/L', reported_units)

#TODO calculate total pcbs for epa method

JOIN TABLES OF RESULTS TO MASTER SCREENING LEVELS FROM F&B

In [653]:
# Read the soil and the water screening-level sheets from the master workbook
# (soil first, then water), one data frame per sheet.
sl_soil_df, sl_water_df = (
    pd.read_excel(sl_path, sheet_name=sheet) for sheet in ('Soil', 'Water')
)

# Stack both media into a single screening-level table; each sheet keeps its own row labels.
sl = pd.concat([sl_soil_df, sl_water_df])

In [654]:
# Dioxin/furan and PCB screening-level names carry commas; remove them for those two
# chemical groups only so the names match the results spreadsheet.
chemical_group = sl['Chemical Group']
needs_comma_strip = (chemical_group == 'Dioxin Furans') | (chemical_group == 'PCB')
sl['Chemical'] = np.where(needs_comma_strip, sl['Chemical'].str.replace(',', ''), sl['Chemical'])

JOIN SCREENING LEVELS TO RESULTS

In [655]:
# Load the PCB isomer -> Aroclor lookup table and remove the commas from the isomer
# names, which are used as the join key against the comma-free 'Chemical' names.
pcb_arc_lookup = pd.read_csv(pcb_arc_lookup_path)
isomer_without_commas = pcb_arc_lookup['PCB Isomer'].str.replace(",", "")
pcb_arc_lookup['PCB Isomer'] = isomer_without_commas

In [656]:
# Attach the Aroclor lookup to the screening levels (outer join: unmatched rows from
# either side are kept), then rename a chemical to its Aroclor name wherever the lookup
# supplied a name containing 'aroclor' (case-sensitive match; missing names never match).
# This lines the PCB entries up with the names used in the F&B results.
sl_arc_join = sl.merge(pcb_arc_lookup, how='outer', left_on='Chemical', right_on='PCB Isomer')
has_aroclor_name = sl_arc_join['Aroclor Name'].str.contains('aroclor', na=False)
sl_arc_join['Chemical'] = np.where(has_aroclor_name, sl_arc_join['Aroclor Name'], sl_arc_join['Chemical'])

In [657]:
# Pair every screening level with the results for the same chemical and medium.
# The outer join keeps screening levels without a result and results without a level.
sl_results_join = sl_arc_join.merge(
    results_df,
    how='outer',
    left_on=['Chemical', 'Medium'],
    right_on=['Result Parameter Name', 'Sample Matrix_clean'],
)

In [658]:
# Discard rows whose screening level is one of the placeholder codes rather than a value.
# Rows with a missing screening level are kept.
sl_measurement = sl_results_join['Screening Level Measurement']
is_placeholder = (sl_measurement == 'na') | (sl_measurement == 'TBD') | (sl_measurement == 'PQL')
sl_results_join = sl_results_join[~is_placeholder]

In [659]:
# Flag exceedances: 'Y' when the screening level is strictly below the result value,
# otherwise 'N' (a comparison involving a missing value is False and so yields 'N').
result_above_sl = sl_results_join['Screening Level Measurement'] < sl_results_join['Result Value']
sl_results_join['SL_exceeded'] = np.where(result_above_sl, 'Y', 'N')

In [660]:
# Signed margin between each result and its screening level (positive = result above the level)
result_value = sl_results_join['Result Value']
screening_level = sl_results_join['Screening Level Measurement']
sl_results_join['SL_diff'] = result_value - screening_level

In [661]:
# Where the screening level is blank, label both the level and the exceedance flag
# as "No Screening Level Identified".
# The filled column is assigned back to the frame instead of calling
# fillna(..., inplace=True) on sl_results_join['Screening Level Measurement']: that form
# mutates an intermediate Series, which pandas warns about and which never reaches the
# frame when Copy-on-Write is active, silently leaving the blanks in place.
no_sl_label = 'No Screening Level Identified'
sl_results_join['Screening Level Measurement'] = sl_results_join['Screening Level Measurement'].fillna(no_sl_label)
sl_results_join['SL_exceeded'] = np.where(sl_results_join['Screening Level Measurement'] == no_sl_label, no_sl_label, sl_results_join['SL_exceeded'])

In [662]:
# Keep only rows that carry a Sample ID (presumably this removes the screening-level-only
# rows produced by the outer join - verify), then expose the collection start date
# under the shorter column name DATE.
sl_results_join = (
    sl_results_join
    .dropna(subset=['Sample ID'])
    .rename(columns={'Field Collection Start Date': 'DATE'})
)

In [663]:
# Rows with no screening-level Medium / Chemical take them from the result's own
# matrix and parameter name instead.
medium_missing = sl_results_join['Medium'].isna()
chemical_missing = sl_results_join['Chemical'].isna()
sl_results_join['Medium'] = np.where(medium_missing, sl_results_join['Sample Matrix_clean'], sl_results_join['Medium'])
sl_results_join['Chemical'] = np.where(chemical_missing, sl_results_join['Result Parameter Name'], sl_results_join['Chemical'])

In [664]:
# Disabled early export, kept for reference only. Re-enabling it at this point would fail
# on a fresh kernel: `columns` is defined in a later cell and the 'stringent_ind' column
# it selects has not been added to sl_results_join yet.
#sl_results_join[columns].to_csv(f'/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{output_results_path}.csv', index = False)

CALCULATE MOST STRINGENT AND COUNT OF STRINGENT EXCEEDED

In [665]:
def drop_levels(df):
    """Flatten the column header of an aggregated frame and return that same frame.

    The frame is modified in place in two steps:

    1. its index is moved into the columns (a frame whose index was already reset
       therefore gains an extra positional 'index' column);
    2. the second level of the column MultiIndex is dropped, so a header such as
       ('Screening Level Measurement', 'min') becomes 'Screening Level Measurement'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with MultiIndex columns, e.g. the result of
        ``groupby(...).agg({column: [func]})``.

    Returns
    -------
    pandas.DataFrame
        The object that was passed in (not a copy).
    """
    df.reset_index(inplace=True)
    top_level_names = df.columns.droplevel(1)
    df.columns = top_level_names
    return df

In [666]:
# Reduce the screening-level table to rows with a usable level: drop every placeholder
# code in turn, then cast what is left to float (missing levels are kept as NaN).
placeholder_codes = ('N/A', 'na', 'TBD', 'PQL', 'No Screening Level Identified')
for code in placeholder_codes:
    sl_arc_join = sl_arc_join[sl_arc_join['Screening Level Measurement'] != code]
sl_arc_join['Screening Level Measurement'] = sl_arc_join['Screening Level Measurement'].astype(float)

In [667]:
# Most stringent (lowest) screening level per medium / chemical group / chemical / scenario
stringent_keys = ['Medium', 'Chemical Group', 'Chemical', 'Scenario']
sl_min_by_scenario = sl_arc_join.groupby(by=stringent_keys).agg({'Screening Level Measurement': ['min']})
# NOTE(review): the index is reset here and once more inside drop_levels, so the result
# also carries a positional 'index' column.
sl_stringent = drop_levels(sl_min_by_scenario.reset_index())

In [668]:
# Mark the result rows whose screening level is the most stringent one for their scenario.
# No `on=` is passed, so pandas joins on every column name the two frames share; the
# `_merge` indicator is 'both' only for rows that found a partner in sl_stringent.
sl_results_join = pd.merge(sl_results_join, sl_stringent, how='left', indicator=True)
matched_stringent = sl_results_join['_merge'] == 'both'
sl_results_join['stringent_ind'] = np.where(matched_stringent, 'Stringent', '')

In [669]:
# Columns written to the joined export, in output order
columns = [
    # sample / screening-level identification
    'DATE',
    'Sample ID',
    'Medium',
    'Chemical Group',
    'Chemical',
    'Scenario',
    # screening level details
    'Screening Level Type',
    'Screening Level Measurement',
    'SL Unit',
    'Source',
    'Parameter in Spreadsheet',
    # measured result and derived flags
    'Result Value',
    'Result Value Units',
    'SL_exceeded',
    'SL_diff',
    'stringent_ind',
]

In [670]:
# Write the selected columns of the joined table to the Sampling_Results folder,
# without the row index.
joined_results_csv = f'/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/{output_results_path}.csv'
sl_results_join[columns].to_csv(joined_results_csv, index=False)

FIX THIS: SHARE OF STRINGENT SCREENING LEVELS EXCEEDED PER SAMPLE (PERCENTAGES CAN COME OUT ABOVE 100)

In [671]:
# TODO: debug issues with percentages exceeding 100. something going on with null values counting as exceeded
# Number of most-stringent screening levels available per medium / chemical group /
# scenario, counted across all chemicals of the group.
scenario_keys = ['Medium', 'Chemical Group', 'Scenario']
sl_levels_per_scenario = sl_stringent.groupby(by=scenario_keys).agg({'Screening Level Measurement': ['count']})
sl_stringent_count = drop_levels(sl_levels_per_scenario.reset_index())

In [672]:
# For each sample id, count how many stringent screening levels were exceeded for a scenario.
#
# `sl_stringent_exceed` is not defined in any earlier cell shown, so this cell raised a
# NameError on a fresh kernel. It is built here from the joined table as the rows flagged
# both as the most stringent level and as exceeded.
# NOTE(review): this definition is reconstructed from the cell's stated intent - confirm
# it matches the filter that was originally used.
is_stringent_exceedance = (sl_results_join['stringent_ind'] == 'Stringent') & (sl_results_join['SL_exceeded'] == 'Y')

# Count each chemical at most once per sample and scenario. Repeated rows for one chemical
# (for example several screening-level rows tied at the minimum, or repeated results)
# would otherwise inflate the count past the number of stringent levels available.
sl_stringent_exceed = sl_results_join[is_stringent_exceedance].drop_duplicates(
    subset=['Medium', 'Sample ID', 'Chemical Group', 'Chemical', 'Scenario']
)

sl_stringent_exceed_count = sl_stringent_exceed.groupby(by =['Medium', 'Sample ID','Chemical Group', 'Scenario']).agg({'Screening Level Measurement': ['count']}).reset_index()
sl_stringent_exceed_count = drop_levels(sl_stringent_exceed_count)

In [673]:
# Attach the number of available stringent levels to each per-sample exceedance count.
# After the merge, suffix _x is the exceeded count and _y the available count.
# The stored value is their ratio (a fraction, not multiplied by 100).
sl_stringent_exceed_count = pd.merge(
    sl_stringent_exceed_count,
    sl_stringent_count,
    on=['Medium', 'Chemical Group', 'Scenario'],
)
exceeded_levels = sl_stringent_exceed_count['Screening Level Measurement_x']
available_levels = sl_stringent_exceed_count['Screening Level Measurement_y']
sl_stringent_exceed_count['pct_stingent_exceeded'] = exceeded_levels / available_levels