In [3]:
import pandas as pd
import geopandas as gp
import numpy as np
import os
import itertools

JOIN TABLES OF RESULTS TO MASTER SCREENING LEVELS FROM F&B

In [42]:
# bring in screening results and master screening level spreadsheets
results = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/S4 and S6 Screening Results_CLEAN.xlsx"
sl = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Master_Screening_Levels.xlsx"

In [43]:
# create data frames of the raw data and the lookup
results_df = pd.read_excel(results, sheet_name='RAW_DATA')
pcb_arc_lookup = pd.read_excel(results, sheet_name='PCB to Aroclor Lookup')

# create data frame of the screening levels for soils and
sl_soil_df = pd.read_excel(sl, sheet_name='Soil')
sl_water_df = pd.read_excel(sl, sheet_name='Water')

In [44]:
# format pcb to aroclor loo
pcb_arc_lookup = pcb_arc_lookup[pcb_arc_lookup['Aroclor Name'].str.contains('aroclor', na=False)][['PCB Isomer', 'Aroclor Name']]

In [45]:
# replace pcb names with the aroclor names to match with F&B results
sl_soil_df_join = pd.merge(sl_soil_df, pcb_arc_lookup, how = 'outer', left_on = 'Chemical', right_on = 'PCB Isomer')
sl_soil_df_join['Chemical'] = np.where(sl_soil_df_join['Aroclor Name'].str.contains('aroclor', na=False), sl_soil_df_join['Aroclor Name'], sl_soil_df_join['Chemical'])
sl_soil_df_join.drop(columns = pcb_arc_lookup.columns, inplace = True)
sl_soil_df_join = sl_soil_df_join[sl_soil_df_join['Medium']=='Soil']

In [46]:
# replace pcb names with the aroclor names to match with F&B results
sl_water_df_join = pd.merge(sl_water_df, pcb_arc_lookup, how = 'outer', left_on = 'Chemical', right_on = 'PCB Isomer')
sl_water_df_join['Chemical'] = np.where(sl_water_df_join['Aroclor Name'].str.contains('aroclor', na=False), sl_water_df_join['Aroclor Name'], sl_water_df_join['Chemical'])
sl_water_df_join.drop(columns = pcb_arc_lookup.columns, inplace = True)
sl_water_df_join = sl_water_df_join[sl_water_df_join['Medium']=='Water']

In [47]:
# remove unnecessary columns from raw data
results_df = results_df[['Sample ID','Field Collection Start Date','Sample Matrix','Sample Source','Result Parameter Name','Result Value',
       'Result Value Units', 'Result Reporting Limit',
       'Result Reporting Limit Type', 'Result Detection Limit',
       'Result Detection Limit Type', 'Result Data Qualifier']]

In [48]:
# split results into soil and water
results_soil_df = results_df[results_df['Sample Source']=='Soil']
results_water_df = results_df[results_df['Sample Matrix']=='Water']

In [49]:
# join screening levels to the results
soil_sl_join = pd.merge(sl_soil_df_join,results_soil_df,how = 'outer', left_on = ['Chemical'], right_on = ['Result Parameter Name'])

In [50]:
# remove screening levels that do not have values
soil_sl_join = soil_sl_join[(soil_sl_join['Screening Level Measurement']!='na')]
soil_sl_join = soil_sl_join[(soil_sl_join['Screening Level Measurement']!='TBD')]
soil_sl_join = soil_sl_join[(soil_sl_join['Screening Level Measurement']!='PQL')]
soil_sl_join['Screening Level Measurement'].astype(float)

0       0.77
1       5.50
2      20.00
3      46.00
4       4.50
       ...  
754     0.88
755     2.90
756     0.23
757      NaN
758      NaN
Name: Screening Level Measurement, Length: 548, dtype: float64

In [51]:
# calculate whether the screening levels have been exceeded
soil_sl_join['SL_exceeded'] = np.where(soil_sl_join['Screening Level Measurement'] <soil_sl_join['Result Value'],'Y','N')

WATER

In [52]:
# join screening levels to results
water_sl_join = pd.merge(sl_water_df_join,results_water_df,how = 'outer', left_on = ['Chemical'], right_on = ['Result Parameter Name'])

In [53]:
water_sl_join = water_sl_join[(water_sl_join['Screening Level Measurement']!='na')]
water_sl_join = water_sl_join[(water_sl_join['Screening Level Measurement']!='TBD')]
water_sl_join = water_sl_join[(water_sl_join['Screening Level Measurement']!='PQL')]
water_sl_join['Screening Level Measurement'].astype(float)

0      0.000052
1      0.000052
2      0.000052
3      0.000052
4      0.009700
         ...   
919         NaN
920         NaN
921         NaN
922         NaN
923         NaN
Name: Screening Level Measurement, Length: 851, dtype: float64

In [54]:
# convert any screening levels that are in mg/L to ug/L
water_sl_join[water_sl_join['SL Unit'] == 'mg/L']

water_sl_join['Screening_Level_Measurement_Convert'] = np.where(water_sl_join['SL Unit'] == 'mg/L', water_sl_join['Screening Level Measurement']*1000, water_sl_join['Screening Level Measurement'])
water_sl_join['Screening_Level_Measurement_Convert'] = np.where(water_sl_join['SL Unit'] == 'ppm', water_sl_join['Screening Level Measurement']*1000, water_sl_join['Screening Level Measurement'])


In [55]:
# calculate whether the screening levels have been exceeded
water_sl_join['SL_exceeded'] = np.where(water_sl_join['Screening_Level_Measurement_Convert']< water_sl_join['Result Value'], 'Y','N')

In [56]:
water_sl_join.columns

Index(['Medium', 'Chemical Group', 'Chemical', 'Scenario',
       'Screening Level Type', 'Screening Level Measurement', 'SL Unit',
       'Source', 'Parameter in Spreadsheet', 'Sample ID',
       'Field Collection Start Date', 'Sample Matrix', 'Sample Source',
       'Result Parameter Name', 'Result Value', 'Result Value Units',
       'Result Reporting Limit', 'Result Reporting Limit Type',
       'Result Detection Limit', 'Result Detection Limit Type',
       'Result Data Qualifier', 'Screening_Level_Measurement_Convert',
       'SL_exceeded'],
      dtype='object')

In [57]:
columns = ['Medium', 'Sample ID','Field Collection Start Date', 'Chemical Group', 'Chemical', 'Result Parameter Name', 'Scenario', 'Screening Level Type',
       'Screening Level Measurement', 'Result Value', 'Result Data Qualifier', 'SL Unit', 'Source', 'SL_exceeded']

all_results = pd.concat([soil_sl_join[columns],water_sl_join[columns]])

In [58]:
# where the screening level is blank, replace exceedance with "no screening level identified"
all_results['Screening Level Measurement'].fillna('No Screening Level Identified', inplace = True)
all_results['SL_exceeded'] = np.where(all_results['Screening Level Measurement']=='No Screening Level Identified','No Screening Level Identified', all_results['SL_exceeded'])

In [59]:
all_results.dropna(subset=['Sample ID'], inplace=True)
all_results.to_csv('/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/s4_and_s6_results.csv', index = False)

GET MOST STRINGENT SCREENING LEVELS BY SCENARIO

In [72]:
sl = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Master_Screening_Levels.xlsx"

In [73]:
sl_soil_df = pd.read_excel(sl, sheet_name='Soil')
sl_water_df = pd.read_excel(sl, sheet_name='Water')

In [74]:
all_sl = pd.concat([sl_soil_df, sl_water_df])

In [75]:
all_sl['Screening Level Measurement'] = np.where(all_sl['Screening Level Measurement'] == 'na', np.NaN, all_sl['Screening Level Measurement'])
all_sl['Screening Level Measurement'] = np.where(all_sl['Screening Level Measurement'] == 'TBD', np.NaN, all_sl['Screening Level Measurement'])
all_sl['Screening Level Measurement'] = np.where(all_sl['Screening Level Measurement'] == 'PQL', np.NaN, all_sl['Screening Level Measurement'])

all_sl['Screening Level Measurement'] = all_sl['Screening Level Measurement'].astype(float)
stringent = all_sl.groupby(by =['Medium', 'Chemical Group', 'Chemical', 'Scenario']).agg({'Screening Level Measurement': ['max']}).reset_index()

In [76]:
stringent.reset_index(inplace = True)

In [77]:
stringent.columns = stringent.columns.droplevel(1)

In [78]:
stringent

Unnamed: 0,index,Medium,Chemical Group,Chemical,Scenario,Screening Level Measurement
0,0,Soil,Dioxin Furans,"2,3,7,8-TCDD",Drinking Water,0.000017
1,1,Soil,Dioxin Furans,"2,3,7,8-TCDD",Human Health,0.000013
2,2,Soil,Dioxin Furans,"2,3,7,8-TCDD",Method B,0.000093
3,3,Soil,Dioxin Furans,"2,3,7,8-TCDD",Method C,0.004100
4,4,Soil,Dioxin Furans,"2,3,7,8-TCDD",Resident,0.000140
...,...,...,...,...,...,...
740,740,Water,RCRA8,Vanadium,Nonpotable,
741,741,Water,RCRA8,Zinc,Direct Contact and Seafood Consumption,772.705856
742,742,Water,RCRA8,Zinc,Drinking Water,4800.000000
743,743,Water,RCRA8,Zinc,Human Seafood Consumption,81.000000


In [81]:
stringent.merge(all_sl, left_on = ['Medium', 'Chemical Group', 'Chemical', 'Scenario', 'Screening Level Measurement'], right_on = ['Medium', 'Chemical Group', 'Chemical', 'Scenario', 'Screening Level Measurement'])

Unnamed: 0,index,Medium,Chemical Group,Chemical,Scenario,Screening Level Measurement,Screening Level Type,SL Unit,Source,Parameter in Spreadsheet
0,0,Soil,Dioxin Furans,"2,3,7,8-TCDD",Drinking Water,0.000017,Protect Drinking Water Vadose Zone Leach,mg/kg,"PCUL SL-2, Vadose Zone Protection of Drinking ...",
1,1,Soil,Dioxin Furans,"2,3,7,8-TCDD",Human Health,0.000013,Direct Contact Unrestricted Upland Soil,mg/kg,"PCUL SL-1, Direct Contact Under Unrestricted L...",
2,2,Soil,Dioxin Furans,"2,3,7,8-TCDD",Method B,0.000093,"Direct Contact, Non Cancer",mg/kg,,Soil Method B Direct Contact Noncancer (mg/kg)
3,3,Soil,Dioxin Furans,"2,3,7,8-TCDD",Method C,0.004100,"Direct Contact, Non Cancer",mg/kg,,Soil Method C Direct Contact Noncancer (mg/kg)
4,4,Soil,Dioxin Furans,"2,3,7,8-TCDD",Resident,0.000140,Inhalation,mg/kg,EPA Regional Screening Level,Inhalation Exceeded (mg/kg)
...,...,...,...,...,...,...,...,...,...,...
917,740,Water,RCRA8,Vanadium,Nonpotable,,Nonpotable,ug/L,,Most Stringent PCUL Nonpotable Water
918,741,Water,RCRA8,Zinc,Direct Contact and Seafood Consumption,772.705856,Sediment,ug/L,PCUL GW-3,PCUL Protect Sediment
919,742,Water,RCRA8,Zinc,Drinking Water,4800.000000,Drinking Water,ug/L,PCUL GW-1,PCUL GW-1 Protect Drinking Water
920,743,Water,RCRA8,Zinc,Human Seafood Consumption,81.000000,Surface Water,ug/L,PCUL GW-2,PCUL Protect Surface Water
