In [11]:
import pandas as pd
import geopandas as gp
import numpy as np
import os
import itertools

In [60]:
# folder that holds the screening-result workbooks
subfolder_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results"

# keep only the entries of that folder whose name ends in .xlsx
results_files = list(filter(lambda name: name.endswith(".xlsx"), os.listdir(subfolder_path)))

August 8 Sampling Event Screening Results.xlsx
Duwamish Puget Creek Screening Results 2023.xlsx
Duwamish Superfund Waters Screening Results Jan. 2023.xlsx
Kellogg Island Screening Results 2023.xlsx
People's Park Screening Results 2023.xlsx
S4 Screening Results 2023.xlsx


In [56]:
# names used for the result tabs (hard-coded here, not read from a workbook)
tab_list = [
    'pah_soils',
    'rcra_soils',
    'rcra_water',
    'pah_soil_gw',
    'pah_water_mtca',
]

PAH SOILS

In [48]:
# Load the "PAH Soils" sheet of the August 8 sampling workbook
df = pd.read_excel(
    "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/August 8 Sampling Event Screening Results.xlsx",
    sheet_name="PAH Soils",
)

# Tidy the sheet: discard rows 0, 2, 3 and 4, keep only rows with a value in the
# first column, promote the first remaining row to column headers, then remove
# the row labelled 1 (presumably that same header row -- verify against the sheet layout)
df = df.drop(index=[0, 2, 3, 4])
df = df[df['Unnamed: 0'].notna()]
new_columns = df.iloc[0]
df.columns = new_columns
df = df.drop(index=1).reset_index(drop=True)

# Keep only the columns in which at least one row is flagged 'Y'
mask = (df == 'Y').any()
df_exceeded = df.loc[:, mask]

# Put the date and sample id back in front of the flagged columns (index-to-index inner join)
df_exceeded = df[['Date Collected', 'Sample ID']].merge(
    df_exceeded, how='inner', left_index=True, right_index=True
)

# Only one frame is concatenated at this point
pah_soils = pd.concat([df_exceeded])

In [61]:
# display the flagged columns joined with 'Date Collected' and 'Sample ID'
df_exceeded

1,Date Collected,Sample ID,WA DOE Anthracene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Benzo(a)anthracene Cleanup Level (mg/kg) (2),EPA RSL Benzo(a)pyrene Ingested (mg/kg) (1),WA DOE Benzo(a)pyrene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Benzo(b)fluoranthene Default Cleanup Level Method A Unrestricted (mg/kg) (2),"WA DOE Benzo(g,h,i)perylene Default Cleanup Level Method A Unrestricted (mg/kg) (2)",WA DOE Benzo(k)fluoranthene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Chrysene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Fluoranthene Default Cleanup Level Method A Unrestricted (mg/kg) (2),"WA DOE Indeno(1,2,3-cd)pyrene Default Cleanup Level Method A Unrestricted (mg/kg) (2)",WA DOE Naphthalene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Phenanthrene Default Cleanup Level Method A Unrestricted (mg/kg) (2),WA DOE Pyrene Default Cleanup Level Method A Unrestricted (mg/kg) (2)
0,2023-08-08 00:00:00,HPS-1,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y


JOIN TABLES OF RESULTS TO MASTER SCREENING LEVELS FROM F&B

In [174]:
# paths to the two input workbooks: the cleaned screening results and the master screening levels
results, sl = (
    "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/S4 and S6 Screening Results_CLEAN.xlsx",
    "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Master_Screening_Level.xlsx",
)

In [175]:
# results workbook: the raw data sheet and the PCB-to-Aroclor lookup sheet
results_df, pcb_arc_lookup = (
    pd.read_excel(results, sheet_name=sheet)
    for sheet in ('RAW_DATA', 'PCB to Aroclor Lookup')
)

# screening level workbook: one data frame each for the soil and water sheets
sl_soil_df, sl_water_df = (
    pd.read_excel(sl, sheet_name=sheet)
    for sheet in ('Soil', 'Water')
)

In [176]:
# format the pcb to aroclor lookup: keep rows whose 'Aroclor Name' contains
# 'aroclor' (case-sensitive match; missing names are excluded) and only the two name columns
aroclor_rows = pcb_arc_lookup['Aroclor Name'].str.contains('aroclor', na=False)
pcb_arc_lookup = pcb_arc_lookup.loc[aroclor_rows, ['PCB Isomer', 'Aroclor Name']]

In [177]:
# attach the lookup to the soil screening levels, matching 'Chemical' against 'PCB Isomer'
sl_soil_df_join = sl_soil_df.merge(
    pcb_arc_lookup, how='outer', left_on='Chemical', right_on='PCB Isomer'
)

# where a lookup row matched, use its aroclor name as the chemical name
soil_has_aroclor = sl_soil_df_join['Aroclor Name'].str.contains('aroclor', na=False)
sl_soil_df_join['Chemical'] = np.where(
    soil_has_aroclor, sl_soil_df_join['Aroclor Name'], sl_soil_df_join['Chemical']
)

# remove the lookup columns again and keep only rows whose 'Medium' is 'Soil'
sl_soil_df_join = sl_soil_df_join.drop(columns=pcb_arc_lookup.columns)
sl_soil_df_join = sl_soil_df_join.loc[sl_soil_df_join['Medium'].eq('Soil')]

In [178]:
# attach the lookup to the water screening levels, matching 'Chemical' against 'PCB Isomer'
sl_water_df_join = sl_water_df.merge(
    pcb_arc_lookup, how='outer', left_on='Chemical', right_on='PCB Isomer'
)

# where a lookup row matched, use its aroclor name as the chemical name
water_has_aroclor = sl_water_df_join['Aroclor Name'].str.contains('aroclor', na=False)
sl_water_df_join['Chemical'] = np.where(
    water_has_aroclor, sl_water_df_join['Aroclor Name'], sl_water_df_join['Chemical']
)

# remove the lookup columns again and keep only rows whose 'Medium' is 'Water'
sl_water_df_join = sl_water_df_join.drop(columns=pcb_arc_lookup.columns)
sl_water_df_join = sl_water_df_join.loc[sl_water_df_join['Medium'].eq('Water')]

In [179]:
# columns of the raw data that are kept; everything else is dropped
result_columns = [
    'Sample ID',
    'Sample Matrix',
    'Sample Source',
    'Result Parameter Name',
    'Result Value',
    'Result Value Units',
    'Result Reporting Limit',
    'Result Reporting Limit Type',
    'Result Detection Limit',
    'Result Detection Limit Type',
    'Result Data Qualifier',
]
results_df = results_df.loc[:, result_columns]

In [180]:
# split results into soil and water
# NOTE(review): soil rows are selected on 'Sample Source' while water rows are
# selected on 'Sample Matrix' -- confirm the two different columns are intended

is_soil = results_df['Sample Source'].eq('Soil')
is_water = results_df['Sample Matrix'].eq('Water')
results_soil_df = results_df.loc[is_soil]
results_water_df = results_df.loc[is_water]

In [181]:
# outer join of the soil screening levels with the soil results on chemical name,
# so rows without a match on either side are kept
soil_sl_join = sl_soil_df_join.merge(
    results_soil_df,
    how='outer',
    left_on='Chemical',
    right_on='Result Parameter Name',
)

In [182]:
# remove screening levels that do not have values: rows whose screening level is
# one of the strings 'na', 'TBD' or 'PQL' are dropped (missing values are kept).
# .copy() makes the filtered frame independent so later column assignments do not
# act on a slice of the previous frame.
placeholder_levels = ['na', 'TBD', 'PQL']
soil_sl_join = soil_sl_join[
    ~soil_sl_join['Screening Level Measurement'].isin(placeholder_levels)
].copy()

# store the float conversion back on the frame; astype returns a new Series, so
# without the assignment the column would keep its original dtype
soil_sl_join['Screening Level Measurement'] = soil_sl_join['Screening Level Measurement'].astype(float)

# show the converted column as the cell output
soil_sl_join['Screening Level Measurement']

0       0.77
1       5.50
2      20.00
3      46.00
4       4.50
       ...  
754     0.88
755     2.90
756     0.23
757      NaN
758      NaN
Name: Screening Level Measurement, Length: 548, dtype: float64

In [183]:
# flag 'Y' where the screening level is strictly below the result value, otherwise 'N'
# (a comparison involving a missing value is False, so those rows get 'N')
soil_sl_join['SL_exceeded'] = np.where(
    soil_sl_join['Screening Level Measurement'].lt(soil_sl_join['Result Value']),
    'Y',
    'N',
)

In [184]:
# outer join of the water screening levels with the water results on chemical name,
# so rows without a match on either side are kept
water_sl_join = sl_water_df_join.merge(
    results_water_df,
    how='outer',
    left_on='Chemical',
    right_on='Result Parameter Name',
)

In [185]:
# convert any screening levels that are in mg/L to ug/L (x1000); other units are left as-is
# The level is coerced to numeric first: np.where evaluates both branches, and
# multiplying a text entry by 1000 would repeat the string instead of scaling it.
# Entries that cannot be parsed become NaN and so never flag an exceedance.
water_level_numeric = pd.to_numeric(water_sl_join['Screening Level Measurement'], errors='coerce')
water_sl_join['Screening_Level_Measurement_Convert'] = np.where(
    water_sl_join['Unit'] == 'mg/L',
    water_level_numeric * 1000,
    water_level_numeric,
)

In [186]:
# flag 'Y' where the unit-converted screening level is strictly below the result value, otherwise 'N'
# (a comparison involving a missing value is False, so those rows get 'N')
water_sl_join['SL_exceeded'] = np.where(
    water_sl_join['Screening_Level_Measurement_Convert'].lt(water_sl_join['Result Value']),
    'Y',
    'N',
)

In [187]:
# inspect the column names available on the joined water table
water_sl_join.columns

Index(['Medium', 'Chemical Group', 'Chemical', 'Screening Level Type',
       'Screening Level Measurement', 'Unit', 'Source',
       'Parameter in Spreadsheet', 'Sample ID', 'Sample Matrix',
       'Sample Source', 'Result Parameter Name', 'Result Value',
       'Result Value Units', 'Result Reporting Limit',
       'Result Reporting Limit Type', 'Result Detection Limit',
       'Result Detection Limit Type', 'Result Data Qualifier',
       'Screening_Level_Measurement_Convert', 'SL_exceeded'],
      dtype='object')

In [188]:
# columns (and their order) shared by the soil and water tables in the combined output
columns = [
    'Medium',
    'Sample ID',
    'Chemical Group',
    'Chemical',
    'Result Parameter Name',
    'Screening Level Type',
    'Screening Level Measurement',
    'Result Value',
    'Result Data Qualifier',
    'Unit',
    'Source',
    'SL_exceeded',
]

# stack the soil rows on top of the water rows; each frame keeps its own index labels
all_results = pd.concat([frame[columns] for frame in (soil_sl_join, water_sl_join)])

In [194]:
# where the screening level is blank, replace it and the exceedance flag with
# "No Screening Level Identified".
# The filled column is assigned back instead of calling fillna(inplace=True) on
# the selected column: the chained in-place form is deprecated and does not
# update the frame when pandas Copy-on-Write is active.
all_results['Screening Level Measurement'] = all_results['Screening Level Measurement'].fillna('No Screening Level Identified')
all_results['SL_exceeded'] = np.where(
    all_results['Screening Level Measurement'] == 'No Screening Level Identified',
    'No Screening Level Identified',
    all_results['SL_exceeded'],
)

In [196]:
# drop rows that have no sample id, then write the table to CSV without the index column
all_results = all_results.dropna(subset=['Sample ID'])
all_results.to_csv(
    '/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/all_results.csv',
    index=False,
)