In [1]:
import pandas as pd
import geopandas as gp
import numpy as np
import os
import requests
import urllib.parse

In [2]:
# initiate file paths
input_file_path = "/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/aggregated_results_geom.csv"

In [3]:
def drop_levels(df):
    df.reset_index(inplace = True)
    df.columns = df.columns.droplevel(1)
    return df

In [4]:
all_results = pd.read_csv(input_file_path)

In [5]:
sl_by_group = all_results.copy()

# count screening level exceedances by pah, pcb, df, and each individual metal
sl_by_group['Chemical Group'] = np.where(sl_by_group['Chemical Group']=='RCRA8', sl_by_group['Chemical'], sl_by_group['Chemical Group'])

In [6]:
# count of total exceedances by group
cnt_exceedances = sl_by_group[sl_by_group['SL_exceeded']=='Y']
cnt_exceedances = cnt_exceedances.groupby(by = ['DATE','Sample ID','Chemical Group','Medium','Latitude','Longitude']).agg({'SL_exceeded': ['count']}).reset_index()
cnt_exceedances = drop_levels(cnt_exceedances)

In [7]:
# calculate total screening level count
cnt_sl = sl_by_group[sl_by_group['SL_exceeded'] != 'No Screening Level Identified']
cnt_sl = cnt_sl.groupby(by = ['DATE','Sample ID','Chemical Group','Medium','Latitude','Longitude']).agg({'SL_exceeded': ['count']}).reset_index()
cnt_sl = drop_levels(cnt_sl)

In [8]:
cnt_exceedances_merge = cnt_exceedances.merge(cnt_sl, left_on = ['DATE','Sample ID','Chemical Group','Medium','Latitude','Longitude'], right_on = ['DATE','Sample ID','Chemical Group','Medium','Latitude','Longitude'], how = 'left')

In [9]:
cnt_exceedances_merge.rename(columns = {'SL_exceeded_x':'cnt_sl_exceed', 'SL_exceeded_y':'tot_cnt_sl'}, inplace = True)
cnt_exceedances_merge['pct_exceed'] = cnt_exceedances_merge['cnt_sl_exceed'] /cnt_exceedances_merge['tot_cnt_sl']
cnt_exceedances_merge.drop(columns = {'index_x', 'index_y'}, inplace = True)

CALCULATE MOST STRINGENT - move this to the join SL spreadsheet

In [62]:
sl_soil = pd.read_excel("/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/Master_Screening_Levels.xlsx", sheet_name='Soil')
sl_water = pd.read_excel("/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/Lookup Tables/Master_Screening_Levels.xlsx", sheet_name='Water')
stringent_merge = pd.concat([sl_soil, sl_water])

In [63]:
stringent_merge = stringent_merge[stringent_merge['Screening Level Measurement']!='N/A']
stringent_merge = stringent_merge[(stringent_merge['Screening Level Measurement']!='na')]
stringent_merge = stringent_merge[(stringent_merge['Screening Level Measurement']!='TBD')]
stringent_merge = stringent_merge[(stringent_merge['Screening Level Measurement']!='PQL')]
stringent_merge = stringent_merge[(stringent_merge['Screening Level Measurement']!='No Screening Level Identified')]
stringent_merge['Screening Level Measurement'] = stringent_merge['Screening Level Measurement'].astype(float)

In [64]:
# for each scenario and chemical combination, calculate the most stringent SL
stringent = sl.groupby(by =['Medium', 'Chemical Group', 'Chemical', 'Scenario']).agg({'Screening Level Measurement': ['min']}).reset_index()
#stringent = drop_levels(stringent)

In [65]:
# for each scenario and chemical combination, count number of SL by chemical group
stringent_cnt = stringent_merge.groupby(by =['Medium', 'Chemical Group', 'Scenario']).agg({'Screening Level Measurement': ['count']})
stringent_cnt = drop_levels(stringent_cnt)

In [66]:
# get all results
only_results = all_results[['DATE','Sample ID', 'Medium', 'Chemical Group', 'Chemical','Result Value','Latitude','Longitude']].drop_duplicates()

In [67]:
# join the most stringent results to all results
stringent_merge = only_results.merge(stringent, left_on = ['Medium', 'Chemical Group', 'Chemical'], right_on = ['Medium', 'Chemical Group', 'Chemical'], how = 'right')

In [13]:
sl = sl[(sl['Screening Level Measurement']!='na')]
sl = sl[(sl['Screening Level Measurement']!='TBD')]
sl = sl[(sl['Screening Level Measurement']!='PQL')]
sl = sl[(sl['Screening Level Measurement']!='No Screening Level Identified')]

In [14]:
sl['Screening Level Measurement'] = sl['Screening Level Measurement'].astype(float)

In [68]:
sl['SL_exceed'] = np.where(sl['Screening Level Measurement'] < sl['Result Value'], 'Y','N')
sl['SL_diff'] = sl['Result Value'] - sl['Screening Level Measurement']

In [16]:
stringent_merge_exceed = stringent_merge[stringent_merge['SL_exceed']=='Y']

In [70]:
# TODO: calculate the % of contaminants within a chemical group and scenario that exceed
# count the total number of stringent SL exceeded by chemical

In [17]:
stringent_merge_exceed.drop(columns = 'index', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stringent_merge_exceed.drop(columns = 'index', inplace = True)


EXPORT FOR MAPS

In [18]:
# export results that only exceed most stringent screening levels
stringent_merge_exceed.to_csv("/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/stringent_exceeded.csv", index = False)

In [19]:
# export results based on count of exceedances and for each results
cnt_exceedances_merge.to_csv("/home/nweiss/gdrive/Year 2/Summer - Duwamish/Sampling_Results/count_exceedances.csv", index = False)