In [None]:
import pandas as pd
from pathlib import Path
import numpy as np
import panel as pn
pn.extension('plotly')
import plotly.express as px
import hvplot.pandas
import os
from libs.p1_library import chemical_filter, unit_conversion, chemical_to_moles, clean_data_df, conversion_factor_dict

from datetime import datetime


In [16]:
file_path = '../data/cleandata'
all_chems_df = clean_data_df(file_path)
all_chems_df.dropna(subset=['CHEMICAL_NAME'],inplace=True)
all_chems_df = all_chems_df[['LOC_NAME','SAMPLE_DATE','CAS_RN','CHEMICAL_NAME','REPORT_RESULT_VALUE','REPORT_RESULT_UNIT','LATITUDE','LONGITUDE']]
chemicals = all_chems_df.CHEMICAL_NAME.unique()
units_to_convert = list(conversion_factor_dict.keys())
all_chems_df = all_chems_df[all_chems_df['REPORT_RESULT_UNIT'].isin(units_to_convert)]
all_chems_df['VALUE_MUGRAM_PER_GRAM'] = all_chems_df.apply(unit_conversion,axis=1)
all_chems_df


In [18]:
all_chems_df['SAMPLE_DATE'] = pd.to_datetime(all_chems_df['SAMPLE_DATE'],utc=True)
all_chems_df['SAMPLE_YEAR_MONTH'] = all_chems_df['SAMPLE_DATE'].dt.strftime('%Y-%m')
all_chems_df.drop('SAMPLE_DATE',axis=1,inplace=True)

In [23]:
all_chems_grouped = all_chems_df.groupby(['SAMPLE_YEAR_MONTH','CHEMICAL_NAME']).mean()

all_chems_grouped['VALUE_MUGRAM_PER_GRAM'].hvplot.line(
    x='SAMPLE_YEAR_MONTH',
    groupby='CHEMICAL_NAME',
    title='Volatile Chemical Measurements in the Passaic River Basin',
    xlabel='sample year and month',
    ylabel='average concentration (ug/g)',
    rot=90)

BokehModel(combine_events=True, render_bundle={'docs_json': {'1e8fb5dc-3d6a-4c2a-92ec-a77f3e66c05c': {'defs': …

In [2]:
file_path = '../data/cleandata'
chemical_list = ["2,3,7,8-Tetrachlorodibenzo-p-dioxin",
    "Dieldrin",
    "Hexachlorobiphenyl; 3,3',4,4',5,5'- (PCB 169)",
    "Pentachlorobiphenyl; 3,3',4,4',5- (PCB 126)",
    "Mercury",
    "Lead",
    "Cyanide",
    "1,2-Dichlorobenzene",
    "1,4-Dichlorobenzene",
    "2-Chlorophenol",
    "Chlorobenzene",
    "p,p'-DDD",
    "Benzene",
    "Chloroform",
    "Pentachlorobiphenyl; 2',3,4,4',5- (PCB 123)",
    "p,p'-DDT",
    "p,p'-DDE",
    "Aldrin",
    "Aroclor 1016",
    "Aroclor 1221",
    "Aroclor 1232",
    "Aroclor 1242",
    "Aroclor 1248",
    "Aroclor 1254",
    "Aroclor 1260",
    "Pentachlorobiphenyl; 2,3,3',4,4'- (PCB 105)",
    "Pentachlorobiphenyl; 2,3,3',4',6- (PCB 110)",
    "Pentachlorobiphenyl; 2,3,4,4',5- (PCB 114)",
    "Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)",
    "Chromium"
]

chemical_df = chemical_filter(file_path,chemical_list)
chemical_df.CHEMICAL_NAME.value_counts()


2,3,7,8-Tetrachlorodibenzo-p-dioxin              14659
p,p'-DDD                                         13847
Dieldrin                                         13847
p,p'-DDT                                         13846
p,p'-DDE                                         13846
Lead                                             12875
Mercury                                          12812
Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)       9931
Pentachlorobiphenyl; 2,3,3',4,4'- (PCB 105)       9931
Pentachlorobiphenyl; 3,3',4,4',5- (PCB 126)       9931
Hexachlorobiphenyl; 3,3',4,4',5,5'- (PCB 169)     9931
Pentachlorobiphenyl; 2,3,4,4',5- (PCB 114)        9652
Pentachlorobiphenyl; 2',3,4,4',5- (PCB 123)       9652
Pentachlorobiphenyl; 2,3,3',4',6- (PCB 110)       9347
Chromium                                          3274
Cyanide                                           2433
Aldrin                                            2349
2-Chlorophenol                                    2225
1,4-Dichlo

In [37]:
chem_moles = chemical_to_moles(chemical_df)
chem_moles['SAMPLE_DATE'] = pd.to_datetime(chem_moles['SAMPLE_DATE'],utc=True)

In [56]:

avg_chem_by_year_month = chem_moles.drop('index',axis=1)
avg_chem_by_year_month.reset_index(inplace=True)


In [61]:
avg_chem_by_year_month['SAMPLE_YEAR_MONTH'] = avg_chem_by_year_month['SAMPLE_DATE'].dt.strftime('%Y-%m')
avg_chem_by_year_month.drop('SAMPLE_DATE',axis=1,inplace=True)

In [62]:

chem_grouped_year_month = avg_chem_by_year_month.groupby(['SAMPLE_YEAR_MONTH','CHEMICAL_NAME']).mean()
chem_grouped_year_month.tail(50)
#avg_chem_by_date.sort_index(level=['SAMPLE_DATE'], ascending=True, sort_remaining=False, inplace=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,REPORT_RESULT_VALUE,REPORT_RESULT_LIMIT,LONGITUDE,LATITUDE,VALUE_MUGRAM_PER_GRAM,VALUE_MUMOL_PER_GRAM
SAMPLE_YEAR_MONTH,CHEMICAL_NAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-05,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",6892.989,46.349746,-74.141125,40.73992,0.006852,2.128076e-05
2019-05,Aldrin,0.4078,0.4078,-74.137123,40.734004,0.000408,1.117566e-06
2019-05,Aroclor 1016,325.6,325.6,-74.137123,40.734004,0.000326,1.264255e-06
2019-05,Aroclor 1221,23.62,23.62,-74.137123,40.734004,2.4e-05,1.252034e-07
2019-05,Aroclor 1232,23.36,23.36,-74.137123,40.734004,2.3e-05,1.238252e-07
2019-05,Aroclor 1242,1561200.0,361.2,-74.137123,40.734004,1.5612,0.00599148
2019-05,Aroclor 1248,677.4,677.4,-74.137123,40.734004,0.000677,2.319958e-06
2019-05,Aroclor 1254,1405400.0,460.0,-74.137123,40.734004,1.4054,0.00430576
2019-05,Aroclor 1260,674800.0,59.4,-74.137123,40.734004,0.6748,0.001794681
2019-05,Dieldrin,525321.3,486.085618,-74.140707,40.740416,0.464809,0.00122026


<class 'pandas.core.frame.DataFrame'>
Int64Index: 195839 entries, 0 to 195838
Data columns (total 16 columns):
 #   Column                 Non-Null Count   Dtype              
---  ------                 --------------   -----              
 0   SAMPLE_DATE            195839 non-null  datetime64[ns, UTC]
 1   index                  195839 non-null  int64              
 2   TASK_CODE              195839 non-null  object             
 3   ANALYTIC_METHOD        195839 non-null  object             
 4   CAS_RN                 195839 non-null  object             
 5   CHEMICAL_NAME          195839 non-null  object             
 6   REPORT_RESULT_VALUE    195839 non-null  float64            
 7   REPORT_RESULT_UNIT     195839 non-null  object             
 8   REPORT_RESULT_LIMIT    189002 non-null  float64            
 9   DETECT_FLAG            195839 non-null  object             
 10  REPORTABLE_RESULT      195839 non-null  object             
 11  LONGITUDE              195839 non-null 

Unnamed: 0,SAMPLE_DATE,index,TASK_CODE,ANALYTIC_METHOD,CAS_RN,CHEMICAL_NAME,REPORT_RESULT_VALUE,REPORT_RESULT_UNIT,REPORT_RESULT_LIMIT,DETECT_FLAG,REPORTABLE_RESULT,LONGITUDE,LATITUDE,LOC_NAME,VALUE_MUGRAM_PER_GRAM,VALUE_MUMOL_PER_GRAM
0,2017-10-10 00:00:00+00:00,54,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",39.3,pg/g,0.189,Y,Yes,-74.118448,40.708445,,0.000039,1.220611e-07
1,2017-10-19 00:00:00+00:00,130,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",106.0,pg/g,0.654,Y,Yes,-74.120683,40.707897,,0.000106,3.292232e-07
2,2017-10-19 00:00:00+00:00,222,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",199.0,pg/g,0.654,Y,Yes,-74.120683,40.707897,,0.000199,6.180700e-07
3,2017-10-19 00:00:00+00:00,298,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",548.0,pg/g,1.090,Y,Yes,-74.120683,40.707897,,0.000548,1.702022e-06
4,2017-10-19 00:00:00+00:00,390,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",98.9,pg/g,0.986,Y,Yes,-74.120683,40.707897,,0.000099,3.071715e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195834,2019-07-29 00:00:00+00:00,28838,2019 OU2 PDI Porewater Passive Sampler,E1668A,31508-00-6,"Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)",107000.0,pg/g,291.000,Y,Yes,-74.155138,40.771860,LPR-0765-01,0.107000,3.303662e-04
195835,2019-07-19 00:00:00+00:00,29434,2019 OU2 PDI Porewater Passive Sampler,E1668A,31508-00-6,"Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)",1560000.0,pg/g,884.000,Y,Yes,-74.152877,40.775143,LPR-0790-01,1.560000,4.816554e-03
195836,2019-07-08 00:00:00+00:00,29501,2019 OU2 PDI Porewater Passive Sampler,E1668A,31508-00-6,"Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)",150000.0,pg/g,551.000,Y,Yes,-74.152242,40.774933,LPR-0790-03R,0.150000,4.631302e-04
195837,2019-07-08 00:00:00+00:00,30096,2019 OU2 PDI Porewater Passive Sampler,E1668A,31508-00-6,"Pentachlorobiphenyl; 2,3',4,4',5- (PCB 118)",1100000.0,pg/g,3380.000,Y,Yes,-74.163651,40.757093,LPR-0650-05,1.100000,3.396288e-03


In [65]:

chem_grouped_year_month['VALUE_MUMOL_PER_GRAM'].hvplot.line(
    x='SAMPLE_YEAR_MONTH',
    groupby='CHEMICAL_NAME',
    rot=90)

BokehModel(combine_events=True, render_bundle={'docs_json': {'242c6f7d-af0b-443d-9df8-35bfbfeadd18': {'defs': …

Unnamed: 0,SAMPLE_DATE,TASK_CODE,ANALYTIC_METHOD,CAS_RN,CHEMICAL_NAME,REPORT_RESULT_VALUE,REPORT_RESULT_UNIT,REPORT_RESULT_LIMIT,DETECT_FLAG,REPORTABLE_RESULT,LONGITUDE,LATITUDE,LOC_NAME,VALUE_MUGRAM_PER_GRAM,VALUE_MUMOL_PER_GRAM
143782,2019-02-22 09:45:00,2018 Passaic WC,E1631,7439-97-6,Mercury,1.3,ng/l,0.5,Y,Yes,-74.164638,40.757913,,1e-06,6.480881e-09
7824,2019-02-20 10:45:00,2018 Passaic WC,E1631,7439-97-6,Mercury,1.2,ng/l,0.5,Y,Yes,-74.144989,40.734852,,1e-06,5.982352e-09
20590,2017-12-09 11:12:00,2017 Passaic,SW6010,7439-92-1,Lead,187.0,mg/kg,4.0,Y,Yes,-74.158334,40.764976,,187.0,0.9025097
156286,2017-11-02 16:11:00,2017-2019 OU2 PDI Sediment,E1699,60-57-1,Dieldrin,2040.0,pg/g,496.0,Y,Yes,-74.149453,40.733965,,0.00204,5.355596e-06
36556,2017-10-24 08:00:00,2017-2019 OU2 PDI Sediment,E1613,1746-01-6,"2,3,7,8-Tetrachlorodibenzo-p-dioxin",132.0,pg/g,0.31,Y,Yes,-74.115849,40.713799,,0.000132,4.099761e-07


In [5]:
lead_mercury_cyanide_list = ['Lead','Mercury','Cyanide']
pb_hg_cn_df = chem_moles_df[chem_moles_df['CHEMICAL_NAME'].isin(lead_mercury_cyanide_list)]
pb_hg_cn_df.head()

Unnamed: 0,SAMPLE_DATE,TASK_CODE,ANALYTIC_METHOD,CAS_RN,CHEMICAL_NAME,REPORT_RESULT_VALUE,REPORT_RESULT_UNIT,REPORT_RESULT_LIMIT,DETECT_FLAG,REPORTABLE_RESULT,LONGITUDE,LATITUDE,LOC_NAME,VALUE_MUGRAM_PER_GRAM,VALUE_MUMOL_PER_GRAM
33,2017-10-10 08:00:00,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.118448,40.708445,,0.0002,9.970587e-07
120,2017-10-19 14:50:00,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
201,2017-10-19 14:50:00,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
288,2017-10-19 14:50:00,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
369,2017-10-19 14:50:00,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07


In [6]:
pb_hg_cn_df['SAMPLE_DATE'] = pb_hg_cn_df['SAMPLE_DATE'].dt.date
pb_hg_cn_df
#pb_hg_cn_grouped = pb_hg_cn_df.groupby(['SAMPLE_DATE','CHEMICAL_NAME']).sum()
#pb_hg_cn_grouped['VALUE_MUMOL_PER_GRAM'].hvplot.line(groupby='CHEMICAL_NAME')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,SAMPLE_DATE,TASK_CODE,ANALYTIC_METHOD,CAS_RN,CHEMICAL_NAME,REPORT_RESULT_VALUE,REPORT_RESULT_UNIT,REPORT_RESULT_LIMIT,DETECT_FLAG,REPORTABLE_RESULT,LONGITUDE,LATITUDE,LOC_NAME,VALUE_MUGRAM_PER_GRAM,VALUE_MUMOL_PER_GRAM
33,2017-10-10,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.118448,40.708445,,0.0002,9.970587e-07
120,2017-10-19,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
201,2017-10-19,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
288,2017-10-19,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
369,2017-10-19,2017-2019 OU2 PDI Sediment,1311/6010B/7470A,7439-97-6,Mercury,0.0002,mg/l,0.0002,N,Yes,-74.120683,40.707897,,0.0002,9.970587e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15876,2019-07-09,2019 OU2 PDI Porewater Passive Sampler,SW6010,7439-92-1,Lead,400.0000,mg/kg,0.9900,Y,Yes,-74.145593,40.735957,LPR-0403-01,400.0000,1.930502e+00
15881,2019-07-10,2019 OU2 PDI Porewater Passive Sampler,SW6010,7439-92-1,Lead,370.0000,mg/kg,1.1000,Y,Yes,-74.149512,40.734412,LPR-0430-07,370.0000,1.785714e+00
15884,2019-07-09,2019 OU2 PDI Porewater Passive Sampler,SW6010,7439-92-1,Lead,11.0000,mg/kg,0.7300,Y,Yes,-74.155944,40.770182,LPR-0752-01R,11.0000,5.308880e-02
15889,2019-07-09,2019 OU2 PDI Porewater Passive Sampler,SW6010,7439-92-1,Lead,51.0000,mg/kg,0.6400,Y,Yes,-74.152242,40.774933,LPR-0790-03R,51.0000,2.461390e-01
