# This notebook reads the sampling data published by the EPA and pulls out which chemicals in the air, water, and soil have reached reporting levels at the East Palestine, Ohio, train derailment site.

Sources:

1. https://www.epa.gov/oh/water-sampling-data-east-palestine-ohio-train-derailment
2. https://www.epa.gov/oh/soil-and-sediment-sampling-data-east-palestine-ohio-train-derailment
3. https://www.epa.gov/oh/air-sampling-data-east-palestine-ohio-train-derailment

In [2]:
# required imports
import pandas as pd
import numpy as np
from pathlib import Path 
import hvplot.pandas 
import seaborn as sb   
import matplotlib.pyplot as plt


In [3]:
# Locations of the three sampling CSVs, relative to the notebook's working
# directory. Built in one pass so the three names are always created together.
air_csv, water_csv, soil_csv = map(
    Path,
    ('./data/air.csv', './data/water.csv', './data/soil.csv'),
)

In [5]:
# Load each sampling CSV into its own DataFrame, keeping the default
# RangeIndex (no column is promoted to the index).
#
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (it triggers a warning on every call) and it never had any effect
# here because no `parse_dates` columns are requested. `index_col=None` is the
# pandas default, so it is omitted as well.
air_df = pd.read_csv(air_csv)
water_df = pd.read_csv(water_csv)
soil_df = pd.read_csv(soil_csv)

In [7]:
# Narrow every medium's frame down to the analyte name and its
# reporting-limit comparison flag.
report_columns = ['Analyte', 'RL_Comparison']
air_report_df = air_df.loc[:, report_columns]
water_report_df = water_df.loc[:, report_columns]
soil_report_df = soil_df.loc[:, report_columns]

# Preview the first rows of each narrowed frame (air, then water, then soil).
for report_frame in (air_report_df, water_report_df, soil_report_df):
    display(report_frame.head())

Unnamed: 0,Analyte,RL_Comparison
0,Dichlorodifluoromethane (CFC 12),Yes
1,Chloromethane,Yes
2,Vinyl Chloride,No
3,"1,3-Butadiene",No
4,Bromomethane,No


Unnamed: 0,Analyte,RL_Comparison
0,DRO (C10-C28),Yes
1,ORO (C28-C40),Yes
2,GRO (C6-C10),Yes
3,"1,1`-Biphenyl",No
4,"1,2,4,5-Tetrachlorobenzene",No


Unnamed: 0,Analyte,RL_Comparison
0,Trichlorofluoromethane,No
1,Vinyl Chloride,No
2,"Xylenes, Total",No
3,DRO (C10-C28),No
4,ORO (C28-C40),No


In [16]:
# Keep only the rows whose RL_Comparison flag is exactly 'Yes'.
# presumably 'Yes' marks a result that reached the reporting limit — confirm
# against the EPA data dictionary.
air_flagged = air_report_df['RL_Comparison'].eq('Yes')
water_flagged = water_report_df['RL_Comparison'].eq('Yes')
soil_flagged = soil_report_df['RL_Comparison'].eq('Yes')

air_analyte_df = air_report_df[air_flagged]
water_analyte_df = water_report_df[water_flagged]
soil_analyte_df = soil_report_df[soil_flagged]



Unnamed: 0,Analyte,RL_Comparison
0,Dichlorodifluoromethane (CFC 12),Yes
1,Chloromethane,Yes
7,Acetone,Yes
8,Trichlorofluoromethane,Yes
10,Methylene chloride,Yes


Unnamed: 0,Analyte,RL_Comparison
0,DRO (C10-C28),Yes
1,ORO (C28-C40),Yes
2,GRO (C6-C10),Yes
56,Fluoranthene,Yes
64,Naphthalene,Yes


Unnamed: 0,Analyte,RL_Comparison
97,Acetone,Yes
238,Acetone,Yes
256,DRO (C10-C28),Yes
257,ORO (C28-C40),Yes
368,Acetone,Yes


In [23]:
# Show the first 20 flagged rows for each medium, in air / water / soil order.
display(
    air_analyte_df.head(20),
    water_analyte_df.head(20),
    soil_analyte_df.head(20),
)

Unnamed: 0,Analyte,RL_Comparison
0,Dichlorodifluoromethane (CFC 12),Yes
1,Chloromethane,Yes
7,Acetone,Yes
8,Trichlorofluoromethane,Yes
10,Methylene chloride,Yes
11,"1,1,2-Trichlorotrifluoroethane",Yes
17,"1,2-Dichloroethane",Yes
19,Benzene,Yes
20,Carbon Tetrachloride,Yes
28,Toluene,Yes


Unnamed: 0,Analyte,RL_Comparison
0,DRO (C10-C28),Yes
1,ORO (C28-C40),Yes
2,GRO (C6-C10),Yes
56,Fluoranthene,Yes
64,Naphthalene,Yes
69,Phenanthrene,Yes
71,Pyrene,Yes
129,pH (laboratory),Yes
130,Temperature,Yes
131,DRO (C10-C28),Yes


Unnamed: 0,Analyte,RL_Comparison
97,Acetone,Yes
238,Acetone,Yes
256,DRO (C10-C28),Yes
257,ORO (C28-C40),Yes
368,Acetone,Yes
369,Benzene,Yes
381,Toluene,Yes
386,GRO (C6-C10),Yes
479,Benzene,Yes
500,Methylcyclohexane,Yes


In [31]:
# Count how often each (Analyte, RL_Comparison) pair occurs among the flagged
# air rows; the columns are spelled out instead of relying on the
# "all columns" default.
air_analyte_df.value_counts(subset=['Analyte', 'RL_Comparison'])

Analyte                           RL_Comparison
1,1,2-Trichlorotrifluoroethane    Yes              32
Carbon Tetrachloride              Yes              32
1,2-Dichloroethane                Yes              32
Trichlorofluoromethane            Yes              32
Toluene                           Yes              32
Methylene chloride                Yes              32
Dichlorodifluoromethane (CFC 12)  Yes              32
Chloromethane                     Yes              32
Benzene                           Yes              32
m,p-Xylenes                       Yes              30
Tetrachloroethene                 Yes              27
o-Xylene                          Yes              24
1,2,4-Trimethylbenzene            Yes              23
Acetone                           Yes              22
1,4-Dichlorobenzene               Yes              22
Ethylbenzene                      Yes              21
Vinyl Chloride                    Yes              18
1,3-Butadiene                     

In [32]:
# Count how often each (Analyte, RL_Comparison) pair occurs among the flagged
# water rows.
water_analyte_df.value_counts(subset=['Analyte', 'RL_Comparison'])

Analyte                     RL_Comparison
DRO (C10-C28)               Yes              9
Phenanthrene                Yes              8
ORO (C28-C40)               Yes              8
GRO (C6-C10)                Yes              8
Fluoranthene                Yes              8
Vinyl Chloride              Yes              6
Pyrene                      Yes              5
Naphthalene                 Yes              4
pH (laboratory)             Yes              3
Temperature                 Yes              3
Benzo(a)anthracene          Yes              3
Fluorene                    Yes              2
Chrysene                    Yes              2
2-Methylnaphthalene         Yes              2
Bis(2-ethylhexyl)phthalate  Yes              2
Benzo(b)fluoranthene        Yes              2
Benzo(a)pyrene              Yes              2
Anthracene                  Yes              2
Acenaphthylene              Yes              2
Acenaphthene                Yes              2
Benzo(k)fluoranthe

In [33]:
# Count how often each (Analyte, RL_Comparison) pair occurs among the flagged
# soil rows.
soil_analyte_df.value_counts(subset=['Analyte', 'RL_Comparison'])

Analyte                 RL_Comparison
GRO (C6-C10)            Yes              6
ORO (C28-C40)           Yes              6
Acetone                 Yes              5
Benzene                 Yes              5
Vinyl Chloride          Yes              4
Fluoranthene            Yes              3
DRO (C10-C28)           Yes              3
Phenanthrene            Yes              3
Toluene                 Yes              2
Methylcyclohexane       Yes              2
Naphthalene             Yes              1
Indeno(1,2,3-cd)pyrene  Yes              1
1,2,4-Trimethylbenzene  Yes              1
Pentachlorophenol       Yes              1
Xylenes, Total          Yes              1
m,p-Xylenes             Yes              1
Pyrene                  Yes              1
Dibenzo(a,h)anthracene  Yes              1
Fluorene                Yes              1
1-Methylnaphthalene     Yes              1
Chrysene                Yes              1
Benzo(k)fluoranthene    Yes              1
Benzo(g,h,i)pery

In [26]:
# Place the flagged air, water and soil frames side by side under the
# top-level column keys 'Air', 'Water' and 'Soil'. The outer join keeps the
# union of all row labels, leaving NaN wherever a medium has no row with that
# label.
# NOTE(review): the row labels come from three separately loaded CSVs, so a
# shared label does not by itself mean the samples are related — confirm this
# side-by-side layout is what later cells expect.
combined_df = pd.concat(
    objs=[air_analyte_df, water_analyte_df, soil_analyte_df],
    keys=['Air', 'Water', 'Soil'],
    axis='columns',
    join='outer',
)


In [21]:
# Preview both ends of the combined table: first 30 rows, then last 30 rows.
display(combined_df.head(30), combined_df.tail(30))

Unnamed: 0_level_0,Air,Air,Water,Water,Soil,Soil
Unnamed: 0_level_1,Analyte,RL_Comparison,Analyte,RL_Comparison,Analyte,RL_Comparison
0,Dichlorodifluoromethane (CFC 12),Yes,DRO (C10-C28),Yes,,
1,Chloromethane,Yes,ORO (C28-C40),Yes,,
2,,,GRO (C6-C10),Yes,,
7,Acetone,Yes,,,,
8,Trichlorofluoromethane,Yes,,,,
10,Methylene chloride,Yes,,,,
11,"1,1,2-Trichlorotrifluoroethane",Yes,,,,
17,"1,2-Dichloroethane",Yes,,,,
19,Benzene,Yes,,,,
20,Carbon Tetrachloride,Yes,,,,


Unnamed: 0_level_0,Air,Air,Water,Water,Soil,Soil
Unnamed: 0_level_1,Analyte,RL_Comparison,Analyte,RL_Comparison,Analyte,RL_Comparison
1478,Benzene,Yes,,,,
1479,Carbon Tetrachloride,Yes,,,,
1487,Toluene,Yes,,,,
1490,Tetrachloroethene,Yes,,,,
1493,"m,p-Xylenes",Yes,,,,
1500,"1,4-Dichlorobenzene",Yes,,,,
1502,,,Fluoranthene,Yes,,
1506,Dichlorodifluoromethane (CFC 12),Yes,,,,
1507,Chloromethane,Yes,,,,
1514,Trichlorofluoromethane,Yes,,,,


In [29]:
# display total number of chemicals


KeyError: 'Yes'