# Explore the Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Read in processed file

In [None]:
# Load the cleaned spreadsheet into the notebook-wide DataFrame `vals`;
# later cells (including make_graph) read this global directly.
vals = pd.read_excel('../data/processed/copc_data_indus_cleaned_2019-03-12_2014.xlsx')
# Preview the first rows as a quick sanity check of the load.
vals.head()

In [None]:
# Show every column for the rows whose sample_id is exactly 'DC-B51  2.9-4.2'
# (note: the id literal contains two consecutive spaces).
vals.loc[vals['sample_id'] == 'DC-B51  2.9-4.2',:]

## Look at summary statistics

In [None]:
# Summary statistics of `vals` as produced by DataFrame.describe().
vals.describe()

In [None]:
# List the column labels; make_graph looks analytes up by these names (vals[copc]).
vals.columns

## Create Scatter Plots

In [None]:
def make_graph(copc, sl):
    """Scatter one analyte column against DRO on log-log axes.

    Reads the notebook-level DataFrame ``vals`` and plots ``vals[copc]`` (y)
    against ``vals['dro']`` (x) as unconnected markers on a new 12x8 figure.
    A red vertical line labelled 'DRO ISL' is drawn at x=500.  The figure is
    left as the current pyplot figure, so the caller can fetch the axes with
    ``plt.gca()`` to add further lines, a legend, and save it.

    Parameters
    ----------
    copc : str
        Column of ``vals`` to plot.  Also used for the marker label, the
        y-axis label and the plot title.
    sl : number
        Screening level for ``copc``.  Accepted for call compatibility but
        not drawn here; callers add their own horizontal reference lines.

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.plot(vals['dro'], vals[copc], 'o', label=copc)
    ax.axvline(x=500, color='r', linestyle='-', label='DRO ISL')
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_ylabel(copc + ' (ug/L)')
    ax.set_xlabel('DRO (mg/kg)')
    ax.set_title(copc)
    # Matplotlib 3.5 renamed grid()'s `b=` keyword to `visible=` and later
    # removed `b`; passing the flag positionally works on old and new versions.
    ax.grid(True)

In [None]:
# Benzene vs. DRO, with the industrial RSL and the Benzene ISL overlaid
# as horizontal reference lines, then written to the reports folder.
make_graph('Benzene', 200)
ax = plt.gca()
for level, colour, tag in (
    (5100, 'g', 'RSL_Industrial_HQ=0.1'),
    (200, 'b', 'Benzene ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
ax.set_ylim([.5, 10000])
ax.legend()
# The RSL for HQ 0.1 is the same
plt.savefig('../reports/plots/Benzene.png')

In [None]:
# Ethylbenzene vs. DRO, with the industrial RSL and the Ethylbenzene ISL
# overlaid as horizontal reference lines.
make_graph('Ethylbenzene', 5000)
ax = plt.gca()
for level, colour, tag in (
    (25000, 'g', 'RSL_Industrial_HQ=0.1'),
    (5000, 'b', 'Ethylbenzene ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
ax.legend()
# The RSL for HQ 0.1 is the same
plt.savefig('../reports/plots/Ethylbenzene.png')

In [None]:
# Toluene vs. DRO, with the industrial RSL and the Toluene ISL overlaid
# as horizontal reference lines.
make_graph('Toluene', 9000)
ax = plt.gca()
for level, colour, tag in (
    (4700000, 'g', 'RSL_Industrial_HQ=0.1'),
    (9000, 'b', 'Toluene ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
ax.legend()
# The RSL for HQ 0.1 is 4700000 and for HQ 1.0 is 47000000
plt.savefig('../reports/plots/Toluene.png')

In [None]:
# Total xylenes vs. DRO, with the industrial RSL and the Xylenes ISL
# overlaid as horizontal reference lines and a fixed y-range.
make_graph('Xylene (Total)', 142000)
ax = plt.gca()
for level, colour, tag in (
    (250000, 'g', 'RSL_Industrial_HQ=0.1'),
    (142000, 'b', 'Xylenes ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
# The RSL for HQ 0.1 is 250000 and for HQ 1.0 is 2500000
ax.set_ylim([.0001, 1000000])
ax.legend()
plt.savefig('../reports/plots/Xylenes.png')

In [None]:
# MTBE vs. DRO, with the industrial RSL and the MTBE ISL overlaid as
# horizontal reference lines.
make_graph('Methyl-tert-butyl ether', 300)
ax = plt.gca()
for level, colour, tag in (
    (210000, 'g', 'RSL_Industrial_HQ=0.1'),
    (300, 'b', 'MTBE ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
# RSL for HQ 0.1 is 210000 (the same)
ax.legend()
plt.savefig('../reports/plots/MTBE.png')

In [None]:
# Naphthalene (VOC column) vs. DRO, with the industrial RSL and the
# Naphthalene ISL overlaid as horizontal reference lines.
make_graph('Naphthalene_voc', 51000)
ax = plt.gca()
for level, colour, tag in (
    (17000, 'g', 'RSL_Industrial_HQ=0.1'),
    (51000, 'b', 'Naphthalene ISL'),
):
    ax.axhline(y=level, color=colour, linestyle='-', label=tag)
# RSL for HQ 0.1 is 17000 (same)
ax.legend()
plt.savefig('../reports/plots/Naphthalene.png')

In [None]:
# 1,2-Dibromo-3-chloropropane vs. DRO; only the industrial RSL is drawn
# as a horizontal reference line.
make_graph('1,2-Dibromo-3-chloropropane', 64)
ax = plt.gca()
rsl_line = dict(color='g', linestyle='-', label='RSL_Industrial_HQ=0.1')
ax.axhline(y=64, **rsl_line)
# RSL for HQ 0.1 is 64 (same)
ax.legend()
plt.savefig('../reports/plots/12DB3CP.png')

In [None]:
# Benzo(a)anthracene vs. DRO; only the industrial RSL is drawn as a
# horizontal reference line.
make_graph('Benzo(a)anthracene', 21000)
ax = plt.gca()
rsl_line = dict(color='g', linestyle='-', label='RSL_Industrial_HQ=0.1')
ax.axhline(y=21000, **rsl_line)
# RSL for HQ 0.1 is 21000 (same)
ax.legend()
plt.savefig('../reports/plots/Benza.png')

In [None]:
# Benzo(a)pyrene vs. DRO; only the industrial RSL is drawn as a
# horizontal reference line.
make_graph('Benzo(a)pyrene', 2100)
ax = plt.gca()
rsl_line = dict(color='g', linestyle='-', label='RSL_Industrial_HQ=0.1')
ax.axhline(y=2100, **rsl_line)
# RSL for HQ 0.1 is 2100 (same)
ax.legend()
plt.savefig('../reports/plots/Benzop.png')