In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import warnings

# Suppress warning messages for the rest of the session.
# NOTE(review): this blanket filter also hides pandas deprecation warnings;
# consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Display all rows & columns: None removes pandas' truncation limits.
# Set each option exactly once so the effective value is unambiguous.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
def _read_csv_coercing_dates(path, date_columns):
    """Read a CSV file and convert the given columns to datetimes.

    The date columns are read as plain strings and then converted with
    ``pd.to_datetime(..., errors='coerce')``, so any value that cannot be
    parsed becomes ``NaT`` instead of raising. This replaces the
    ``date_parser=`` argument of ``read_csv``, which is deprecated.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    date_columns : list of str
        Names of the columns to convert.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``date_columns`` converted, column order unchanged.
    """
    # Reading as str keeps read_csv from inferring a numeric dtype for a date column.
    frame = pd.read_csv(path, dtype={column: str for column in date_columns})
    for column in date_columns:
        frame[column] = pd.to_datetime(frame[column], errors='coerce')
    return frame


# TMDL actions: drop the saved index column and shorten the assessment-unit id name.
tmdls = (
    _read_csv_coercing_dates('all_actions.csv', ['TMDLDate'])
    .drop(columns='Unnamed: 0')
    .rename(columns={'assessmentUnitIdentifier': 'AUID'})
)

# Link tables, renamed so their join keys match the names used in the merges ('AUID', 'HUC-12').
# NOTE(review): HUC-12 shows up as a float in the rendered output further down; if these codes
# are identifiers, reading them as strings in both files may be safer — confirm the file formats.
huc_npdes_permit_links = pd.read_csv('huc_npdes_permit_links.csv').rename(
    columns={'attributes.WBD_HUC12': 'HUC-12'}
)
huc_AUID_links = pd.read_csv('huc_AUID_links.csv').rename(
    columns={'attributes.assessmentunitidentifier': 'AUID', 'attributes.huc12': 'HUC-12'}
)

# Permit records with all of their date fields parsed (unparseable values become NaT).
permit_dates = _read_csv_coercing_dates(
    'permit_dates.csv',
    [
        'effective_date',
        'issue_date',
        'original_effective_date',
        'original_issue_date',
        'expiration_date',
        'retirement_date',
        'termination_date',
    ],
)

In [3]:
# Number of distinct TMDL action identifiers in the raw table (a missing id counts as one value).
tmdls['actionIdentifier'].nunique(dropna=False)

20009

In [4]:
# Link each distinct TMDL row to a HUC-12 through its assessment unit (AUID), then to the
# NPDES permits recorded for that HUC-12. Both joins are inner, so unmatched rows are dropped.
tmdls_merged = (
    tmdls
    .drop_duplicates()
    .merge(huc_AUID_links, how='inner', on='AUID')
    .merge(huc_npdes_permit_links, how='inner', on='HUC-12')
)

In [5]:
# Number of distinct NPDES permit ids in the merged table (a missing id counts as one value).
tmdls_merged['npdes_permit_id'].nunique(dropna=False)

260319

In [6]:
# Number of distinct HUC-12 values in the merged table (a missing value counts as one value).
tmdls_merged['HUC-12'].nunique(dropna=False)

9166

In [7]:
# Number of distinct assessment unit ids (AUID) in the merged table (a missing id counts as one value).
tmdls_merged['AUID'].nunique(dropna=False)

69540

In [8]:
# Number of distinct TMDL action identifiers that survived the merges (a missing id counts as one value).
tmdls_merged['actionIdentifier'].nunique(dropna=False)

6626

In [9]:
# Attach the permit date fields to every row; rows whose permit id is absent from
# permit_dates are dropped (inner join).
# Caution: this reassigns tmdls_merged from itself, so running the cell a second time
# merges permit_dates in again.
tmdls_merged = pd.merge(
    tmdls_merged,
    permit_dates,
    how='inner',
    on='npdes_permit_id',
)

In [10]:
# A row is "incumbent" when the permit's issue date is on or before the TMDL date.
# Comparisons involving NaT evaluate to False, so rows missing either date are flagged False.
tmdls_merged['incumbent'] = tmdls_merged['issue_date'].le(tmdls_merged['TMDLDate'])

In [11]:
# Preview the first five rows of the merged table, including the new 'incumbent' column.
tmdls_merged.head(n=5)

Unnamed: 0,organizationIdentifier,organizationTypeText,state_code,actionIdentifier,actionTypeCode,actionStatusCode,completionDate,AUID,pollutantName,pollutantSourceTypeCode,explicitMarginofSafetyText,implicitMarginofSafetyText,TMDLEndPointText,TMDLDate,Unnamed: 0_x,HUC-12,Unnamed: 0_y,attributes.SOURCE_FEATUREID,npdes_permit_id,permit_state,is_currently_active_flag,effective_date,issue_date,original_effective_date,original_issue_date,expiration_date,retirement_date,termination_date,incumbent
0,21AWIC,State,AL,39070,TMDL,EPA Final Action,2010-09-21,AL03140107-0205-102,ENTEROCOCCUS BACTERIA,Nonpoint source,,N,"IN COASTAL WATERS, BACTERIA OF THE ENTEROCOCCI...",2010-09-21,342386,31401070000.0,21370,ALR107777,ALR107777,AL,False,2011-07-27,2011-07-27,2011-07-27,2011-07-27,2016-03-31,NaT,2019-04-30,False
1,21AWIC,State,AL,39070,TMDL,EPA Final Action,2010-09-21,AL03140107-0205-102,ENTEROCOCCUS BACTERIA,Nonpoint source,,N,"IN COASTAL WATERS, BACTERIA OF THE ENTEROCOCCI...",2010-09-21,342386,31401070000.0,24366,ALR10AAY0,ALR10AAY0,AL,False,2013-08-15,2013-08-15,2013-08-15,2013-08-15,2016-03-31,NaT,2020-03-13,False
2,21AWIC,State,AL,39070,TMDL,EPA Final Action,2010-09-21,AL03140107-0205-102,ENTEROCOCCUS BACTERIA,Nonpoint source,,N,"IN COASTAL WATERS, BACTERIA OF THE ENTEROCOCCI...",2010-09-21,342386,31401070000.0,27846,ALR10AM01,ALR10AM01,AL,False,2014-06-11,2014-06-11,2014-06-11,2014-06-11,2016-03-31,NaT,2020-03-13,False
3,21AWIC,State,AL,39070,TMDL,EPA Final Action,2010-09-21,AL03140107-0205-102,ENTEROCOCCUS BACTERIA,Nonpoint source,,N,"IN COASTAL WATERS, BACTERIA OF THE ENTEROCOCCI...",2010-09-21,342386,31401070000.0,27870,ALR10AA85,ALR10AA85,AL,True,2016-04-05,2016-04-05,2021-04-01,2013-06-12,2021-03-31,2021-03-31,NaT,False
4,21AWIC,State,AL,39070,TMDL,EPA Final Action,2010-09-21,AL03140107-0205-102,ENTEROCOCCUS BACTERIA,Nonpoint source,,N,"IN COASTAL WATERS, BACTERIA OF THE ENTEROCOCCI...",2010-09-21,342386,31401070000.0,27870,ALR10AA85,ALR10AA85,AL,True,2021-04-01,2021-04-01,2021-04-01,2013-06-12,2026-03-31,NaT,NaT,False


In [None]:
# Number of distinct NPDES permit ids remaining after the permit_dates merge
# (a missing id counts as one value).
tmdls_merged['npdes_permit_id'].nunique(dropna=False)

In [None]:
# Tally incumbent vs. non-incumbent permits using one row per permit id.
# value_counts must be called; without the parentheses the cell only shows the bound method.
# NOTE(review): drop_duplicates keeps the first row for each npdes_permit_id, so a permit that
# appears with several TMDLs or several permit versions is represented by whichever row comes
# first — confirm that is the intended definition.
tmdls_merged.drop_duplicates('npdes_permit_id')['incumbent'].value_counts()