In [2]:
# import necessary libraries

import pandas as pd
import xml.etree.ElementTree as ET
import requests
import os

In [3]:
def _child_text(parent, tag):
    """Return the text of the first direct child named ``tag``, or None if absent."""
    node = parent.find(tag)
    return None if node is None else node.text


def ADAMS_pull(document_type, year, save_path='document_feed.xml'):
    """Query the NRC ADAMS advanced-search service and tabulate the results.

    Builds a search restricted to the public library for documents whose
    DocumentType starts with ``document_type`` and whose DocumentDate lies
    between 01/01 and 12/31 of ``year``, downloads the XML response and
    converts it into a DataFrame.

    Parameters
    ----------
    document_type : str
        Leading text of the ADAMS document type, e.g. 'Licensee Event Report'.
        Spaces are replaced with '+' for the query string; no other
        characters are escaped.
    year : int or str
        Calendar year used for both ends of the date range.
    save_path : str or None, optional
        File that receives a copy of the raw XML response. Defaults to
        'document_feed.xml' in the working directory; pass None to skip
        writing the file.

    Returns
    -------
    pandas.DataFrame
        One row per XML element that has an ``AccessionNumber`` child, with
        columns 'Document Title', 'Accession Number', 'Document Date', 'Type'
        and 'Link'. A field that is missing from a record is None in that
        row. 'Link' is built from the accession number and is None when the
        accession number is empty.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    """
    # replace any spaces in the ADAMS Document type string with plus signs for the API call:
    doc_str = document_type.replace(' ', '+')
    year_str = str(year)

    # construct the API link for the document type and year:
    API_url = (
        'https://adams.nrc.gov/wba/services/search/advanced/nrc?q=('
        'mode:sections,sections:('
        'filters:(public-library:!t),'
        'options:(within-folder:(enable:!f,insubfolder:!f,path:%27%27)),'
        'properties_search_all:!('
        '!(DocumentType,starts,%27' + doc_str + '%27,%27%27),'
        '!(DocumentDate,range,(left:%2701/01/' + year_str + '+12:00+AM%27,'
        'right:%2712/31/' + year_str + '+11:59+PM%27),%27%27))))'
        '&qn=New&tab=advanced-search-pars&s=%24title&so=ASC'
    )

    # A timeout keeps the notebook from hanging forever on a stalled
    # connection, and raise_for_status surfaces HTTP failures directly
    # instead of as a confusing XML parse error further down.
    resp = requests.get(API_url, timeout=120)
    resp.raise_for_status()
    xml_bytes = resp.content

    # optionally keep a copy of the raw response on disk
    if save_path is not None:
        with open(save_path, 'wb') as f:
            f.write(xml_bytes)

    # Parse the downloaded bytes directly: the parser then honours the
    # encoding declared in the XML itself rather than the platform's default
    # text encoding, and no file handle is left open.
    root = ET.fromstring(xml_bytes)

    # Build each row from the children of a single record element so that a
    # record with a missing field cannot shift the values of later records.
    # NOTE(review): this assumes the fields of one document are siblings
    # under a common parent element - confirm against a live response.
    rows = []
    for record in root.iter():
        if record.find('AccessionNumber') is None:
            continue
        accession = _child_text(record, 'AccessionNumber')
        if accession:
            link = 'https://www.nrc.gov/docs/' + accession[0:6] + '/' + accession + '.pdf'
        else:
            link = None
        rows.append((
            _child_text(record, 'DocumentTitle'),
            accession,
            _child_text(record, 'DocumentDate'),
            _child_text(record, 'DocumentType'),
            link,
        ))

    # construct and return the desired data frame
    document_columns = ['Document Title', 'Accession Number', 'Document Date', 'Type', 'Link']
    doc_data = pd.DataFrame(rows, columns=document_columns)

    return doc_data

In [4]:
# Trial run: fetch every 2021 document whose type starts with 'Licensee Event Report'.

LER_data = ADAMS_pull(document_type='Licensee Event Report', year=2021)

In [5]:
# Preview the first rows only; rendering the whole frame bloats the notebook.

LER_data.head(10)

Unnamed: 0,Document Title,Accession Number,Document Date,Type,Link
0,LER 2017-010-02 for Susquehanna Steam Electric...,ML21182A041,07/01/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2118/ML21182A041.pdf
1,LER 2018-005-02 for Susquehanna Steam Electric...,ML21182A042,07/01/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2118/ML21182A042.pdf
2,LER 2019-006-01 for Grand Gulf Nuclear Station...,ML21049A273,02/18/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2104/ML21049A273.pdf
3,"LER 2020-001-00 for Arkansas Nuclear One, Unit...",ML21039A557,02/08/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2103/ML21039A557.pdf
4,LER 2020-001-00 for Calvert Cliffs Nuclear Pow...,ML21027A171,01/27/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2102/ML21027A171.pdf
5,LER 2020-001-00 for Comanche Peak Nuclear Powe...,ML21041A178,02/10/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2104/ML21041A178.pdf
6,LER 2020-001-00 for Limerick Generating Statio...,ML21012A436,01/12/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2101/ML21012A436.pdf
7,LER 2020-001-00 for Vogtle Electric Generating...,ML21008A571,01/08/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2100/ML21008A571.pdf
8,"LER 2020-001-01 for Callaway Plant Unit 1, Eme...",ML21040A515,02/09/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2104/ML21040A515.pdf
9,LER 2020-001-01 for Joseph M. Farley Nuclear P...,ML21084A234,03/25/2021,"Letter, Licensee Event Report (LER)",https://www.nrc.gov/docs/ML2108/ML21084A234.pdf


In [6]:
# Load Existing OpE Document Master File:
# NOTE(review): the last recorded run of this cell failed with
# "XLRDError: No sheet named <'OpE_docs'>" - confirm the sheet name in the
# workbook. Also note the final cell of this notebook saves the master as
# ope_documents_master.tsv, not back into this .xlsx - confirm which file is
# meant to be the source of truth.

# Join the path components with os.path.join instead of hard-coded
# backslashes so the same cell works on any operating system.
master_xlsx_path = os.path.join(
    os.path.expanduser("~"),
    'U.S. NRC',
    'ROPDashboards - Documents',
    'Databases',
    'OpE',
    'ope_documents_master.xlsx',
)
ope_documents = pd.read_excel(master_xlsx_path, sheet_name='OpE_docs')

XLRDError: No sheet named <'OpE_docs'>

In [None]:
# Append to the master table every fetched LER whose accession number does
# not already appear in it.
rows_before = len(ope_documents)

# An accession number counts as "known" if it matches any cell of the master
# table. The flattened values are checked in one vectorised pass instead of
# rescanning the whole table once per fetched document.
already_known = LER_data['Accession Number'].isin(ope_documents.values.ravel())

# .copy() gives an independent frame, so the fetched data is left untouched
# and no helper column has to be added and dropped again.
new_data = LER_data.loc[~already_known].copy()
count = len(new_data)

# pd.concat is used because DataFrame.append was removed in pandas 2.0.
ope_documents = pd.concat([ope_documents, new_data]).reset_index(drop=True)

# Both numbers should agree: rows selected vs. rows actually added.
print(count)
print(len(ope_documents) - rows_before)

In [None]:
# Part 21 update: fetch every 2021 document whose type starts with 'Deficiency'.

P21_data = ADAMS_pull(document_type='Deficiency', year=2021)

In [None]:
# Append to the master table every fetched Part 21 document whose accession
# number does not already appear in it.
rows_before = len(ope_documents)

# An accession number counts as "known" if it matches any cell of the master
# table. The flattened values are checked in one vectorised pass instead of
# rescanning the whole table once per fetched document.
already_known = P21_data['Accession Number'].isin(ope_documents.values.ravel())

# .copy() gives an independent frame, so the fetched data is left untouched
# and no helper column has to be added and dropped again.
new_data = P21_data.loc[~already_known].copy()
count = len(new_data)

# pd.concat is used because DataFrame.append was removed in pandas 2.0.
ope_documents = pd.concat([ope_documents, new_data]).reset_index(drop=True)

# Both numbers should agree: rows selected vs. rows actually added.
print(count)
print(len(ope_documents) - rows_before)

In [None]:
# Information Notice update: fetch every 2021 document whose type starts with 'NRC INFORMATION'.
IN_data = ADAMS_pull(document_type='NRC INFORMATION', year=2021)

In [None]:
# Append to the master table every fetched Information Notice whose
# accession number does not already appear in it.
rows_before = len(ope_documents)

# An accession number counts as "known" if it matches any cell of the master
# table. The flattened values are checked in one vectorised pass instead of
# rescanning the whole table once per fetched document.
already_known = IN_data['Accession Number'].isin(ope_documents.values.ravel())

# .copy() gives an independent frame, so the fetched data is left untouched
# and no helper column has to be added and dropped again.
new_data = IN_data.loc[~already_known].copy()
count = len(new_data)

# pd.concat is used because DataFrame.append was removed in pandas 2.0.
ope_documents = pd.concat([ope_documents, new_data]).reset_index(drop=True)

# Both numbers should agree: rows selected vs. rows actually added.
print(count)
print(len(ope_documents) - rows_before)

In [None]:
# RIS update: fetch every 2021 document whose type starts with 'NRC Regulatory'.
# NOTE(review): presumably these are Regulatory Issue Summaries - confirm the document type prefix.
RIS_data = ADAMS_pull(document_type='NRC Regulatory', year=2021)

In [None]:
# Append to the master table every fetched RIS document whose accession
# number does not already appear in it.
rows_before = len(ope_documents)

# An accession number counts as "known" if it matches any cell of the master
# table. The flattened values are checked in one vectorised pass instead of
# rescanning the whole table once per fetched document.
already_known = RIS_data['Accession Number'].isin(ope_documents.values.ravel())

# .copy() gives an independent frame, so the fetched data is left untouched
# and no helper column has to be added and dropped again.
new_data = RIS_data.loc[~already_known].copy()
count = len(new_data)

# pd.concat is used because DataFrame.append was removed in pandas 2.0.
ope_documents = pd.concat([ope_documents, new_data]).reset_index(drop=True)

# Both numbers should agree: rows selected vs. rows actually added.
print(count)
print(len(ope_documents) - rows_before)

In [None]:
# Save the updated master table as a tab-separated file, without the index.
# NOTE(review): the master table is loaded from ope_documents_master.xlsx
# earlier in this notebook but written here as .tsv, so the workbook itself
# is never updated - confirm that this is intended.

# Join the path components with os.path.join instead of hard-coded
# backslashes so the same cell works on any operating system.
master_tsv_path = os.path.join(
    os.path.expanduser("~"),
    'U.S. NRC',
    'ROPDashboards - Documents',
    'Databases',
    'OpE',
    'ope_documents_master.tsv',
)
ope_documents.to_csv(master_tsv_path, sep='\t', index=False)