In [None]:
# Ref: https://towardsdatascience.com/create-a-simple-app-quickly-using-jupyter-notebook-312bdbb9d224

In [1]:
from xml.etree import ElementTree
import csv
from datetime import datetime
import sys
import ipywidgets as widgets

In [14]:
def oira_csvlineadd(xml_name, year, csvfile_writer):
    
    # Parse XML File
    xml = ElementTree.parse(xml_name+"EO_RULE_COMPLETED_"+str(year)+".xml")
            
    # For each regulatory act:
    for regact in xml.findall("REGACT"):
            if (regact): 
                # Extract Reg act details:
                agency_code = regact.find("AGENCY_CODE").text
                rin = regact.find("RIN").text
                title = regact.find("TITLE").text   
                stage = regact.find("STAGE").text                    
                ES = regact.find("ECONOMICALLY_SIGNIFICANT").text
                date_received = regact.find("DATE_RECEIVED").text       
                legal_deadline = regact.find("LEGAL_DEADLINE").text
                date_completed = regact.find("DATE_COMPLETED").text
                decision = regact.find("DECISION").text        
                if regact.find("DATE_PUBLISHED")!=None:
                    date_published = regact.find("DATE_PUBLISHED").text
                else: date_published = "NA"
                if regact.find("HEALTH_CARE_ACT")!=None:
                    health_care_act = regact.find("HEALTH_CARE_ACT").text
                else: health_care_act = "NA"
                if regact.find("DODD_FRANK_ACT")!=None:
                    Dood_Frank_Act = regact.find("DODD_FRANK_ACT").text   
                else: Dood_Frank_Act = "NA"
                if regact.find("INTERNATIONAL_IMPACTS")!=None:
                    international_impacts = regact.find("INTERNATIONAL_IMPACTS").text
                else: international_impacts = "NA"
                if regact.find("UNFUNDED_MANDATES")!=None:
                    unfunded_mandates = regact.find("UNFUNDED_MANDATES").text
                else: unfunded_mandates = "NA"
                if regact.find("MAJOR")!=None:
                    major = regact.find("MAJOR").text
                else: major = "NA"
                if regact.find("HOMELAND_SECURITY")!=None:
                    homeland_security = regact.find("HOMELAND_SECURITY").text
                else: homeland_security = "NA"
                if regact.find("REGULATORY_FLEXIBILITY_ANALYSIS")!=None:
                    regulatory_flexibility_analysis = regact.find("REGULATORY_FLEXIBILITY_ANALYSIS").text
                else: regulatory_flexibility_analysis = "NA"
                        
                        
                csv_line = [agency_code, rin, title, stage, ES, date_received, 
                            legal_deadline, date_completed, decision, date_published, 
                            health_care_act, Dood_Frank_Act, international_impacts,
                            unfunded_mandates, major, homeland_security, regulatory_flexibility_analysis]
                
                # Add a new row to CSV File
                csvfile_writer.writerow(csv_line)
                
    return 
            
def get_option():
    print("Enter: \n '1' to convert all XMLs into seperate CSV files,"
      "\n '2' to convert all XMLs into one CSV file,"
      "\n '3' to select an individual year and convert it to a CSV file.")
    option = widgets.IntText(value=3)
    display(option)
    value = int(option.value)
    
    return value

def get_year(value):
    if value==3:
        print("Select the year:")
        select_year=widgets.FloatText(value=1981)
        display(select_year)
        year = int(select_year.value)
    else:
        year=None
        
    return year

            
def convert_OIRAxmltocsv():
    
    xml_name = "./Raw Data/"
    output_path="./CSV Data/"
    currentYear = datetime.now().year
    Years = list(range(1981, currentYear))
    
    value=get_option()
    Year=get_year(value)
        
    if value == 1:
        for Year in Years:
            
            # Create CSV File
            csvfile = open(output_path+"EO_RULE_COMPLETED_"+str(Year)+".csv",'w',encoding='utf-8')
            csvfile_writer = csv.writer(csvfile)

            # Adding header to CSV File:
            csvfile_writer.writerow(["agency_code","rin","title","stage","ES",
                                     "date_received","legal_deadline","date_completed", "decision",
                                     "date_published", "health_care_act","Dood_Frank_Act","international_impacts",
                                     "unfunded_mandates","major","homeland_security","regulatory_flexibility_analysis"])
        
            
            oira_csvlineadd(xml_name, Year, csvfile_writer)


            csvfile.close()
            print("The CSV file has been created for year", Year)
            
    elif value == 2:
        # Create CSV File
        csvfile = open(output_path+"EO_RULE_COMPLETED_"+str(Years[0])+"_to_"+str(Years[-1])+".csv",'w',encoding='utf-8')
        csvfile_writer = csv.writer(csvfile)

        # Adding header to CSV File:
        csvfile_writer.writerow(["agency_code","rin","title","stage","ES",
                                 "date_received","legal_deadline","date_completed", "decision",
                                 "date_published", "health_care_act","Dood_Frank_Act","international_impacts",
                                 "unfunded_mandates","major","homeland_security","regulatory_flexibility_analysis"])
        
        
        for Year in Years:
            
            oira_csvlineadd(xml_name, Year, csvfile_writer)               
            print(f"The year {Year} has been added to the CSV file")
            
        csvfile.close()
        
    
    elif value == 3:
        
        for retry in range(3):       
            if Year in Years:
            
                # Create CSV File
                csvfile = open(output_path+"EO_RULE_COMPLETED_"+str(Year)+".csv",'w',encoding='utf-8')
                csvfile_writer = csv.writer(csvfile)

                # Adding header to CSV File:
                csvfile_writer.writerow(["agency_code","rin","title","stage","ES",
                                 "date_received","legal_deadline","date_completed", "decision",
                                "date_published", "health_care_act","Dood_Frank_Act","international_impacts",
                                "unfunded_mandates","major","homeland_security","regulatory_flexibility_analysis"])
            
            
                oira_csvlineadd(xml_name, Year, csvfile_writer)
            
                csvfile.close()
                print("The CSV file has been created for year", Year)
                break
                
            print(f"Pick an year between 1981 and {currentYear-1}")
        else:
            print("You have made invalid choices")
            sys.exit(1)
                
    return  
            

In [6]:
print("Enter: \n '1' to convert all XMLs into seperate CSV files,"
      "\n '2' to convert all XMLs into one CSV file,"
      "\n '3' to select an individual year and convert it to a CSV file.")
option = widgets.FloatText(value=3)
display(option)

Enter: 
 '1' to convert all XMLs into seperate CSV files,
 '2' to convert all XMLs into one CSV file,
 '3' to select an individual year and convert it to a CSV file.


FloatText(value=3.0)

In [15]:
convert_OIRAxmltocsv()

Enter: 
 '1' to convert all XMLs into seperate CSV files,
 '2' to convert all XMLs into one CSV file,
 '3' to select an individual year and convert it to a CSV file.


IntText(value=3)

Select the year:


FloatText(value=1981.0)

The CSV file has been created for year 1981
