In [11]:
import xml.etree.ElementTree as et
import logging
import pandas as pd
from typing import Union

# Named logger for this script.
# NOTE(review): nothing in the visible code logs through `log` yet; output uses print().
log = logging.getLogger("xml_parse")
# Request INFO-level output on the root logger (basicConfig does nothing if the
# root logger already has handlers configured).
logging.basicConfig(level=logging.INFO)

# parse the XML file and preview its VULN entries as a table
def parse_XML(xml_file):
    """Parse *xml_file* and print a preview of its VULN data.

    Prints a header line naming the file, loads the XML tree, collects the
    STIG_INFO dictionary and the VULN rows via ``get_stig_info`` and
    ``get_vuln``, builds a DataFrame from the rows and prints its ``head()``.

    Args:
        xml_file: Path (or file object) accepted by ``ElementTree.parse``.

    Returns:
        None. The results are only printed.
    """
    print(f"Dictonary for STIG_INFO from {xml_file}")

    # load the document and grab the top-level element
    tree = et.parse(xml_file)
    root = tree.getroot()

    # STIG_INFO metadata; collected here but not printed
    si_data_dict = get_stig_info(root)

    # one row per VULN, plus the column headers taken from the first VULN
    rows, headers = get_vuln(root)

    stigs_df = pd.DataFrame(data=rows, columns=headers)
    print(stigs_df.head())

# create dictionary for SI_DATA
def get_stig_info(root) -> dict[str, Union[str, bool, None]]:
    """Collect the SID_NAME -> SID_DATA pairs under the first STIG_INFO element.

    Args:
        root: Element to search; its first descendant ``STIG_INFO`` is used.

    Returns:
        A dict keyed by the text of each ``SID_NAME``. The value is the text
        of the sibling ``SID_DATA`` (``None`` when that element is empty), or
        ``True`` when the ``SI_DATA`` entry has no ``SID_DATA`` child at all.
        An empty dict is returned when there is no ``STIG_INFO`` element.
        ``SI_DATA`` entries without a ``SID_NAME`` child are skipped.
    """
    stig_info_elem = root.find('.//STIG_INFO')
    if stig_info_elem is None:
        # nothing to collect; avoid calling findall() on None
        return {}

    si_data_dict = {}
    for si_data in stig_info_elem.findall("SI_DATA"):
        sid_name_el = si_data.find("SID_NAME")
        if sid_name_el is None:
            # an entry without a name has no usable key
            continue

        sid_data_el = si_data.find("SID_DATA")

        # not all the tags are the same within SI_DATA: a name-only entry
        # is recorded as a flag (True)
        si_data_dict[sid_name_el.text] = (
            True if sid_data_el is None else sid_data_el.text
        )

    return si_data_dict

def _child_text(parent, tag):
    """Return the text of *parent*'s first *tag* child, or None if it has none."""
    child = parent.find(tag)
    return None if child is None else child.text


def get_vuln(root):
    """Extract the STIG_DATA values of every VULN element as table rows.

    Column names are read from the ``VULN_ATTRIBUTE`` of each ``STIG_DATA``
    in the first ``VULN`` and printed. Each row holds one value per
    ``STIG_DATA`` of a ``VULN``: the ``ATTRIBUTE_DATA`` text, or ``None`` when
    that child is missing or empty. Walking ``STIG_DATA`` (rather than
    ``STIG_DATA/ATTRIBUTE_DATA``) keeps a missing value as a ``None``
    placeholder so later values stay under the right column.

    Args:
        root: Element to search for descendant ``VULN`` elements.

    Returns:
        A ``(stig_data_list, column_names)`` tuple: a list of rows (lists of
        str or None) and the list of column names. ``([], [])`` is returned
        when there are no ``VULN`` elements.
    """
    vuln_elems = root.findall('.//VULN')
    if not vuln_elems:
        # no VULN entries: nothing to index with vuln_elems[0]
        return ([], [])

    column_names = [
        _child_text(stig_data, "VULN_ATTRIBUTE")
        for stig_data in vuln_elems[0].findall("STIG_DATA")
    ]
    print(column_names)

    stig_data_list = [
        [
            _child_text(stig_data, "ATTRIBUTE_DATA")
            for stig_data in vuln_elem.findall("STIG_DATA")
        ]
        for vuln_elem in vuln_elems
    ]
    return (stig_data_list, column_names)
    
# parse the checklist file and print a preview of its contents
parse_XML("CHECKLIST_TEMPLATE_RHEL.ckl")


Dictonary for STIG_INFO from CHECKLIST_TEMPLATE_RHEL.ckl
['Vuln_Num', 'Severity', 'Group_Title', 'Rule_ID', 'Rule_Ver', 'Rule_Title', 'Vuln_Discuss', 'IA_Controls', 'Check_Content', 'Fix_Text', 'False_Positives', 'False_Negatives', 'Documentable', 'Mitigations', 'Potential_Impact', 'Third_Party_Tools', 'Mitigation_Control', 'Responsibility', 'Security_Override_Guidance', 'Check_Content_Ref', 'Weight', 'Class', 'STIGRef', 'TargetKey', 'STIG_UUID', 'LEGACY_ID', 'LEGACY_ID', 'CCI_REF']
   Vuln_Num Severity               Group_Title                Rule_ID  \
0  V-230221     high  SRG-OS-000480-GPOS-00227  SV-230221r627750_rule   
1  V-230222   medium  SRG-OS-000480-GPOS-00227  SV-230222r627750_rule   
2  V-230223     high  SRG-OS-000033-GPOS-00014  SV-230223r627750_rule   
3  V-230224   medium  SRG-OS-000185-GPOS-00079  SV-230224r627750_rule   
4  V-230225   medium  SRG-OS-000023-GPOS-00006  SV-230225r627750_rule   

         Rule_Ver                                         Rule_Title  \
0