# Install xmltodict (Not Included by Default with ArcGIS Pro)

In [None]:
#Install xmltodict
##Two install forms are attempted: the `%pip` line magic first, then the `!pip`
##shell command. Per the original author's note, sometimes one of these works
##and the other doesn't (the reason is not known).
try:
    %pip install xmltodict

except:
    #Fall back to the shell form if the magic above raised an error
    !pip install xmltodict

# Define Functions

In [48]:
import os.path
import xmltodict

#Define functions
def arcgis_metadata_dict(items):
    """
    Formats items from ArcGIS Portal into a dictionary of metadata.

    Each item's `metadata` attribute is read once. When it is a string that
    contains "metadata.xml" (and not the "does not exist or is inaccessible"
    error text), it is opened as a file path and parsed with `xmltodict`; the
    contents of the root `metadata` element are stored for that item. In every
    other case (including a non-string value such as `None`) the item is
    recorded with `None`.

    Parameters:
    * `items`: A list of items from ArcGIS Portal such as from the `gis.content.search` function.

    Returns:
    A dictionary mapping each item's `id` to its parsed metadata dictionary,
    or to `None` when no metadata file could be read.
    """
    missing_marker = "Error: Metadata file does not exist or is inaccessible"

    metadata = {}
    for item in items:
        #Read the Attribute Once and Reuse the Value
        ##NOTE(review): `metadata` may do real work on each access - confirm
        metadata_path = item.metadata

        #Read XML Data to Dictionaries
        has_metadata_file = (
            isinstance(metadata_path, str)
            and missing_marker not in metadata_path
            and "metadata.xml" in metadata_path
        )
        if has_metadata_file:
            with open(metadata_path, 'rb') as file:
                metadata[item.id] = xmltodict.parse(file)['metadata']
        else:
            #If no Metadata Exists, Assign Null (Keeps Any Earlier Entry for the Same ID)
            metadata.setdefault(item.id, None)

    return metadata
    
def format_metadata_dict(input_dict: dict, fields: dict, skip_none = True):
    """
    Pulls data from metadata fields and exports it to a flattened table format.

    Parameters:
    * `input_dict`: Dictionary outputted from `arcgis_metadata_dict`.
    * `fields`: Dictionary mapping the fields to export to the new names to use.
    The fields are written with slashes between each level.
    Ex: `{"Esri/CreaDate": "CreateDate"}` to get creation date.
    * `skip_none`: If items with no metadata file should be skipped.
        * Default: `True`

    Returns:
    A list of dictionaries, one per exported item. Each has an `id` key plus
    one key per output name in `fields`. A field whose path cannot be followed
    all the way down is given `None`.

    Raises:
    * `TypeError`: If `fields` is not a dictionary.
    """
    if not isinstance(fields, dict):
        raise TypeError('Fields must be a dictionary.')

    #Split Each Path Once and Keep it Paired with its Output Name
    field_paths = [(path.split('/'), out_name) for path, out_name in fields.items()]

    #Iterate Through Items
    out_table = []
    for item_id, item_dict in input_dict.items():
        table_row = {'id': item_id}

        if item_dict is None:
            #If No Data, Skip or Assign None to All Fields
            if skip_none:
                continue
            for _, out_name in field_paths:
                table_row[out_name] = None
        else:
            #Traverse through Dictionaries to Get Values
            for levels, out_name in field_paths:
                table_row[out_name] = _get_nested_value(item_dict, levels)

        #Add Row to Table
        out_table.append(table_row)

    return out_table


def _get_nested_value(item_dict, levels):
    """
    Follows `levels` down through nested dictionaries and returns the value found.

    Returns `None` as soon as a level is missing or the current value cannot be
    indexed by the level name, so a missing nested key never falls back to a
    same-named key higher up.
    """
    value = item_dict
    for level in levels:
        try:
            value = value[level]
        except (KeyError, IndexError, TypeError):
            return None
    return value

# Usage Example

In [49]:
from arcgis import GIS
import pandas as pd

#Map Metadata Paths (Slash-Separated Levels) to Output Column Names
dict_fields = {'Esri/CreaDate':'CreateDate'}

#Path of the CSV File to Write; Leave Empty to Skip Writing
csv_file = ''

#Login to ArcGIS Portal
gis = GIS('pro')

#Get Items
##The search is capped by `max_items`, so at most 500 items are returned
items = gis.content.search('*', max_items = 500)

#Convert XML Files to Nested Dictionaries
metadata_dict = arcgis_metadata_dict(items)

#Pull Data from Dictionaries to Make a Flat Table
metadata_table = format_metadata_dict(input_dict = metadata_dict, fields = dict_fields, skip_none = True)

#Convert to DataFrame
metadata_df = pd.DataFrame(metadata_table)

#Write to CSV File (Only When a Path is Set in `csv_file`)
if csv_file:
    metadata_df.to_csv(csv_file, index = False)

#View DataFrame
metadata_df

Unnamed: 0,id,CreateDate,CitationResTitle
0,55aa631d1fd945d38b3b8106af09ff6c,20190910,SWAP_WM
1,25476a17a54a40eda5d4d78ab6fe151a,20230807,Zac Testing Map
2,caaaed1472194052bc8dc940eef6eb4b,20240522,
3,f41b38e488bf461282b5450b4ec8cd26,20220224,"{'@Sync': 'TRUE', '#text': 'ST_ServiceArea'}"
4,1c3f8c6258aa4b4eacc24c0e42ed925f,20240126,"{'@Sync': 'TRUE', '#text': 'Water Treatment Pl..."
...,...,...,...
98,467a9ae929354526af742e9068f80325,20231212,Map
99,0eccf93e0e764a5c9c21fdda81f01c5a,20240126,"{'@Sync': 'TRUE', '#text': 'PFAS Detected at P..."
100,95aa64ee892844378c112acc326e06a1,20210311,Complaints Received
101,a371e1a9076344a5af59bfcbe9cb795c,20201019,UnionCo
