# GIS Data Compliance

In [1]:
import arcpy
import os
import pandas as pd
from arcpy import metadata as md


In [32]:
# TESTING FOLDER
root_directory = r"\\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GRID\20240716_from_ecodoc_004773710-06_updated\JNHE - Onshore cable route engineering report - Land Cable\JNHE - Onshore cable route engineering report - Land Cable\test"

root_directory = r"\\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\SITEINV\20240718_Fugro_WP1_WP2\WP1_EAC"

In [31]:

def process_files(root_dir):
    """Loop through directories and process shapefiles and geodatabases."""
    data = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for dirname in dirnames:
            for filename in filenames:
                if filename.endswith(".shp"):                  
                    file_path = os.path.join(dirpath, filename)
                    metadata = arcpy.metadata.Metadata(file_path)
                    desc = arcpy.Describe(file_path)
                    spatial_ref = desc.spatialReference.name if desc.spatialReference else "Unknown"
                    data.append({"File Name": os.path.basename(file_path),
                                     'Full Path': file_path, 
                                     'Coordinate System': spatial_ref, 
                                     "Title": metadata.title if metadata.title else "",
                                     "Tags": metadata.tags if metadata.tags else "",
                                     "Summary": metadata.summary if metadata.summary else "",
                                     "Description": metadata.description if metadata.description else "",
                                     "Credits": metadata.credits if metadata.credits else ""})
        
            if dirname.endswith('.gdb'):    
                gdb_path = os.path.join(dirpath, dirname)
                arcpy.env.workspace = gdb_path
                feature_datasets = arcpy.ListDatasets(feature_type='feature')
                for feature_class in arcpy.ListFeatureClasses():
                            file_path = os.path.join(gdb_path, feature_class)
                            metadata = arcpy.metadata.Metadata(file_path)
                            desc = arcpy.Describe(file_path)
                            spatial_ref = desc.spatialReference.name if desc.spatialReference else "Unknown"
                            data.append({"File Name": os.path.basename(file_path),
                                         'Full Path': file_path, 
                                         'Coordinate System': spatial_ref, 
                                         "Title": metadata.title if metadata.title else "",
                                         "Tags": metadata.tags if metadata.tags else "",
                                         "Summary": metadata.summary if metadata.summary else "",
                                         "Description": metadata.description if metadata.description else "",
                                         "Credits": metadata.credits if metadata.credits else ""})
                if feature_datasets:
                    for fds in feature_datasets:
                        # Set the workspace to the feature dataset
                        arcpy.env.workspace = f"{gdb_path}\\{fds}"
                        for feature_class in arcpy.ListFeatureClasses():
                            file_path = os.path.join(gdb_path, feature_class)
                            metadata = arcpy.metadata.Metadata(file_path)
                            desc = arcpy.Describe(file_path)
                            spatial_ref = desc.spatialReference.name if desc.spatialReference else "Unknown"
                            data.append({"File Name": os.path.basename(file_path),
                                         'Full Path': file_path, 
                                         'Coordinate System': spatial_ref, 
                                         "Title": metadata.title if metadata.title else "",
                                         "Tags": metadata.tags if metadata.tags else "",
                                         "Summary": metadata.summary if metadata.summary else "",
                                         "Description": metadata.description if metadata.description else "",
                                         "Credits": metadata.credits if metadata.credits else ""})
   
    return data

def save_to_excel(data, output_excel):
    """Save the data to an Excel file."""
    df = pd.DataFrame(data)
    df.to_excel(output_excel, index=False)
    df
    print(df)

if __name__ == "__main__":
    output_excel_file = os.path.join(root_directory, "geospatial_data_info.xlsx")
    metadata_list = process_files(root_directory)
    save_to_excel(metadata_list, output_excel_file)
    print(f"Excel file generated: {output_excel_file}")


                                       File Name  \
0                     Annotatddions_2405bb08.shp   
1            Dune_Underground_Structeeure_LN.shp   
2  NL_IJV_I_WTG_L003_236D_DA3_67T_UTM31N_v1_edge   
3                     Annotatddions_2405bb08.shp   
4            Dune_Underground_Structeeure_LN.shp   

                                           Full Path       Coordinate System  \
0  \\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GR...   WGS_1984_UTM_Zone_32N   
1  \\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GR...  ETRS_1989_UTM_Zone_32N   
2  \\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GR...  ETRS_1989_UTM_Zone_31N   
3  \\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GR...   WGS_1984_UTM_Zone_32N   
4  \\WM20ocqu46ph01\WF_Projects\DK_THO\1_INPUT\GR...  ETRS_1989_UTM_Zone_32N   

                                        Title         Tags  \
0                          Annotations_240508  testing tag   
1                                                     test   
2  IJVER Site I - WT