In [None]:
### import libraies
import arcpy
import pandas as pd
import numpy as np
import os

In [30]:

coordSys = "PROJCS['WGS_1984_Web_Mercator_Auxiliary_Sphere',GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Mercator_Auxiliary_Sphere'],PARAMETER['False_Easting',0.0],PARAMETER['False_Northing',0.0],PARAMETER['Central_Meridian',0.0],PARAMETER['Standard_Parallel_1',0.0],PARAMETER['Auxiliary_Sphere_Type',0.0],UNIT['Meter',1.0]]"
gdb_path = r'E:\Project_DataIntegrity\Municipality.gdb'
main_dir = r'E:\Project_DataIntegrity'
spatial_Reference = "PROJCS['NAD_1983_StatePlane_California_VI_FIPS_0406_Feet',GEOGCS['GCS_North_American_1983',DATUM['D_North_American_1983',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Lambert_Conformal_Conic'],PARAMETER['False_Easting',6561666.666666666],PARAMETER['False_Northing',1640416.666666667],PARAMETER['Central_Meridian',-116.25],PARAMETER['Standard_Parallel_1',32.78333333333333],PARAMETER['Standard_Parallel_2',33.88333333333333],PARAMETER['Latitude_Of_Origin',32.16666666666666],UNIT['Foot_US',0.3048006096012192]];-118608900 -91259500 3048.00609601219;-100000 10000;-100000 10000;3.28083333333333E-03;0.001;0.001;IsHighPrecision"

### create feature dataset inside file gdb
featureDataset_name_ls  = [ 'Boundaries', 'Transportation', 'Education' ]

#### Geodatabase fundamental schema

<img src= 'images/GDB_schema_breakdown.png' alt= 'Geodatabase simple schema' width= '600'/>

In [None]:
#### create a geodatabase
featureDataset_name_ls  = [ 'Boundaries', 'Transportation', 'Education' ]
with arcpy.EnvManager( outputCoordinateSystem= coordSys ):
    for e_featureDataset in featureDataset_name_ls:
        arcpy.management.CreateFeatureDataset(   out_dataset_path= gdb_path, out_name= e_featureDataset, spatial_reference= spatial_Reference   )

In [7]:
### import feature class into gdb (Boundaries) from shp files
feature_name_ls = [ 'sd_city95.shp', 'sd_own.shp', 'sd_tract.shp' ]
source_dir = 'Boundaries'
feature_source_path = os.path.join( main_dir, source_dir )

feature_ls = ';'.join(  os.path.join(feature_source_path, e_featureNm) for e_featureNm in feature_name_ls  ) + ';'
target_gdb = os.path.join( gdb_path, 'Boundaries' )

arcpy.conversion.FeatureClassToGeodatabase( Input_Features= feature_ls, Output_Geodatabase= target_gdb )


### import feature class into gdb (Education) from shp file
feature_name_ls = [ 'sd_colleges.shp' ]
source_dir = 'Education'
feature_source_path = os.path.join( main_dir, source_dir )

feature_ls = ';'.join(  os.path.join(feature_source_path, e_featureNm) for e_featureNm in feature_name_ls  ) + ';'
target_gdb = os.path.join( gdb_path, 'Education' )

arcpy.conversion.FeatureClassToGeodatabase( Input_Features= feature_ls, Output_Geodatabase= target_gdb )

### import feature class into gdb (Transportation)  from shp files
feature_name_ls = [ 'sd_railstop.shp', 'sd_rr.shp' ]
source_dir = 'Transportation'
feature_source_path = os.path.join( main_dir, source_dir )

feature_ls = ';'.join(  os.path.join(feature_source_path, e_featureNm) for e_featureNm in feature_name_ls  ) + ';'
target_gdb = os.path.join( gdb_path, 'Transportation' )

arcpy.conversion.FeatureClassToGeodatabase( Input_Features= feature_ls, Output_Geodatabase= target_gdb )

'E:\\Project_DataIntegrity\\Boundaries\\sd_city95.shp;E:\\Project_DataIntegrity\\Boundaries\\sd_own.shp;E:\\Project_DataIntegrity\\Boundaries\\sd_tract.shp;'

In [None]:
_target_feature = r'Education\sd_schools'
target_feature = os.path.join( gdb_path, _target_feature )
target_feature

'E:\\Project_DataIntegrity\\Municipality.gdb\\Education\\sd_schools'

In [None]:
### import CAD feature into gdb (Transportation)

_cad_feature = 'cad_files\schools.DWG\Point'
cad_feature = os.path.join( main_dir, _cad_feature )

_target_feature = r'Education\sd_schools'
target_feature = os.path.join( gdb_path, _target_feature )


arcpy.conversion.ExportFeatures( in_features= cad_feature, out_features= target_feature,
    where_clause= '', use_field_alias_as_name= 'NOT_USE_ALIAS',
    sort_field= None
)

<br><hr>

#### Defining `Subtypes` and implementing on parcel data.

<hr><br>


##### A relationship class establishes a schematic link between feature classes and subtypes.

<BR>

<img src= 'images/Maintaining_Attribute_Integrity_1.png' alt= 'Geodatabase simple schema' width= '500'/>


In [18]:
featureClass_nm = 'landParcels'
featureClass_path = os.path.join( gdb_path, featureClass_nm )

'E:\\Project_DataIntegrity\\Municipality.gdb\\landParcels'

In [None]:
### field to use for subtypes
subtype_field = 'LOT_TYPE'
### set the subtype field
arcpy.SetSubtypeField_management( featureClass_path, subtype_field )

## Add subtypes (Code, Name)
arcpy.AddSubtype_management( featureClass_path, 1, 'RESIDENTIAL - SINGLE FAMILY' )
arcpy.AddSubtype_management( featureClass_path, 2, 'RESIDENTIAL - MULTI FAMILY' )
arcpy.AddSubtype_management( featureClass_path, 3, 'FLATS/CONVERSIONS' )
arcpy.AddSubtype_management( featureClass_path, 4, 'GARAGE/UNIMPROVED LAND' )
arcpy.AddSubtype_management( featureClass_path, 6, 'COMMERCIAL' )

default_LotType_code = 1  ## set a default subtype
arcpy.SetDefaultSubtype_management( featureClass_path, default_LotType_code )

In [None]:
#### create relationship class

relation_feature_class = os.path.join( gdb_path, 'Parcel_Building_Relation' )

arcpy.management.CreateRelationshipClass(
    origin_table= 'landParcels', destination_table='BuildingOutlines', out_relationship_class= relation_feature_class,
    relationship_type= 'COMPOSITE', forward_label= 'BuildingOutlines', backward_label='landParcels',
    message_direction='NONE', cardinality='ONE_TO_MANY', attributed='NONE',
    origin_primary_key='OID', origin_foreign_key='OID',
)

<br><hr>

#### Defining `Domain` and implementing.
<hr><br>

##### Domain values provide additional information by specifying the values that each domain contains.
<br>

In [21]:
feature_class_nm = 'Roads\Park_roads'
feature_class = os.path.join( gdb_path, feature_class_nm )

'E:\\Project_DataIntegrity\\Municipality.gdb\\Roads\\Park_roads'

In [35]:
##  assign Domain parameters 
domain_name1 = 'Accessibility'
domain_desc1 = 'Road type based on access'
field_type = 'TEXT'  
domain_type = 'CODED'
split_policy = 'DUPLICATE'
field_name1= 'UNPAVED'
coded_values1 = {  'No': 'Unpaved Road',  'Yes': 'Paved Road'  }

##  Create the domain 
existing_domains = arcpy.da.Listexisting_domains( gdb_path )
# check if it exists already
if any(  d.name == domain_name1  for d in existing_domains  ):
    print( f'Domain - {domain_name1} already exists. Skipping creation.' )
else:
    arcpy.management.CreateDomain(
        in_workspace= gdb_path, domain_name= domain_name1,
        domain_description= domain_desc1, field_type= field_type, domain_type= domain_type, split_policy= split_policy
    )
    print( f'Domain - {domain_name1} created.' )
    
## add domain coded values
try:
    for code, desc in coded_values1.items():
        arcpy.management.AddCodedValueToDomain( in_workspace=gdb_path,
            domain_name= domain_name1, code= code, code_description= desc
        )
    print( f'Coded values added to {domain_name1}.' )
except Exception as e:
    print( f'Error adding coded values to domain: {e}' )

## Assign the domain to the UNPAVED field 
try: 
    arcpy.management.AssignDomainToField( in_table= feature_class,
        field_name= field_name1, domain_name= domain_name1  )
    print( f'Domain - {domain_name1} assigned to field - {field_name1}.' )
except Exception as e:
    print( f'Error assigning domain: {e}' )

Domain - Accessibility created.
Coded values added to Accessibility.
Domain - Accessibility assigned to field - UNPAVED.


In [24]:
## QAQC : check code that confirms the respective domain values have been set

field_domain = [  f.domain for f in arcpy.ListFields(feature_class) if f.name == field_name1  ]

if domain_name1 in field_domain:
    print(f"Confirmed: Field {field_name1} has domain '{domain_name1}'.")
    domain_obj = next( (d for d in arcpy.da.ListDomains(gdb_path) if d.name == domain_name1), None)
    if domain_obj and hasattr(domain_obj, 'codedValues'):
        if domain_obj.codedValues == coded_values1:  print('Confirmed: Domain coded values match expected values.')
        else:
            print('Warning: Domain coded values do not match expected values.\n')
            print( f'Actual Code values --> {domain_obj.codedValues}\nCurrent coded values --> {coded_values1}' ) 

else:  print(f"Error: Domain '{domain_name1}' not assigned to field {field_name1}.")

Confirmed: Domain coded values match expected values.


<br>

### Create `Contingent values` in an attribute field

    - integrate attributes containing seasonal & unpaved roads
    - set up the domains with the corresponding attributes
    - build contingent values
    - could be used to validate safety & drivability

In [26]:
### Create domain

##  assign Domain parameters 
domain_name2 = 'Seasonality'
domain_desc2 = 'Roads access during seasons'
field_type = 'TEXT'  
domain_type = 'CODED'
split_policy = 'DUPLICATE'
field_name2 = 'SEASONAL'
coded_values2 = {  'No': 'Open all year round',  'Yes': 'Open during season only'  }

##  Create the domain 

domains = arcpy.da.ListDomains(gdb_path)
# check if it exists already
if any( d.name == domain_name2 for d in domains ):
    print(f"Domain '{domain_name2}' already exists. Skipping creation.")
else:
    arcpy.management.CreateDomain(
        in_workspace= gdb_path, domain_name= domain_name2,
        domain_description= domain_desc2, field_type= field_type, domain_type= domain_type, split_policy= split_policy
    )
    print(f"Domain '{domain_name2}' created.")

## add domain coded values
try:
    for code, desc in coded_values2.items():
        arcpy.management.AddCodedValueToDomain( in_workspace=gdb_path,
            domain_name= domain_name2, code= code, code_description= desc
        )
    print(f"Coded values added to '{domain_name2}'.")
except Exception as e:
    print(f'Error adding coded values to domain: {e}')


## Assign the domain to the field field_name
try:
    arcpy.management.AssignDomainToField( in_table= feature_class,
        field_name= field_name2, domain_name= domain_name2  )
    print( f"Domain '{domain_name2}' assigned to field '{field_name2}'." )
except Exception as e:
    print( f'Error assigning domain: {e}' )

Domain 'Seasonality' created.
Coded values added to 'Seasonality'.
Domain 'Seasonality' assigned to field 'SEASONAL'.


In [27]:
#### check code that confirms the respective domain values have been set

field_domain = [  f.domain for f in arcpy.ListFields(feature_class) if f.name == field_name2  ]

if domain_name2 in field_domain:
    print(  f"Confirmed: Field {field_name2} has domain '{domain_name2}'."  )
    domain_obj = next( (d for d in arcpy.da.ListDomains(gdb_path) if d.name == domain_name2), None)
    if domain_obj and hasattr(domain_obj, 'codedValues'):
        if domain_obj.codedValues == coded_values2:  print('Confirmed: Domain coded values match expected values.')
        else:
            print( 'Warning: Domain coded values do not match expected values.\n' )
            print( f'Actual Code values --> {domain_obj.codedValues}\nCurrent coded values --> {coded_values2}' ) 
 
else:  print( f"Error: Domain '{domain_name2}' not assigned to field {field_name2}." )

print('Confirmed: Domain coded values match expected values.')

Confirmed: Domain coded values match expected values.


In [None]:
# Create the field group used by contingent values.

field_grp_nm = 'SafeDrivability'
fields2add = [ field_name1, field_name2 ]

arcpy.management.CreateFieldGroup(
    target_table= feature_class, 
    name= field_grp_nm, fields= fields2add, is_restrictive= 'RESTRICT' 
)

## Add the two contingent-value set

### for contigency 'Paved Road' --> 'Open All Year Round'
contigency_val_1 = [
    [  field_name1, 'CODED_VALUE', 'Yes' ],  ## translates to  UNPAVED  --> 'Paved Road'
    [  field_name2, 'CODED_VALUE', 'No'  ]   ## translates to  SEASONAL --> 'Open All Year Round'
]

### for contigency  'Unpaved Road'  --> 'Open during season only'
contigency_val_2 = [
    [  field_name1, 'CODED_VALUE', 'No'   ],    ## translates to  UNPAVED  --> 'Unpaved Road'
    [  field_name2, 'CODED_VALUE', 'Yes'  ]     ## translates to  SEASONAL --> 'Open during season only'
]

### add contingency 1
arcpy.management.AddContingentValue(
    target_table= feature_class, field_group_name= field_grp_nm,
    values= contigency_val_1, subtype= None, retire_value= 'DO_NOT_RETIRE'
)

## add contingency 2
arcpy.management.AddContingentValue(
    feature_class, field_grp_nm,
    contigency_val_2, subtype= None, retire_value= 'DO_NOT_RETIRE'
)

In [28]:
## paths
feature_dataset_nm = 'PipeSystem'
feature_class_nm = 'Pipes'
feature_class = os.path.join( gdb_path, feature_dataset_nm, feature_class_nm )
feature_class

'E:\\Project_DataIntegrity\\Municipality.gdb\\PipeSystem\\Pipes'

In [None]:
desc = arcpy.Describe(feature_class)
### GlobalIDs are required; add if missing (pre-requisites for validation rules)
if not getattr( desc, 'hasGlobalID', False ):
    arcpy.management.AddGlobalIDs( [feature_class] ) 

## Editor tracking must be enabled for VALIDATION rules
try:
    arcpy.management.EnableEditorTracking(
        in_dataset=feature_class, creator_field= 'created_user', creation_date_field= 'created_date',
        last_editor_field= 'last_edited_user', last_edit_date_field= 'last_edited_date',
        add_fields= 'ADD_FIELDS', record_dates_in= 'UTC'
    )
except arcpy.ExecuteError:  ## If already enabled or fields exist, skip
    print( 'Editor tracking already enabled or fields exist. Skipping...' )
    pass

### hard-coded Arcade expression for validation -->
## This expression checks if the wall thickness is between 0.13 and 0.6 and the operating pressure is between 40 and 44.
## If both conditions are met, it returns 'Pass', otherwise it returns 'Fail'.
arcade_expr = r'''
var wallThickness = $feature.WALLTHICKNESS; 
var operatingPressure = $feature.OPERATINGPRESSURE; 
var WallStatus = IIf(wallThickness >= 0.13 && wallThickness <= 0.6, 'Pass', 'Fail'); 
var PressureStatus = IIf(operatingPressure >= 40 && operatingPressure <= 44, 'Pass', 'Fail'); 
var PipeValid = IIf(WallStatus == 'Pass' && PressureStatus == 'Pass', 'Pass', 'Fail'); 
return PipeValid == 'Pass';
'''

### Add the validation rule to the Pipes feature class
arcpy.management.AddAttributeRule(
    in_table= feature_class, name= 'ResidentialPipes', 
    type= 'VALIDATION', script_expression= arcade_expr,
    error_number= 101, error_message= 'Invalid entry',
    description= 'Making sure that the pipe wall thickness and operating pressure is within safe range for residential use.',
    subtype= 'Plastic PE',         # scope to the Plastic PE subtype
    batch= 'BATCH'                 # required for VALIDATION rules
)

### update CONSTRAINT rule (PEPipes) with Insert/Update/Delete triggers ---
arcpy.management.AddAttributeRule(
    in_table= feature_class, name= 'PEPipes',
    type= 'CONSTRAINT', script_expression= arcade_expr,
    triggering_events= 'INSERT;UPDATE;DELETE',
    error_number= 101, error_message= 'Invalid entry',
    description= 'Making sure that the pipe wall thickness and operating pressure is within safe range for residential use.',
    subtype= 'Plastic PE'          ## scope to the Plastic PE subtype
)

# run batch validation for the entire file geodatabase
arcpy.management.EvaluateRules(
    in_workspace= gdb_path, evaluation_types= 'VALIDATION_RULES'
)


<BR><HR>

### Spatial Data Integrity

<hr><br>



##### Different topological relationship among Feature classes

<BR>

<img src= 'images/topology_relation.png' alt= 'topological relationship' width= '500'/>


In [46]:
### input feature class paths

road_feature_name = 'Roads\Street'
parcel_feature_name = 'parcel\landParcels'
building_feature_name = 'parcel\BuildingOutlines'
topology_name = 'parcel\City_Topology'

feature_dataset = os.path.join( gdb_path, 'parcel' )
parcels_fc = os.path.join( gdb_path, parcel_feature_name )
buildings_fc = os.path.join( gdb_path, building_feature_name )
roads_fc = os.path.join( gdb_path, road_feature_name )
topology_path = os.path.join( gdb_path, topology_name )

'E:\\Project_DataIntegrity\\Municipality.gdb\\parcel\\City_Topology'

In [None]:
## Create a geodatabase topology (XY cluster tolerance = dataset default)
arcpy.management.CreateTopology( in_dataset= feature_dataset, out_name= topology_name )  

## Add the 3 feature classes to the topology rules with XY & Z Rank = 1
for e_featureClass in ( parcels_fc, buildings_fc, roads_fc ):
    arcpy.management.AddFeatureClassToTopology(
        in_topology= topology_path, in_featureclass= e_featureClass,
        xy_rank= 1, z_rank= 1
    )

## Add topology rules
# --> Buildings must be contained within Parcels
arcpy.management.AddRuleToTopology(
    in_topology= topology_path, rule_type= 'Must Be Covered By (Area-Area)',
    in_featureclass  = buildings_fc, subtype= '',
    in_featureclass2 = parcels_fc, subtype2= ''
)

## --> Buildings polygons should not overlap  -> Must Not Overlap (Area)
arcpy.management.AddRuleToTopology(
    in_topology= topology_path, rule_type= 'Must Not Overlap (Area)',
    in_featureclass= buildings_fc, subtype= ''
) 

## -->  Parcels polygons should not overlap  -> Must Not Overlap (Area)
arcpy.management.AddRuleToTopology(
    in_topology= topology_path, rule_type= 'Must Not Overlap (Area)',
    in_featureclass= parcels_fc, subtype= ''
)

In [None]:
# Validate the topology over the full extent 
arcpy.management.ValidateTopology( in_topology= topology_path, visible_extent= 'FULL_EXTENT' )  

## Export current errors to FCs for review
topo_errors_fc = os.path.join( gdb_path, 'topology_errors' )
arcpy.management.ExportTopologyErrors(
    in_topology= topology_path, out_path= gdb_path, out_base_name= 'topoErr_'
)  