In [1]:
import os
import geopandas as gpd
import pandas as pd

In [2]:
import sys
sys.path.append('../')
from process.append import get_enriched_features

In [3]:
enriched_path = r"D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0"

In [4]:
enriched_list = os.listdir(enriched_path)

In [5]:
# skip the append, reports gdb file path
skip_list = ['appended.gdb', 'reports.gdb']

In [6]:
point_layers = []
line_layers = []
poly_layers = []
for f in enriched_list:
    if f in skip_list:
        continue
    f_path = enriched_path + r"\{}".format(f)
    gdb_layers = gpd.list_layers(f_path)
    for i in range(len(gdb_layers)):
        if 'point' in gdb_layers.loc[i, 'geometry_type'].lower():
            point_layers.append({'gdb_path': f_path, 'layer_name':gdb_layers.loc[i, 'name']})
        elif 'line' in gdb_layers.loc[i, 'geometry_type'].lower():
            line_layers.append({'gdb_path': f_path, 'layer_name':gdb_layers.loc[i, 'name']})
        else:
            poly_layers.append({'gdb_path': f_path, 'layer_name':gdb_layers.loc[i, 'name']})
            
enriched_layers = {'point': point_layers, 
                  'line': line_layers,
                  'polygon': poly_layers}

In [7]:
enriched_layers

{'point': [{'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\CNRA_1950_2025.gdb',
   'layer_name': 'CNRA_enriched_20250512_point'},
  {'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\NFPORS_1950_2025.gdb',
   'layer_name': 'NFPORS_enriched_20250509_point'},
  {'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\PFIRS_1950_2025.gdb',
   'layer_name': 'PFIRS_20250523'},
  {'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\Timber_Nonspatial_1950_2025.gdb',
   'layer_name': 'Timber_Nonspatial_20250523'}],
 'line': [{'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\CalTRANS_1950_2025.gdb',
   'layer_name': 'CalTRANS_enriched_20250512'},
  {'gdb_path': 'D:\\WORK\\wildfire\\Interagency-Tracking-System\\its\\ITSGDB_backup\\V2.0\\CNRA_1950_2025.gdb',
   'layer_name': 'CNRA_enriched_20250512_line'}],
 'poly

In [8]:
enriched_polygons, enriched_lines, enriched_points = get_enriched_features(enriched_layers)

2025-05-23 12:36:37,552 INFO  [process.append_polygon]  --------------------------------------------------------------------------------
2025-05-23 12:36:37,552 INFO  [process.append_polygon]  Concatenate all polygon records
2025-05-23 12:36:37,552 INFO  [process.append_polygon]  Load GeoDataFrame from the layer 'BLM_enriched_20250509' in 'D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\BLM_1950_2025.gdb' 
  return ogr_read(
  return ogr_read(
2025-05-23 12:36:37,647 INFO  [process.append_polygon]  Load GeoDataFrame from the layer 'CNRA_enriched_20250512_polygon' in 'D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\CNRA_1950_2025.gdb' 
  return ogr_read(
2025-05-23 12:36:40,959 INFO  [process.append_polygon]  Load GeoDataFrame from the layer 'NFPORS_enriched_20250509_polygon' in 'D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\NFPORS_1950_2025.gdb' 
  return ogr_read(
2025-05-23 12:36:41,090 INFO  [process.append_polygon]  Load

In [9]:
california_boundary = gpd.read_file(r'D:\WORK\wildfire\Interagency-Tracking-System\its\Interagency Tracking System.gdb', 
                                    driver='OpenFileGDB', 
                                    layer='California')

  return ogr_read(


In [10]:
import dask_geopandas
# some data need to be converted to multi-type again
from utils.save_gdf_to_gdb import save_gdf_to_gdb

In [11]:
append_path = r"D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb"

In [12]:
# grab timber non spatial path again
timber_nonspatial_path = None
for p in enriched_layers['point']:
    if 'Timber_Nonspatial' in p['gdb_path']:
        timber_nonspatial_path = p
        break

In [13]:
%%time



for df, lyr_name in zip([enriched_polygons,enriched_lines,enriched_points], ["appended_poly","appended_line","appended_point"]):
    # init dask gdf for multithread clipping
    ddf = dask_geopandas.from_geopandas(df, npartitions=16)
    # clip to california bounds
    append_clipped = ddf.sjoin(california_boundary, how='inner', predicate='intersects').compute()
    # industry nonspatial is by design out of california bounds and got clipped, manually concat it back
    if lyr_name == 'appended_point':
        timber_nonspatial = gpd.read_file(timber_nonspatial_path['gdb_path'], 
                                    driver='OpenFileGDB', 
                                    layer=timber_nonspatial_path['layer_name'])
        
        append_clipped = pd.concat([append_clipped, timber_nonspatial], ignore_index=True)
    save_gdf_to_gdb(append_clipped, append_path, lyr_name)

2025-05-23 12:42:33,778 INFO  [utils.save_gdf_to_gdb ]        Windows machine detected
2025-05-23 12:42:33,778 INFO  [utils.save_gdf_to_gdb ]        Check geodataframe geometry object and cast to Multi-x type if both exist
2025-05-23 12:42:33,816 INFO  [utils.save_gdf_to_gdb ]        Running GDAL OpenFileGDB to save to file


D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb


  ogr_write(
2025-05-23 12:43:03,093 INFO  [pyogrio._io           ]  Created 441,628 records
2025-05-23 12:43:09,985 INFO  [utils.save_gdf_to_gdb ]        File saved to D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb appended_poly
2025-05-23 12:44:50,147 INFO  [utils.save_gdf_to_gdb ]        Windows machine detected
2025-05-23 12:44:50,148 INFO  [utils.save_gdf_to_gdb ]        Check geodataframe geometry object and cast to Multi-x type if both exist
2025-05-23 12:44:50,157 INFO  [utils.save_gdf_to_gdb ]        Running GDAL OpenFileGDB to save to file


D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb


  ogr_write(
2025-05-23 12:45:10,843 INFO  [pyogrio._io           ]  Created 131,654 records
2025-05-23 12:45:14,959 INFO  [utils.save_gdf_to_gdb ]        File saved to D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb appended_line
  return ogr_read(
2025-05-23 12:45:16,261 INFO  [utils.save_gdf_to_gdb ]        Windows machine detected
2025-05-23 12:45:16,261 INFO  [utils.save_gdf_to_gdb ]        Check geodataframe geometry object and cast to Multi-x type if both exist
2025-05-23 12:45:16,262 INFO  [utils.save_gdf_to_gdb ]        Running GDAL OpenFileGDB to save to file


D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb


2025-05-23 12:45:16,785 INFO  [pyogrio._io           ]  Created 9,657 records
2025-05-23 12:45:16,858 INFO  [utils.save_gdf_to_gdb ]        File saved to D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\appended.gdb appended_point


CPU times: total: 46min 55s
Wall time: 8min 3s


In [14]:
enriched_polygons = gpd.read_file(append_path, driver='OpenFileGDB', layer='appended_poly')
enriched_points = gpd.read_file(append_path, driver='OpenFileGDB', layer='appended_point')
enriched_lines = gpd.read_file(append_path, driver='OpenFileGDB', layer='appended_line')

  return ogr_read(
  return ogr_read(
  return ogr_read(


In [15]:
enriched_points[enriched_points.AGENCY == 'TIMBER']

Unnamed: 0,PROJECTID_USER,AGENCY,ORG_ADMIN_p,PROJECT_CONTACT,PROJECT_EMAIL,ADMINISTERING_ORG,PROJECT_NAME,PROJECT_STATUS,PROJECT_START,PROJECT_END,...,COUNTS_TO_MAS,index_right,FIPS,STATE,ST_ABBREV,SQMI,POPULATION,Shape_Length,Shape_Area,geometry
9630,TI-0,TIMBER,,,,,TI-0,,NaT,NaT,...,YES,,,,,,,,,POINT (-294794.243 -53503.055)
9631,TI-1,TIMBER,,,,,TI-1,,NaT,NaT,...,YES,,,,,,,,,POINT (-284207.67 -54506.164)
9632,TI-2,TIMBER,,,,,TI-2,,NaT,NaT,...,YES,,,,,,,,,POINT (-286320.171 -38332.883)
9633,TI-3,TIMBER,,,,,TI-3,,NaT,NaT,...,YES,,,,,,,,,POINT (-299416.973 -55835.01)
9634,TI-4,TIMBER,,,,,TI-4,,NaT,NaT,...,YES,,,,,,,,,POINT (-336771.646 -37914.011)
9635,TI-5,TIMBER,,,,,TI-5,,NaT,NaT,...,YES,,,,,,,,,POINT (-303664.539 -55835.02)
9636,TI-6,TIMBER,,,,,TI-6,,NaT,NaT,...,YES,,,,,,,,,POINT (-305851.779 -34474.566)
9637,TI-7,TIMBER,,,,,TI-7,,NaT,NaT,...,YES,,,,,,,,,POINT (-306999.994 -39038.796)
9638,TI-8,TIMBER,,,,,TI-8,,NaT,NaT,...,YES,,,,,,,,,POINT (-299529.723 -40287.156)
9639,TI-9,TIMBER,,,,,TI-9,,NaT,NaT,...,YES,,,,,,,,,POINT (-313799.068 -49649.918)


In [16]:
from datetime import datetime

In [23]:
def get_activity_report(enriched_points, enriched_lines, enriched_polygons):
    append_all = pd.concat([enriched_lines, enriched_points, enriched_polygons])
    append_all = append_all[(append_all['COUNTS_TO_MAS'] == 'YES')]
    
    append_all.geometry = gpd.points_from_xy(append_all['LONGITUDE'],append_all['LATITUDE'])
    
    
    append_all = append_all[["AGENCY",
        "ADMINISTERING_ORG",
        "PRIMARY_OWNERSHIP_GROUP",
        "COUNTY",
        "REGION",
        "ACTIVITY_DESCRIPTION",
        "ACTIVITY_CAT",
        "BROAD_VEGETATION_TYPE",
        "ACTIVITY_STATUS",
        "ACTIVITY_QUANTITY",
        "ACTIVITY_UOM",
        "ACTIVITY_END",
        "Year_txt",
        "geometry"
    ]]
    
    # check if geometry is_valid
    # personally this is redundent, unless lat, lon is not valid, but that would throw an error in previous part
    append_all = append_all[append_all.is_valid]
    
    
    def get_entity_type(agency):
        if agency in ['CALEPA', 'CALSTA', 'CNRA', 'PARKS', 'California State Parks']:
            return 'State'
        if agency in ['DOD', 'DOI', 'USDA', 'DOE', 'NPS']:
            return 'Federal'
        if agency in ['Industrial Timber', 'Timber Companies', 'TIMBER']:
            return 'Timber Companies'
        else:
            return None
        
        
    append_all['ENTITY_TYPE'] = append_all['AGENCY'].apply(get_entity_type)
    
    return append_all

In [24]:
activity_report_gdf = get_activity_report(enriched_points, enriched_lines, enriched_polygons)

In [25]:
save_gdf_to_gdb(activity_report_gdf, 
                r'D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\reports.gdb', 
                f'activity_report{datetime.today().strftime('%Y%m%d')}')

2025-05-23 12:47:26,893 INFO  [utils.save_gdf_to_gdb ]        Windows machine detected
2025-05-23 12:47:26,894 INFO  [utils.save_gdf_to_gdb ]        Check geodataframe geometry object and cast to Multi-x type if both exist
2025-05-23 12:47:26,918 INFO  [utils.save_gdf_to_gdb ]        Running GDAL OpenFileGDB to save to file


D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\reports.gdb


2025-05-23 12:47:32,260 INFO  [pyogrio._io           ]  Created 459,008 records
2025-05-23 12:47:34,940 INFO  [utils.save_gdf_to_gdb ]        File saved to D:\WORK\wildfire\Interagency-Tracking-System\its\ITSGDB_backup\V2.0\reports.gdb activity_report20250523


np.float64(621382.9378068636)