In [17]:
import pandas as pd
import fiona
import os
import sys
sys.path.append('/home/greg/GitHub/jgeo-utils/')
import jgeo_py.geopackages as gp

# Root directories used throughout the notebook.
#   jgeo_path - root of the JornadaGeospatial tree (geopackages are written below it)
#   jgdb_path - directory holding the copies of the Jornada GIS geodatabases
# Each can be overridden with an environment variable (JGEO_PATH / JGDB_PATH) so the
# notebook is not tied to one user's home directory; the defaults are the original
# locations. os.path.join(path, '') guarantees a trailing separator, which later
# cells rely on when they build file names with plain string concatenation.
jgeo_path = os.path.join(
    os.environ.get('JGEO_PATH') or '/home/greg/data/rawdata/JornadaGeospatial/', '')
jgdb_path = os.path.join(
    os.environ.get('JGDB_PATH') or '/home/greg/data/rawdata/JornadaGIS_copies/', '')

## Create a geopackage for each Jornada project

This notebook takes layers from the "All_studies_JornadaGDB_03_30_21.gdb" geodatabase provided by the Jornada GIS group and inserts them into one geopackage per project ID. Because not all layer names contain the project ID number, some cleanup is required afterwards.

This could be adapted to jgeo db.

In [19]:
# Build the paths to the two geodatabase copies (dated and undated file names)
# and read the list of layer names from each with fiona.
all_gdb = f"{jgdb_path}All_studies_JornadaGDB_03_30_21.gdb"
all_gdb_alt = f"{jgdb_path}All_studies_JornadaGDB.gdb"
all_l, all_l_alt = (fiona.listlayers(gdb) for gdb in (all_gdb, all_gdb_alt))


In [20]:
#all_l

In [21]:
# Layers whose names contain any of the substrings below are set aside: their
# names would otherwise be matched against study ids by mistake. This also
# drops some empty layers such as the CDRRC vegetation ones.
exclude = list(filter(
    lambda name: any(
        token in name
        for token in [
            'CDRRC', 'JERS', 'JERG', 'JERV', '200', 'NPP_Arth',
            'JornadaVegetation1998', '410_z_Plot', 'wwdt',
        ]
    ),
    all_l,
))
print(exclude)
print(len(all_l))
# Number of layers that remain once the excluded ones are removed
sum(1 for name in all_l if name not in exclude)

['JRNPhysiography2000', 'JRNLandforms2000', 'JRNSoilParentMaterials2000', 'JERSoil1963', 'JERSoils1918', 'CDRRCVegetation1998', 'CDRRCVegetation1938', 'CDRRCGrassCondition1858', 'CDRRCShrubPresence1858', 'JERGrassCondition1858', 'JERShrubPresence1858', 'JERVegetation1915', 'JERVegetation1928', 'JERVegetation1998', 'JornadaVegetation1998', 'JornadaResearch_410_z_PlotCoordinates', 'wwdt_1938', 'NPP_Arthropods_1988_1994', 'NPP_arthropods_1996_2000']
600


581

In [None]:
# Loop through study numbers, collect the layers whose names contain the
# zero-padded study number (minus the excluded ones), and write one geopackage
# per study. The per-study logs returned by gp.gdb_to_gpkg are gathered in a
# list and concatenated once at the end, so the log no longer depends on study
# 1 having layers (previously prj_log was only initialised when i == 1).
all_study = [lay for lay in all_l if lay not in exclude]
gpkg_dir = os.path.join(jgeo_path, 'jrn_studies_gpkg')
results = []
# NOTE(review): range(1, 2) only visits study 001 - looks like a leftover from
# testing; widen the range to process every study.
for i in range(1, 2):
    # Pad study number and get a list of all matching layers
    study = str(i).zfill(3)
    print("Study " + study)
    layers = [lay for lay in all_study if study in lay]
    if not layers:
        print('  no layers to add')
        continue
    # At least one layer matched: make a geopackage and populate it
    gpkgname = os.path.join(gpkg_dir, 'prj' + study + '.gpkg')
    result = gp.gdb_to_gpkg(all_gdb, layers, gpkgname)
    results.append(result)

if results:
    # Concatenate result logs into a single dataframe and write it out
    prj_log = pd.concat(results)
    prj_log.to_csv(os.path.join(gpkg_dir, 'gdb_to gpkg_log_20230503.csv'))
else:
    # Nothing was converted: keep prj_log defined for the cells below, but
    # do not write an empty log file.
    prj_log = pd.DataFrame()
    print('No layers were converted; log not written')

In [11]:
# Show the size of the conversion log next to the total number of layers in
# the geodatabase, then preview the first rows of the log.
print(prj_log.shape, len(all_l), sep='\n')
prj_log.head(5)

(565, 7)
600


Unnamed: 0,origin_gdb,layer_name,geom_type,n_features,added_to_gpkg,gpkg_fname,dt_added
0,../JornadaGIS_copies/All_studies_JornadaGDB_03...,JornadaStudy_001_hydrology_runoff_drums,MultiPolygon,21,True,Jornada_prj_gpkg/prj001.gpkg,2022-12-09 17:15:43.052570
1,../JornadaGIS_copies/All_studies_JornadaGDB_03...,JornadaStudy_001_hydrology_runoff_exclosures,MultiPolygon,1,True,Jornada_prj_gpkg/prj001.gpkg,2022-12-09 17:15:43.384116
2,../JornadaGIS_copies/All_studies_JornadaGDB_03...,JornadaStudy_001_hydrology_runoff_gps_points,Point,142,True,Jornada_prj_gpkg/prj001.gpkg,2022-12-09 17:15:43.760363
3,../JornadaGIS_copies/All_studies_JornadaGDB_03...,JornadaStudy_001_hydrology_runoff_plates,MultiPolygon,21,True,Jornada_prj_gpkg/prj001.gpkg,2022-12-09 17:15:44.127417
4,../JornadaGIS_copies/All_studies_JornadaGDB_03...,JornadaStudy_001_hydrology_runoff_plots,MultiPolygon,21,True,Jornada_prj_gpkg/prj001.gpkg,2022-12-09 17:15:44.516192


In [21]:
# Names of the layers the conversion log reports as written to a geopackage.
# The flag is converted to text before comparing so the filter works whether
# the column holds booleans or the strings 'True'/'False'; comparing a boolean
# column with the string 'True' would silently match nothing.
was_added = prj_log.added_to_gpkg.astype(str) == 'True'
added = prj_log.layer_name.loc[was_added]
# Every geodatabase layer that was not added; a set keeps the lookup cheap.
added_names = set(added.values)
excluded = [x for x in all_l if x not in added_names]

In [22]:
# The layers below were excluded from the geopackages. Some should be added - they just don't have
# the correct project numbers in their names. Examine the original feature datasets in the gdb to
# find where they go.
excluded

['JRNPhysiography2000',
 'JRNLandforms2000',
 'JRNSoilParentMaterials2000',
 'JornadaGeomorphology',
 'JERSoil1963',
 'JERSoils1918',
 'CDRRCVegetation1998',
 'CDRRCVegetation1938',
 'CDRRCGrassCondition1858',
 'CDRRCShrubPresence1858',
 'JERGrassCondition1858',
 'JERShrubPresence1858',
 'JERVegetation1915',
 'JERVegetation1928',
 'JERVegetation1998',
 'JornadaVegetation1998',
 'JornadaStudy_011_npp_annual_production_data',
 'JornadaStudy_011_npp_harvest_data',
 'JornadaStudy_011_npp_quad_biomass_data',
 'JornadaStudy_086_smes_cryptogam_crust_quad',
 'JornadaStudy_086_smes_leaf_litter_quad_data',
 'JornadaStudy_086_smes_plant_cover_line_data',
 'JornadaStudy_086_smes_plant_cover_quad_data',
 'JornadaStudy_086_smes_rabbit_feces_quad_data',
 'JornadaStudy_086_smes_rodent_trapping_web_data',
 'JornadaStudy_086_smes_soil_disturbance_quad_data',
 'JornadaStudy_086_smes_termite_casing_quad_data',
 'JornadaStudy_380_jer_standard_raingage_data',
 'Exc',
 'Playas_30',
 'watersheds_30',
 'JER_Hi