# Jornada layer inventory

This script generates an inventory of the layers in the jgeo database, matched with the
source layers they originate from in the Jornada file geodatabases (.gdb files).

In [1]:
# Import sqlalchemy and pandas
import sqlalchemy as sqla
import pandas as pd
import fiona
import os

# Get credentials for the jgeo database
import sys
sys.path.append('/home/greg/admin/')
import jgeo_cred as dbcred

# Build the SQLAlchemy URL for the jgeo database (PostgreSQL via psycopg2,
# port 5432) from the credentials module, then create the engine from it.
jgeo_url = "postgresql+psycopg2://{0}:{1}@{2}:5432/{3}".format(
    dbcred.user, dbcred.pwd, dbcred.host, dbcred.db)
# NOTE: despite its name, `conn` holds whatever create_engine returns
conn = sqla.create_engine(jgeo_url)

# Directory holding the copies of the Jornada geodatabases
jgdb_path = '/home/greg/data/rawdata/JornadaGIS_copies'


In [2]:
# Collect the table names of every requested jgeo schema into a single
# two-column DataFrame (schema name, table name).
jgeo_schemas = [
    'jrn_studies', 'biogeography_and_soils', 'bounds_and_structures',
    'non_research_layers_gdb', 'physical_geography', 'sensor_networks',
    'unsorted',
]
insp = sqla.inspect(conn)

# One frame per schema; the scalar schema name is repeated for each of its
# tables. The per-schema row indexes are kept as-is by the concatenation.
jgeo_tables = pd.concat([
    pd.DataFrame({'jgeo_schema': schema_name,
                  'jgeo_table_name': insp.get_table_names(schema=schema_name)})
    for schema_name in jgeo_schemas
])


In [3]:
# Report the (rows, columns) size of the jgeo table list and preview its last rows
print(jgeo_tables.shape)
jgeo_tables.tail()

(664, 2)


Unnamed: 0,jgeo_schema,jgeo_table_name
8,unsorted,npp_sites_study011
9,unsorted,studiesbuf50_26nov2014
10,unsorted,research_lter_transect_stations
11,unsorted,Soilmu_a_nm690_from_jgdb
12,unsorted,Soilmu_a_nm719_from_jgdb


In [4]:
# Collect the layer names of each requested Jornada geodatabase into a single
# two-column DataFrame (geodatabase file name, layer name).
jgdb_names = [
    'All_studies_JornadaGDB_03_30_21.gdb',
    'Infrastructure2_21Sep15.gdb',
    'CSIS_Study413_27Aug13_Copy.gdb',
    'NonResearchLyrs_17Sep14.gdb',
]

jgdb_frames = []
for gdb_name in jgdb_names:
    layer_names = fiona.listlayers(os.path.join(jgdb_path, gdb_name))
    # Skip anything with '_data' in its name: those are treated as
    # non-spatial data tables rather than layers
    spatial_layers = [name for name in layer_names if '_data' not in name]
    jgdb_frames.append(
        pd.DataFrame({'jornada_fgdb': gdb_name, 'jgdb_table_name': spatial_layers}))

jgdb_tables = pd.concat(jgdb_frames)

In [5]:
# Report the (rows, columns) size of the geodatabase layer list and preview its first rows
print(jgdb_tables.shape)
jgdb_tables.head()

(659, 2)


Unnamed: 0,jornada_fgdb,jgdb_table_name
0,All_studies_JornadaGDB_03_30_21.gdb,JornadaStudy_009_animal_transects
1,All_studies_JornadaGDB_03_30_21.gdb,JornadaStudy_010_termite_bait
2,All_studies_JornadaGDB_03_30_21.gdb,JornadaStudy_012_transect_hydroprobe_tubes
3,All_studies_JornadaGDB_03_30_21.gdb,JornadaStudy_013_npp_hydroprobe_tubes
4,All_studies_JornadaGDB_03_30_21.gdb,JornadaStudy_120_boundary_fence_intercept_plan...


In [6]:
# Derive the name each geodatabase layer probably has in jgeo and store it in
# a new column: the layer name is lower-cased and a leading study/research
# prefix (the pattern also lists several misspelled variants) is replaced by
# 'prj'. A few jgeo tables were renamed manually, so this is only a best guess.
import re
rexpression = '^jornadastudy_|^jonradastudy_|^study|^jornadaresearch_|^jornadareasearch_|^jornadareseach_'
prefix_pattern = re.compile(rexpression)

probable_names = []
for layer_name in jgdb_tables['jgdb_table_name']:
    probable_names.append(prefix_pattern.sub('prj', layer_name.lower()))

jgdb_tables['jgdb_name_in_jgeo'] = probable_names


In [7]:
# Build the inventory: outer-join the jgeo tables to the geodatabase layers,
# matching the jgeo table name against the probable jgeo name of each layer.
# The outer join keeps rows that have no counterpart on the other side.
output_inv = jgeo_tables.merge(
    jgdb_tables,
    how='outer',
    left_on='jgeo_table_name',
    right_on='jgdb_name_in_jgeo',
)

In [9]:
# Write the raw inventory to a CSV file in the output directory
outdir = '/home/greg/data/rawdata/JornadaGeospatial/'
inventory_csv = os.path.join(outdir, 'inventory_jornada_layers_raw.csv')
output_inv.to_csv(inventory_csv)