In [1]:
import importlib
import os
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcgis.gis import GIS
from dotenv import load_dotenv, find_dotenv
import pandas as pd

# import arcpy if available
# NOTE: importlib.util is a submodule; a bare "import importlib" does not guarantee it is loaded,
# so import it explicitly before calling find_spec.
import importlib.util

if importlib.util.find_spec("arcpy") is not None:
    import arcpy

In [3]:
# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
project_parent = Path('./').absolute().parent

data_dir = project_parent/'data'

data_raw = data_dir/'raw'
data_ext = data_dir/'external'
data_int = data_dir/'interim'
data_out = data_dir/'processed'

gdb_raw = data_raw/'raw.gdb'
gdb_int = data_int/'interim.gdb'
gdb_out = data_out/'processed.gdb'

# import the project package from the project package path
sys.path.append(str(project_parent/'src'))
import dm

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
from ba_tools import data as ba_data

In [24]:
# collect the registry sub-keys found under the Business Analyst "Datasets" key
keys = ba_data._get_child_keys(
    r'SOFTWARE\WOW6432Node\Esri\BusinessAnalyst\Datasets'
)

def _get_dataset_info(key):
    
    name = os.path.basename(key)

    name_parts = name.split('_')

    country = name_parts[0] if name_parts[1] is not None else None
    year = int(name_parts[2]) if name_parts[2] is not None else None
    
    return name, country, year

# parse every dataset key into a (name, country, year) record
cntry_info_lst = list(map(_get_dataset_info, keys))

# tabulate the records; the frame is the final expression so the notebook renders it
pd.DataFrame(
    cntry_info_lst,
    columns=['name', 'country', 'year'],
)

In [25]:
from dm._registry_nav import *

In [29]:
# same lookup as before, this time through the helper star-imported from dm._registry_nav
cntry_key_lst = get_child_keys(
    r'SOFTWARE\WOW6432Node\Esri\BusinessAnalyst\Datasets'
)

In [37]:
cntry_key = cntry_key_lst[0]
country_code = 'USA'

# first registry key whose dataset name (last path component) starts with the country code
[
    key_pth
    for key_pth in cntry_key_lst
    if os.path.basename(key_pth).split('_')[0] == country_code
][0]

'SOFTWARE\\WOW6432Node\\Esri\\BusinessAnalyst\\Datasets\\USA_ESRI_2019'

In [38]:
# project helper; the recorded output shows it returning the registry key path of the USA dataset
dm._registry_nav.get_usa_key()

'SOFTWARE\\WOW6432Node\\Esri\\BusinessAnalyst\\Datasets\\USA_ESRI_2019'

In [40]:
# NOTE(review): the recorded run of this cell raised
#   TypeError: get_usa_dataset() missing 1 required positional argument: 'self'
# so the helper in dm._registry_nav presumably declares a `self` parameter even though it is
# called here as a module-level function - confirm and fix it in src.
dm._registry_nav.get_usa_dataset()

TypeError: get_usa_dataset() missing 1 required positional argument: 'self'

In [70]:
# registry key path for the selected country, resolved by the project helper
reg_pth = dm._registry_nav.get_country_key(country_code)

# NOTE(review): winreg is never imported explicitly in this notebook; presumably it arrives through
# the star import from dm._registry_nav - confirm.
# The handle is intentionally left open under the name `key` because a later cell still reads from it.
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, reg_pth)

# QueryValueEx returns a (value, type) tuple; only the value - the demographic data directory - is needed
pth = winreg.QueryValueEx(key, 'DemographyDataDir1')[0]

# first file geodatabase in that directory whose name contains "ESRI"
gdb = list(Path(pth).glob('*ESRI*.gdb'))[0]

# display the resolved geodatabase path (replaces the unfinished `arcpy.` line, which was a SyntaxError)
gdb

WindowsPath('D:/arcgis/ba_data/Data/Demographic Data/USA_ESRI_2019.gdb')

In [57]:
# open the USA dataset key and read its install directory
cntry_ba_key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\WOW6432Node\Esri\BusinessAnalyst\Datasets\USA_ESRI_2019')

# query the handle opened just above rather than the `key` variable left over from another cell;
# the result is a (value, type) tuple
winreg.QueryValueEx(cntry_ba_key, 'DataInstallDir')

('D:\\arcgis\\ba_data\\', 1)

In [72]:
import tempfile

In [74]:
# tempfile.mktemp is deprecated: it only returns a name, so another process can claim the path before
# it is used. mkstemp creates the (empty) file atomically and returns an open descriptor plus the path.
tmp_fd, tmp_csv = tempfile.mkstemp(suffix='.csv')

# only the path is needed here, so release the OS-level descriptor straight away
os.close(tmp_fd)

tmp_csv

'C:\\Users\\joel5174\\AppData\\Local\\Temp\\2\\tmptf7e5u8s.csv'

In [75]:
@staticmethod
def _geometry_column_to_str(self, dataframe:pd.DataFrame, geometry_column:str='SHAPE'):
    """
    Helper function to follow the paradigm of DRYD (don't repeat yourself, DUMMY!) for converting a column from
        a valid geometry type to an object(str). This is required to be able to save a Spatially Enabled DataFrame
        to flat outputs such as CSV and parquet.
    Args:
        dataframe: Spatially Enabled DataFrame
        geometry_column: Optional: Column containing valid Esri Geometry objects. This only needs to be specified
            if the name of the column is not SHAPE.

    Returns: Pandas DataFrame ready for export.

    """
    dataframe[geometry_column] = dataframe[geometry_column].swifter.allow_dask_on_strings(True).apply(
        lambda geom: geom.JSON)

    return dataframe

# attach the helper to GeoAccessor; the attribute name must not carry a leading space, otherwise the
# helper can only be reached through getattr() with that exact string
setattr(GeoAccessor, '_geometry_column_to_str', _geometry_column_to_str)