In [None]:
# Drop the -n option if running on Google Colab or a local Jupyter.
# Run the line below if the ArcGIS API is not installed.
# !conda install -n arcgis -qy arcgis
!conda install -n arcgis -qy python-slugify geopandas

In [None]:
from arcgis.gis import GIS
# from arcgis.features import FeatureSet

from slugify import slugify
from pathlib import Path

In [None]:
# Portal endpoint used for the whole notebook; no credentials are passed.
PORTAL_URL = "https://geoportal.sgb.gov.br/portal/"

gis = GIS(PORTAL_URL)
gis

# FeatureLayerCollection

In [None]:
# Portal item id of the content item whose layers and tables are used below.
ITEM_ID = 'dc0ec5801eff48ec98924a609f939cbc'

fc = gis.content.get(ITEM_ID)
fc

In [None]:
# (position in fc.layers, layer name) for every layer that declares at least
# one relationship.
[
    (position, lyr.properties.name)
    for position, lyr in enumerate(fc.layers)
    if lyr.properties.relationships
]

## Start here

In [None]:
# Set this to the position in fc.layers of the layer to export.
SURVEY_LAYER_INDEX = 16

survey_fc = fc.layers[SURVEY_LAYER_INDEX]
survey_fc.properties.name

### Request Sample data

In [None]:
%%time
# Step 1 - FeatureLayer.query returns a FeatureSet:
# https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.FeatureLayer.query
# Geometry is not requested; only the attribute columns come back.
survey_fs = survey_fc.query(
    # "projeto_publicacao = 'Folha Petrolina'",
    return_geometry=False,
)

# Step 2 - the read-only FeatureSet.sdf property gives a spatially enabled dataframe:
# https://developers.arcgis.com/python/latest/api-reference/arcgis.features.toc.html#arcgis.features.FeatureSet.sdf
# It is indexed by "objectid" so later steps can join on the index.
survey_df = survey_fs.sdf.set_index("objectid")

survey_df.info()

### Identify related table IDs

In [None]:
# Resolve the table related to the survey layer through its first relationship.
survey_relationships = survey_fc.properties.relationships
if not survey_relationships:
    raise IndexError(
        f"Layer {survey_fc.properties.name!r} declares no relationships; "
        "pick a layer index that has a related table."
    )

# Looked up once instead of once per table inside the comprehension.
related_table_id = survey_relationships[0].relatedTableId

# (position in fc.tables, table name) for every table whose id matches.
assay_tbl = [
    (k, t.properties.name)
    for k, t in enumerate(fc.tables)
    if t.properties.id == related_table_id
]
if not assay_tbl:
    raise IndexError(
        f"No table in fc.tables has id {related_table_id!r} "
        "(relatedTableId of the survey layer's first relationship)."
    )

# NOTE: table_id is the position inside fc.tables (it is used as
# fc.tables[table_id] later on), not the table's own properties.id.
table_id, table_name = assay_tbl[0]
table_id, table_name

### Get Analytic data

In [None]:
%%time

def valor_to_text(df):
    """Return the exported text value for each row of ``df``.

    The ``qualificador`` column (missing entries treated as an empty string)
    is prefixed to the string form of ``valor`` and the result is lower-cased.
    Any text that then starts with ``n``, ``i``, ``h`` or ``p`` is replaced
    entirely by the code ``ND``, ``IS``, ``INF`` or ``PRD`` respectively;
    everything else is returned as built.

    Parameters
    ----------
    df : DataFrame with ``qualificador`` and ``valor`` columns.

    Returns
    -------
    Series of text values aligned with ``df``.
    """
    prefix = df.qualificador.fillna("")
    number_text = df.valor.astype(str)
    text = prefix.str.cat(number_text, sep='').str.lower()

    # NOTE(review): depending on the pandas version a missing valor may
    # stringify to "nan", which the "n" rule below would turn into ND -
    # confirm that this is the intended outcome.
    leading_letter_codes = (
        (r"^n+.*$", "ND"),
        (r"^i+.*$", "IS"),
        (r"^h+.*$", "INF"),
        (r"^p+.*$", "PRD"),
    )
    # Applied in order; the codes are upper-case and the patterns lower-case,
    # so an earlier replacement is never matched again by a later rule.
    for pattern, code in leading_letter_codes:
        text = text.replace(pattern, code, regex=True)
    return text

# Query the related table and reduce it to a Series of harmonized text values
# indexed by (amostra, analito).
analysis_df = (
    fc.tables[table_id]
        .query(out_fields=["amostra", "analito", "unidade", "qualificador", "valor"])
        .sdf
        .assign(
            # Column key: analito and unidade joined with "_", lower-cased.
            analito=lambda df: df.analito.str.cat(df.unidade, sep='_').str.lower(),
            # Passed as a callable: assign() calls it with the frame being built.
            valor=valor_to_text,
        )
        .filter(
            ["amostra", "analito", "valor"]
        )
        .set_index(
            ["amostra", "analito"]
        )
        # Collapse the column axis only. A bare squeeze() also collapses the
        # row axis, so a one-row result would become a scalar and break the
        # .str / .unstack() steps that expect a Series.
        .squeeze("columns")
)

analysis_df

### Sanity check: count the values that start with a letter

In [None]:
# Count each distinct value that begins with a letter.
starts_with_letter = analysis_df.str.contains(r'^[A-Za-z].*')
analysis_df[starts_with_letter].value_counts()

### Pivot

In [None]:
%%time

# Pivot the long (amostra, analito) Series into a wide table: one row per
# amostra, one column per analito.
# The nlevels guard keeps the cell safe to re-run: once pivoted, the index has
# a single level and the step is skipped (calling unstack() again on the wide
# table would stack it back into a Series).
if analysis_df.index.nlevels > 1:
    analysis_df = (
        analysis_df.unstack()
            # Same index name as survey_df, which the export step joins by index.
            # NOTE(review): assumes amostra holds the survey layer's objectid
            # values - confirm against the relationship's key fields.
            .rename_axis("objectid")
    )
analysis_df

### Export to CSV

In [None]:
# Destination folder, plus a file-friendly dataset name built by slugifying
# the related table's name with "_" as the separator.
out_path = Path("output")
out_dataset_name = slugify(table_name, separator="_")

# Create the folder in a single call: exist_ok replaces the separate exists()
# check (and the gap between checking and creating); parents also covers a
# nested out_path.
out_path.mkdir(parents=True, exist_ok=True)

# Left join on the index: every survey row is kept and the pivoted columns are
# added where the index values match.
result_df = survey_df.join(analysis_df)

result_df.to_csv(
    out_path.joinpath(f"{out_dataset_name}.csv"),
    index=True  # write the index as the first CSV column
)

# Preview up to three random rows; capped so small results do not raise.
result_df.sample(min(3, len(result_df)))

### Export to Geopackage

In [None]:
import geopandas

# Point geometries built from the longitude / latitude columns of the joined table.
# NOTE(review): assumes those coordinates are expressed in EPSG:4674 - confirm.
points = geopandas.points_from_xy(x=result_df.longitude, y=result_df.latitude)
geo_df = geopandas.GeoDataFrame(result_df, geometry=points, crs="EPSG:4674")

# TODO: check columns
gpkg_file = out_path / "geoquimica.gpkg"
geo_df.to_file(
    gpkg_file,
    driver="GPKG",  # TODO: check pyogrio
    layer=out_dataset_name,
)