In [None]:
import json
import re

import ckanapi
import time
import pandas as pd
import geopandas

import panel as pn
import param
pn.extension()

import geojson
from shapely.geometry import shape, Point

import folium
import matplotlib.pyplot as plt

import os
import sys

# Put the parent of the current working directory on the import path (once),
# presumably so the ioos_ckan_catalog package imported below can be found.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

#from .ioos_ckanapi.util import test_func
from ioos_ckan_catalog.ioos_ckanapi.util import query_ckan, create_geodataframe, plot
#from ioos_ckan_catalog.ioos_ckanapi.util import create_geodataframe, plot

#from ioos_ckan_catalog import ioos_ckanapi

### Set up a ckanapi client to query IOOS Catalog

In [None]:
# RemoteCKAN is the ckanapi client class used here to talk to a CKAN site by URL.
from ckanapi import RemoteCKAN
# User-agent string handed to the client; it is reused for the demo.ckan.org client further down.
ua = 'ckanapiioos/1.0 (+https://ioos.us/)'
# Client bound to the IOOS Catalog; the query cells below all go through this object.
ioos_catalog = RemoteCKAN('https://data.ioos.us', user_agent=ua)
# Bare expression so the notebook displays the client object.
ioos_catalog

### Query examples using ckanapi:

More information about ckanapi is available at: https://github.com/ckan/ckanapi.

**Example command line execution of ckanapi:**
```
ckanapi action package_search -r https://data.ioos.us q=res_format:ERDDAP-TableDAP fq=cf_standard_names:mass_concentration_of_oxygen_in_sea_water rows=1
```

**Example queries using the Python package**

First, filter only for `cf_standard_names:mass_concentration_of_oxygen_in_sea_water` (should produce ~ 574 results):
```
datasets = ioos_catalog.action.package_search(fq='+cf_standard_names:mass_concentration_of_oxygen_in_sea_water', rows=50)
datasets['count']
```

Add a filter for `res_format:ERDDAP-TableDAP` (~ 507 results):
```
datasets = ioos_catalog.action.package_search(q='+res_format:ERDDAP-TableDAP', fq='+cf_standard_names:mass_concentration_of_oxygen_in_sea_water', rows=50)
datasets['count']
```

Adding an organization filter for `+organization:gcoos` (~ 143 results):
```
datasets = ioos_catalog.action.package_search(q='+res_format:ERDDAP-TableDAP +organization:gcoos', fq='+cf_standard_names:mass_concentration_of_oxygen_in_sea_water', rows=50)
datasets['count']
```

Next, we'll create some controls to automate the parameter selection and generate these queries dynamically.

### Query IOOS Catalog to Populate Parameter Pulldowns

We can query CKAN API endpoints to populate pulldown menus for filter criteria for dataset queries.  Endpoints available to populate information from CKAN database include:
- [tag_list](https://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.tag_list)
- [organization_list](https://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.organization_list)

Use ckanapi to query these IOOS Catalog endpoints for values. Some filtering is necessary for things like tag_list to restrict to CF Standard Names (approximately).

In [None]:
# Fetch the raw pick-list values from the catalog.
tags = ioos_catalog.action.tag_list()
organizations = ioos_catalog.action.organization_list()
# Lead with None so the organization selector built later has a "no selection" entry.
organizations.insert(0, None)

# Preview the first eleven entries (indices 0 through 10).
for i, org in enumerate(organizations[:11]):
    print(f"i: {i}, org: {org}")

Next, filter this list using regex to only include tags that match the CF Standard Name patterns (underscore ('_') between text phrases).

We need to use one of the acceptable approaches to removing elements from a list in Python, such as `filter` or a list comprehension (i.e. not removing items while iterating over the list with a `for ... in` loop, because the indexes get out of step):

In [None]:
print(f"tags length: {len(tags)}")

# Approximate "is a CF Standard Name" as: contains at least one underscore and
# has no whitespace or angle-bracket characters.
# Raw strings are used so the backslash in `\s` reaches the regex engine instead of
# being read as an (invalid) Python string escape. With search() a single `_` / `\s`
# matches exactly the same tags as the lazy `_+?` / `\s+?` forms.
# (A pattern such as `_*` would not work here: it matches the empty string, i.e. every tag.)
p = re.compile(r"_")
whitespace = re.compile(r"\s")
bracket = re.compile(r"[<>]")

# Build a new list rather than deleting from `tags` while looping over it.
tags_filtered = [
    tag for tag in tags
    if p.search(tag) and not whitespace.search(tag) and not bracket.search(tag)
]
# Lead with None so the selector built from this list has a "no selection" entry.
tags_filtered.insert(0, None)

# print out the first tags in tags_filtered (indices 0 through 10):
print(f"tags_filtered length: {len(tags_filtered)}")
for i, tag in enumerate(tags_filtered[:11]):
    print(f"i: {i}, tag: {tag}")

For CKAN Resource Formats, according to the [CKAN API docs](https://docs.ckan.org/en/latest/api/index.html) there doesn't appear to be an API function to obtain a list, so instead we just create it manually.

In [None]:
# Resource formats offered in the format selector, spelled out by hand.
# The leading None entry stands for "no format selected".
formats = [
    None,
    'OPeNDAP', 'ERDDAP-TableDAP', 'ERDDAP-GridDAP', 'ERDDAP-WMS',
    'SOS', 'HTML', 'WCS', 'WMS', 'WFS', 'XML', 'PDF', 'TXT',
]
# Preview the first eleven entries (indices 0 through 10).
for i, tag in enumerate(formats[:11]):
    print(f"i: {i}, tag: {tag}")

Create a Param selector to choose a CF Standard Name for query:

In [None]:
# Preferred initial CF Standard Name. The tag list comes from the live catalog, so only
# use the preferred value when it is actually present; a default that is missing from
# `objects` would make the selector fail validation when the class is defined.
_default_cf_std_name = 'sea_water_turbidity'
if _default_cf_std_name not in tags_filtered:
    _default_cf_std_name = None


class CatalogParams(param.Parameterized):
    """Selections that drive the catalog query and the plot.

    sel_cf_std_name  -- one of `tags_filtered` (None allowed).
    sel_organization -- one of `organizations` (None allowed, and the default).
    sel_format       -- one of `formats` (None allowed, and the default).
    sel_plot_type    -- 'Static Map' or 'Heat Map' (None not allowed).
    """
    sel_cf_std_name = param.ObjectSelector(objects=tags_filtered, default=_default_cf_std_name, allow_None=True, label="Select a CF Standard Name:")
    sel_organization = param.ObjectSelector(objects=organizations, default=None, allow_None=True, label="Select an Organization:")
    sel_format = param.ObjectSelector(objects=formats, default=None, allow_None=True, label="Select a Format:")
    sel_plot_type = param.ObjectSelector(objects=['Static Map','Heat Map'], default='Heat Map', allow_None=False, label="Select a Plot Type:")

# Single shared instance read by the query and plot cells below.
chart_params = CatalogParams()
chart_params


Create some Panel widgets to provide the interactive controls and outputs for the query/plot

In [None]:
generate_plot_button = pn.widgets.Button(name='Generate Plot', button_type='primary')
status = pn.widgets.TextInput(value='')

def generate_plot(event):
    """Button callback: query the catalog with the current selections and show the plot.

    Reads the selections from `chart_params`, runs query_ckan -> create_geodataframe -> plot,
    places the result in the plot region of `gspec`, and reports progress in `status`.
    `gspec` is defined in a later cell; it only has to exist by the time the button is clicked.

    event -- the click event passed in by Panel; not used.
    """
    # Publish the latest plot at module level so other cells can display it after a click.
    global ioos_plot

    filters = dict(
        sel_cf_std_name=chart_params.sel_cf_std_name,
        sel_organization=chart_params.sel_organization,
        sel_format=chart_params.sel_format,
    )

    status.value = "Querying IOOS Catalog for datasets..."
    try:
        list_of_datasets = query_ckan(ioos_catalog, **filters)
        gdf = create_geodataframe(list_of_datasets)
        ioos_plot = plot(gdf, sel_plot_type=chart_params.sel_plot_type, **filters)
        gspec[1:,1:4] = pn.panel(ioos_plot)
    except Exception as exc:
        # Don't leave the status box stuck on "Querying..."; report the failure and
        # re-raise so the error still surfaces exactly as it did before.
        status.value = f"Query failed: {exc}"
        raise
    status.value = "Completed query."

generate_plot_button.on_click(generate_plot)

Define a Panel layout to hold our parameter selection pulldowns:

In [None]:
#gspec = pn.GridSpec(sizing_mode='stretch_height', height_policy='fit')

# define a Panel GridSpec object with 4 columns and 10 rows:
gspec = pn.GridSpec(mode='override', sizing_mode='stretch_both', max_height=800)
# row 0, all columns: the parameter selectors
gspec[0, 0:] = chart_params
#gspec[1,0] = pn.Row(generate_plot_button, status)
# row 1, column 0: the button wired to generate_plot()
gspec[1,0] = generate_plot_button
# rows 2-9, column 0: the status text box updated by generate_plot()
gspec[2:10,0] = status
# rows 1 onward, columns 1-3: placeholder for the region generate_plot() later assigns the plot to
gspec[1:,1:4] = pn.widgets.TextInput(value='Placeholder')
# Bare expression so the notebook displays the layout.
gspec

In [None]:
# The plot is built inside the 'Generate Plot' button callback, so a module-level
# `ioos_plot` may not exist (for example on a fresh Restart & Run All, before any click).
# Fall back to a short notice instead of failing with a NameError.
last_plot = globals().get('ioos_plot', "No plot yet: click 'Generate Plot' above first.")
last_plot

#### Submit a query to the CKAN API with parameters of interest

See the util.query_ckan() function for details.

In [None]:
# Fetch the datasets matching the current selector values.
# The details of the request (presumably including result paging) live in the imported
# query_ckan() helper rather than in this notebook.
list_of_datasets = query_ckan(ioos_catalog, sel_cf_std_name=chart_params.sel_cf_std_name, sel_organization=chart_params.sel_organization, sel_format=chart_params.sel_format)

#### Convert the GeoJSON 'spatial' column to Shapely geometry object to use with GeoPandas

Resulting fields/columns:
- 'spatial': Shapely Polygon
- 'spatial_point': Shapely Polygon or Point (depending on the size of the original polygon bounding box)
- 'spatial_geojson': GeoJSON text field, the original 'spatial' field value from CKAN API

WKT is supposed to work according to the [docs](https://geopandas.org/gallery/create_geopandas_from_pandas.html), but seems to fail.

In [None]:
# One row per dataset, keeping only datasets that carry a 'spatial' value.
# reset_index(drop=True) renumbers the surviving rows 0..n-1, so label-based lookups
# such as `df.spatial[0]` keep working even when the first result was dropped.
df = (
    pd.DataFrame(list_of_datasets)
    .dropna(subset=['spatial'])
    .reset_index(drop=True)
)

# create a new column to store the original 'spatial' column (which is GeoJSON format):
df['spatial_geojson'] = df['spatial']

# convert the 'spatial' column from GeoJSON to Shapely geometry for GeoPandas compatibility:
df['spatial'] = df['spatial'].apply(lambda text: shape(geojson.loads(text)))



Create a new 'spatial_point' column of Shapely geometry objects that converts any geometries where the difference between lat/lon min and max is < .0001 degree to Point, and retains all the others as Polygon:

In [None]:
# Bounding boxes narrower or shorter than this (in degrees) are treated as a single point.
BBOX_TOLERANCE_DEG = 0.0001


def _point_or_polygon(row):
    """Return row.spatial for a real bounding box, or a Point for a (near-)degenerate one.

    The box counts as degenerate when its width OR its height is at most
    BBOX_TOLERANCE_DEG; the Point is then placed at the box's (east, south) corner.
    """
    west = float(row['bbox-west-long'])
    east = float(row['bbox-east-long'])
    south = float(row['bbox-south-lat'])

    # Subtract the signed coordinates and take abs() of the difference. Taking abs() of each
    # coordinate first is wrong for boxes that straddle the prime meridian or the equator:
    # west=-10, east=10 would give |10 - 10| = 0 and collapse a 20-degree-wide box to a Point.
    is_wide = abs(east - west) > BBOX_TOLERANCE_DEG
    # The north edge is only read when the width test passes.
    if is_wide and abs(float(row['bbox-north-lat']) - south) > BBOX_TOLERANCE_DEG:
        return row.spatial
    return Point(east, south)


df['spatial_point'] = df.apply(_point_or_polygon, axis=1)

df

**Debug/Testing:** 

The code in the first for loop just outputs the df.spatial_geojson in WKT and the second for loop lists the converted WKT geometries in df.spatial_point for all rows in the DataFrame.

Commented out as it's not necessary to run in order for the notebook to execute.

In [None]:
#for shape_geojson in (df.spatial_geojson):
#    geom = shape(geojson.loads(shape_geojson))
#    print(f"wkt: {geom.wkt}")
#    print(f"wkb: {geom.wkb}")

#for spatial_val in df.spatial: print(spatial_val)
#for spatial_val in df.spatial_geojson: print(spatial_val)

# Show the Python type of the first row's geometries.
# .iloc[0] selects by position; plain [0] is a label lookup and raises KeyError when the
# row labelled 0 was removed by the earlier dropna().
if df.empty:
    print("No rows with a 'spatial' value; nothing to inspect.")
else:
    print(type(df.spatial_point.iloc[0]))
    print(type(df.spatial.iloc[0]))

### GeoPandas GeoDataFrame

Create a GeoPandas GeoDataFrame from the regular Pandas DataFrame.  Assign geometry column.

In [None]:
# Wrap the DataFrame as a GeoDataFrame, declaring 'spatial_point' as the active geometry
# column and tagging it with the EPSG:4326 coordinate reference system.
gdf = geopandas.GeoDataFrame(df, geometry="spatial_point", crs="EPSG:4326")

# Display the name of the active geometry column.
gdf.geometry.name


### Plotting

Examples taken from: https://geopandas.readthedocs.io/en/latest/gallery/plotting_with_folium.html

#### Make a simple plot color-coded by the `creator_sector` field

In [None]:
# Plot the active geometry column, coloured by the 'creator_sector' column, with a legend.
gdf.plot(column='creator_sector', figsize=(18, 18), legend=True)

#### Create a more complicated plot with world continents background.

**To Do:** 
- retrieve a bounding box from the GeoDataFrame and set plot extent to match (defaults to extent of largest dataset, in this case the natural earth background)

In [None]:
# Background layer: the low-resolution Natural Earth dataset referenced through geopandas.
# NOTE(review): geopandas.datasets is reportedly deprecated/removed in recent geopandas
# releases -- confirm against the installed version.
basemap_path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(basemap_path)

# One shared Axes: grey, semi-transparent basemap first, then the dataset footprints on top.
fig, ax = plt.subplots(figsize=(24, 18))
world.plot(color='grey', alpha=0.4, ax=ax)
gdf.plot(column='creator_sector', legend=True, ax=ax)
# The title records which filter selections produced the figure.
ax.set_title(f"IOOS Catalog Dataset Coverage.  Filters - CF Std Name: {chart_params.sel_cf_std_name}, Org: {chart_params.sel_organization}, Format: {chart_params.sel_format}")

In [None]:
from folium import plugins

# Named `heat_map` rather than `map` so the builtin map() is not shadowed for the
# rest of the kernel session.
heat_map = folium.Map(location = [15,30], tiles='Cartodb dark_matter', zoom_start = 2)

# One [lat, lon] pair per dataset. The centroid of a Point is the point itself, so the
# same expression covers both Point and Polygon geometries. Missing or empty geometries
# have no usable centroid coordinates and are skipped.
heat_data = [
    [geom.centroid.y, geom.centroid.x]
    for geom in gdf.geometry
    if geom is not None and not geom.is_empty
]

plugins.HeatMap(heat_data).add_to(heat_map)

heat_map

In [None]:
# Stand-alone OpenStreetMap view centred on a fixed location.
# Named `osm_map` rather than `map` so the builtin map() is not shadowed.
osm_map = folium.Map(location = [13.406,80.110], tiles='OpenStreetMap' , zoom_start = 9)
osm_map

### Demo CKAN Instance Examples:

In [None]:
#ioos_catalog = RemoteCKAN('https://data.ioos.us', user_agent=ua, get_only=True)
# Second client, pointed at demo.ckan.org; it reuses the `ua` user-agent string
# that was defined alongside the IOOS Catalog client.
demo = RemoteCKAN('https://demo.ckan.org', user_agent=ua)
# Bare expression so the notebook displays the client object.
demo


In [None]:
# Bind the response to the name that is printed; printing `groups` while the result
# was stored under a different name raised NameError.
# NOTE(review): `id` does not appear among the parameters documented for group_list, and
# 'Test%20Group' is URL-encoded -- confirm whether group_show(id='Test Group') was intended.
groups = demo.action.group_list(id='Test%20Group')
print(groups)

In [None]:
# Search the demo site with three `+field:value` terms in `q`
# (organization, resource format and tag).
demo_datasets = demo.action.package_search(q='+organization:sample-organization +res_format:GeoJSON +tags:geojson')
# Show the 'count' entry of the response (used as the number of matches elsewhere in this notebook).
demo_datasets['count']
#print(demo_datasets)