# Connecting to oxeo-graph Neptune DB
## Create EC2 instance
Create an EC2 instance and attach the security group `db-sg1` to it. Without this security group, connecting to the Neptune DB won't work.

## Install Java and Gremlin
Follow this AWS guide to install Java and Gremlin: https://docs.amazonaws.cn/en_us/neptune/latest/userguide/get-started-graph-gremlin.html

If the Java `cacerts` file is not in the directory given in the guide, look for it at `/etc/ssl/certs/java/cacerts`.
## Install Graph Visualization
Follow the installation instructions at https://github.com/aws/graph-notebook



In [None]:
# Enable autoreload (mode 2) so edited modules are re-imported before each cell runs,
# without restarting the kernel.
%load_ext autoreload

%autoreload 2
from oxeo.core.models import graph 
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
import nest_asyncio
# Patch asyncio to allow nested event loops; presumably needed so the Gremlin driver can
# run inside Jupyter's already-running loop — TODO confirm.
nest_asyncio.apply()


In [None]:
# (Re)load the graph-notebook magics used in later cells
# (%%graph_notebook_config, %db_reset, %%gremlin).
%reload_ext graph_notebook.magics

In [None]:
%%graph_notebook_config
{
  "host": "oxeo-graph.cluster-cstqmhnp1nqd.eu-central-1.neptune.amazonaws.com",
  "port": 8182,
  "auth_mode": "DEFAULT",
  "load_from_s3_arn": "",
  "ssl": true,
  "aws_region": "eu-central-1"
}

In [None]:
# Reset graph if needed.
# WARNING: destructive — this asks the cluster to perform a full database reset.
# Skip this cell unless a clean graph is really wanted.
# Step 1: request a reset token through the graph-notebook magic.
res = %db_reset --generate-token
token = res["payload"]["token"]
# Step 2: confirm the reset by posting that token to the cluster's /system endpoint.
!curl -X POST -H 'Content-Type: application/x-www-form-urlencoded' https://oxeo-graph.cluster-cstqmhnp1nqd.eu-central-1.neptune.amazonaws.com:8182/system -d 'action=performDatabaseReset&token='{token}

# Predict AOI and append to graph using STAC catalog

If a PROJ error appears, unset the `PROJ_LIB` environment variable (`unset PROJ_LIB`) and re-run the notebook from the terminal.

In [None]:
import pystac_client
from oxeo.water.models.segmentation import Segmentation2DPredictor
from oxeo.core.models.tile import load_tile_from_stac_as_dict 
from oxeo.core.models.tile import load_aoi_from_stac_as_dict, tile_from_id, TilePath, tile_to_geom
from oxeo.core.models import data
from oxeo.core.utils import get_bounding_box

import matplotlib.pyplot as plt

# STAC API endpoint (Element84 Earth Search, v0); also passed as the catalog URL to the
# loading and prediction calls in later cells.
# NOTE(review): Earth Search v0 may have been superseded by a newer version — confirm the
# endpoint and the collection names are still served.
URL = "https://earth-search.aws.element84.com/v0"
# Client for that endpoint.
catalog = pystac_client.Client.open(URL)

In [None]:
# Segmentation model restored from a local DeepLab checkpoint.
# The same six-band list is repeated wherever imagery is loaded further down.
predictor = Segmentation2DPredictor(
    ckpt_path="../data/deeplab_epoch_004.ckpt",
    model_name="deeplab",
    bands=["B08", "B04", "B03", "B02", "B11", "B12"],
    chip_size=256,
)

## Define waterbodies AOIs

In [None]:
# Waterbody AOIs as GeoJSON polygons: one closed ring of [longitude, latitude] pairs each.

# wb1
geom_1 = {
    "type": "Polygon",
    "coordinates": [
        [
            [-58.19595336914062, -36.44586592744573],
            [-58.1425666809082, -36.44586592744573],
            [-58.1425666809082, -36.409402240706655],
            [-58.19595336914062, -36.409402240706655],
            [-58.19595336914062, -36.44586592744573],
        ]
    ],
}

# wb2
# The longitudes were originally recorded as -418.98... / -418.95..., i.e. wrapped one
# full turn west of the valid [-180, 180] range (typical of a drawing tool with a
# wrapped map). They are normalised here by adding 360 so the bounding box derived
# from this polygon is a valid geographic extent.
geom_2 = {
    "type": "Polygon",
    "coordinates": [
        [
            [-58.98722648620605, -36.1316359508978],
            [-58.9591598510742, -36.1316359508978],
            [-58.9591598510742, -36.120959576074895],
            [-58.98722648620605, -36.120959576074895],
            [-58.98722648620605, -36.1316359508978],
        ]
    ],
}
# Bounding boxes of the two AOI polygons; used later as the STAC search `bbox`
# (annotated there as min_x, min_y, max_x, max_y).
aoi_bbox_1 = get_bounding_box(geom_1)
aoi_bbox_2 = get_bounding_box(geom_2)


## Load slices from waterbody for visualization

In [None]:
from skimage import exposure, img_as_float
import numpy as np

# STAC search parameters: one dict per waterbody, differing only in the bounding box.
search_params_1 = dict(
    bbox=aoi_bbox_1,  # min_x, min_y, max_x, max_y
    collections=["sentinel-s2-l2a-cogs"],
    datetime="2020-04-01/2020-06-01",
)

search_params_2 = dict(
    bbox=aoi_bbox_2,  # min_x, min_y, max_x, max_y
    collections=["sentinel-s2-l2a-cogs"],
    datetime="2020-04-01/2020-06-01",
)

# Get waterbody 1
# `revisit=slice(None)` presumably selects every revisit in the date range — TODO confirm.
sample_1 = load_aoi_from_stac_as_dict(
    URL,
    search_params_1,
    bands=["B08", "B04", "B03", "B02", "B11", "B12"],
    revisit=slice(None),
    chunk_aligned=False,
    median=False,
)

# Get waterbody 2
sample_2 = load_aoi_from_stac_as_dict(
    URL,
    search_params_2,
    bands=["B08", "B04", "B03", "B02", "B11", "B12"],
    revisit=slice(None),
    chunk_aligned=False,
    median=False,
)




In [None]:
# Index of the revisit to display; the prediction-plot cell further down reads it too.
i = 6

# Render bands at positions 1, 2, 3 of the loaded stack as an RGB composite for each
# waterbody (presumably B04, B03, B02 given the band order requested when loading).
for sample_name, sample in (("Sample 1", sample_1), ("Sample 2", sample_2)):
    # Move the band axis last, which is the layout imshow expects for colour images.
    img = img_as_float(sample["image"][i][[1, 2, 3]].transpose(1, 2, 0))
    # Stretch to the observed value range, ignoring NaNs.
    vmin, vmax = np.nanmin(img), np.nanmax(img)
    img = exposure.rescale_intensity(img, in_range=(vmin, vmax))
    plt.imshow(img)
    plt.title(f"{sample_name} - revisit {i}")
    plt.show()

## Water and Cloud predictions

In [None]:

def _predict_waterbody(bbox):
    """Run `predictor.predict_stac_aoi` on the Sentinel-2 COG collection for `bbox`.

    Uses the same catalog URL, date range and options for every waterbody; only the
    bounding box changes between calls.
    """
    return predictor.predict_stac_aoi(
        catalog_url=URL,
        collections=["sentinel-s2-l2a-cogs"],
        datetime="2020-04-01/2020-06-01",
        constellation="sentinel-2",
        bbox=bbox,
        revisit=slice(None),
        chunk_aligned=False,
    )


pred_1 = _predict_waterbody(aoi_bbox_1)
pred_2 = _predict_waterbody(aoi_bbox_2)

In [None]:
# Show the prediction for revisit `i` of each waterbody (sample 1, then sample 2),
# one figure per sample.
for prediction in (pred_1, pred_2):
    plt.imshow(prediction[i])
    plt.show()

## Add waterbody and asset nodes to knowledge graph

In [None]:
# Bands kept when extracting the coordinates of each AOI.
band_names = ["B08", "B04", "B03", "B02", "B11", "B12"]

# Waterbody 1
aoi_1 = data.get_aoi_from_stac_catalog(
    catalog_url=URL,
    search_params=search_params_1,
    chunk_aligned=True,
)
# Coordinates of the first time step, restricted to the bands above, are converted to
# the dict later used as the waterbody node's properties.
first_revisit_1 = aoi_1.isel(time=0)
coords = first_revisit_1.sel(band=band_names).coords
coords_dict_1 = graph.stackstac_xa_coords_to_dict(coords)

# Waterbody 2
aoi_2 = data.get_aoi_from_stac_catalog(
    catalog_url=URL,
    search_params=search_params_2,
    chunk_aligned=True,
)
first_revisit_2 = aoi_2.isel(time=0)
coords = first_revisit_2.sel(band=band_names).coords
coords_dict_2 = graph.stackstac_xa_coords_to_dict(coords)



In [None]:
# Connect to graph db and add nodes.
# The websocket connection is kept in its own variable so it can be released with
# `graph_connection.close()` once the notebook is done with the graph.
graph_connection = DriverRemoteConnection(
    'wss://oxeo-graph.cluster-cstqmhnp1nqd.eu-central-1.neptune.amazonaws.com:8182/gremlin',
    'g',
)
g = traversal().withRemote(graph_connection)

# Add asset node
graph.add_node_to_graph(g, "asset", {"id": "test_asset", "name": "Great Asset"})

In [None]:
# Add waterbody nodes and create edges from asset.
# Each coords dict (wb1, then wb2) becomes a "waterbody" vertex, which is then linked
# from the "test_asset" vertex with a "has" edge.
for waterbody_props in (coords_dict_1, coords_dict_2):
    graph.add_node_to_graph(g, "waterbody", waterbody_props)
    # Fetch the vertex that was just added so it can be used as the edge target.
    waterbody_vertex = g.V(waterbody_props["id"]).next()
    g.V("test_asset").addE("has").to(waterbody_vertex).next()

In [None]:
# Add revisit nodes to waterbodies.
# One revisit node per entry of the AOI's time axis; `res_level` is the number of
# pixels equal to 1 in that revisit's prediction (presumably the water class).
# NOTE(review): the predictions were computed with chunk_aligned=False while aoi_1/aoi_2
# were loaded with chunk_aligned=True — confirm both share the same time axis, since
# predictions are indexed here by position along aoi.time.
for waterbody_props, waterbody_aoi, waterbody_pred in (
    (coords_dict_1, aoi_1, pred_1),
    (coords_dict_2, aoi_2, pred_2),
):
    parent_id = waterbody_props["id"]
    # Dedicated loop names: the notebook-level `i` (revisit index used by the plotting
    # cells) must not be overwritten by this loop.
    for revisit_idx, revisit_time in enumerate(waterbody_aoi.time.values):
        graph.add_revisit(
            g,
            parent_node=parent_id,
            v_properties={
                "id": f"{parent_id}_revisit_{revisit_idx}",
                "res_level": int(np.sum(waterbody_pred[revisit_idx] == 1)),
            },
            timestamp=str(revisit_time),
        )

### Visualize graph

In [None]:
%%gremlin -p v,outE,inV
g.V().outE().inV().path()

## Queries

In [None]:
from gremlin_python.process.traversal import Operator
from gremlin_python.process.traversal import P
from gremlin_python.process.traversal import Order


# Both queries walk two "has" hops out from the asset vertex and average the
# `res_level` property of the vertices reached.

# Get the mean res_level for the asset:
mean_res_level = (
    g.V("test_asset")
    .out("has")
    .out("has")
    .values("res_level")
    .mean()
    .next()
)
print(mean_res_level)

# Get the mean res_level without 0:
mean_res_level_nonzero = (
    g.V("test_asset")
    .out("has")
    .out("has")
    .has("res_level", P.gt(0))
    .values("res_level")
    .mean()
    .next()
)
print(mean_res_level_nonzero)

In [None]:
# Plot res_level timeseries for a given waterbody
waterbody_id = coords_dict_1["id"]

# Timestamps stored on the outgoing "has" edges, oldest first.
edge_timestamps = (
    g.V(waterbody_id)
    .outE("has")
    .order()
    .by('timestamp', Order.asc)
    .values("timestamp")
    .to_list()
)
# Keep the first 10 characters of each timestamp (presumably the YYYY-MM-DD part).
ts = [stamp[:10] for stamp in edge_timestamps]

# res_level of the vertices those edges point to, in the same edge order.
ts_res_level = (
    g.V(waterbody_id)
    .outE("has")
    .order()
    .by('timestamp', Order.asc)
    .inV()
    .values("res_level")
    .to_list()
)

fig, ax = plt.subplots(figsize=(15, 5))
positions = list(range(len(ts_res_level)))
ax.plot(positions, ts_res_level, label='Res Level', linewidth=1)
# Label each point with its date; skipped if the two queries disagree in length,
# because the labels would then be misaligned with the values.
if len(ts) == len(ts_res_level):
    ax.set_xticks(positions)
    ax.set_xticklabels(ts)
ax.tick_params(axis='x', labelsize=15, rotation=45)
ax.set_xlabel("Revisit date")
ax.set_ylabel("res_level")
ax.set_title(f"res_level time series for waterbody {waterbody_id}")
ax.legend()

fig.tight_layout()