In [8]:
from pathlib import Path
import re
import yaml
from urllib.parse import urlparse, parse_qs, urlencode

import requests

In [51]:
# README processed by the conversion cell further down: it is opened for
# reading there and then overwritten with the rewritten front matter.
path = Path("../sentinel-2/ccc/README.md")

In [118]:
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class ExampleDefinition:
    """One example link in the shape stored under ``examples`` in the front matter.

    The parsers in this notebook fill the fields straight from URL query
    strings, so the values they pass in are strings even where a numeric
    type is declared.
    """
    zoom: int
    lat: float
    lng: float
    datasetId: str  # values produced by the lookup tables: "S2L2A", "S2L1C", "S1_AWS_IW_VVVH"
    fromTime: str
    toTime: str
    platform: list
    evalscripturl: Optional[str] = None
    evalscript: Optional[str] = None
    name: Optional[str] = None
    additionalQueryParams: Optional[list] = None

    def dict(self):
        """Return the fields as a plain dict, leaving out unset or empty ones.

        ``None``, empty strings and empty lists are dropped. Numeric values
        are always kept, so a legitimate ``0`` (zoom level 0, or a coordinate
        of exactly 0.0) is not mistaken for "missing".
        """
        def is_set(value):
            # bool is a subclass of int; keep treating False as "not set"
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return True
            return bool(value)

        return {key: value for key, value in asdict(self).items() if is_set(value)}

In [169]:
# Maps the `datasetId` query value read by parse_cdse (dataspace.copernicus.eu
# browser URLs) to the dataset id stored in the example definition.
# Indexed directly, so an id missing from this table raises KeyError.
cdse_lookup = {
    "S2_L2A_CDAS": "S2L2A",
    "S2_L1C_CDAS": "S2L1C",
    "S1_CDAS_IW_VVVH": "S1_AWS_IW_VVVH"
}

# Maps the `source` query value read by parse_playground (sentinel-playground
# URLs) to the dataset id stored in the example definition.
# Indexed directly, so a source missing from this table raises KeyError.
playground_lookup = {
    "S2": "S2L2A",
    "S1_AWS_IW_VVVH": "S1_AWS_IW_VVVH"
}

In [157]:
def get_urls_to_convert(urls):
    """Pick the links worth converting, preferring anything over playground links.

    Links containing "sentinel-playground" are dropped as long as at least
    one other link remains; if every link is a playground link (or there are
    none at all) the input is handed back unchanged.
    """
    preferred = list(filter(lambda link: "sentinel-playground" not in link, urls))
    return preferred if preferred else urls
        
def resolve_redirect(url, timeout=30):
    """Fetch ``url`` and return the address the response finally came from.

    Args:
        url: Link to request (used here for short links).
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled server would block the notebook indefinitely.

    Returns:
        The ``url`` attribute of the response.

    Raises:
        requests.exceptions.RequestException: propagated unchanged when the
            request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    return response.url

def parse_url(url):
    """Turn an example link into an ExampleDefinition, dispatching on the service in the URL.

    Short links (sentinelshare.page.link, link.dataspace.copernicus.eu) are
    resolved to their target address first and then parsed like the matching
    full link.

    Args:
        url: The link to convert.

    Returns:
        Whatever the matching parser returns, or ``None`` when the URL does
        not belong to any known service.
    """
    # Short links: the link being resolved has to be passed on to the resolver
    if "sentinelshare.page.link" in url:
        return parse_sh(resolve_redirect(url))
    if "link.dataspace.copernicus.eu" in url:
        return parse_cdse(resolve_redirect(url))
    # Full links
    if "sentinel-playground" in url:
        return parse_playground(url)
    if "apps.sentinel-hub.com/eo-browser" in url:
        return parse_sh(url)
    if "dataspace.copernicus.eu/browser" in url:
        return parse_cdse(url)
    return None

def parse_sh(url):
    """Build an ExampleDefinition from the query string of an EO Browser link.

    Query values are taken as-is (strings). ``demSource3D``, ``themeId`` and
    ``visualizationUrl`` are discarded; anything not mapped to a named field
    ends up in ``additionalQueryParams`` as ``[key, first_value]`` pairs.
    A missing zoom/lat/lng/datasetId/fromTime/toTime raises ``KeyError``.
    """
    params = parse_qs(urlparse(url).query)
    for ignored in ("demSource3D", "themeId", "visualizationUrl"):
        params.pop(ignored, None)

    def required(key):
        return params.pop(key)[0]

    def optional(key):
        return params.pop(key, [None])[0]

    zoom = required("zoom")
    lat = required("lat")
    lng = required("lng")
    script_url = optional("evalscripturl")
    script = optional("evalscript")
    dataset = required("datasetId")
    start = required("fromTime")
    end = required("toTime")
    # Whatever is still in the query at this point was not consumed above
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def parse_playground(url):
    """Build an ExampleDefinition from the query string of a Sentinel Playground link.

    The ``source`` value is translated through ``playground_lookup`` and the
    ``time`` value is split on "|" into start and end. Display-only
    parameters (preset, layers, gain, gamma, atmFilter, showDates, temporal)
    are discarded; anything else not mapped to a named field ends up in
    ``additionalQueryParams`` as ``[key, first_value]`` pairs.
    """
    params = parse_qs(urlparse(url).query)
    for ignored in ("preset", "layers", "gain", "gamma", "atmFilter", "showDates", "temporal"):
        params.pop(ignored, None)

    def required(key):
        return params.pop(key)[0]

    def optional(key):
        return params.pop(key, [None])[0]

    dataset = playground_lookup[required("source")]
    start, end = required("time").split("|")
    zoom = required("zoom")
    lat = required("lat")
    lng = required("lng")
    script_url = optional("evalscripturl")
    script = optional("evalscript")
    # Whatever is still in the query at this point was not consumed above
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def parse_cdse(url):
    """Build an ExampleDefinition from the query string of a dataspace.copernicus.eu browser link.

    The ``datasetId`` value is translated through ``cdse_lookup``.
    ``demSource3D``, ``themeId``, ``visualizationUrl`` and ``dateMode`` are
    discarded; anything else not mapped to a named field ends up in
    ``additionalQueryParams`` as ``[key, first_value]`` pairs.
    """
    params = parse_qs(urlparse(url).query)
    for ignored in ("demSource3D", "themeId", "visualizationUrl", "dateMode"):
        params.pop(ignored, None)

    def required(key):
        return params.pop(key)[0]

    def optional(key):
        return params.pop(key, [None])[0]

    dataset = cdse_lookup[required("datasetId")]
    zoom = required("zoom")
    lat = required("lat")
    lng = required("lng")
    script_url = optional("evalscripturl")
    script = optional("evalscript")
    start = required("fromTime")
    end = required("toTime")
    # Whatever is still in the query at this point was not consumed above
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def split_frontmatter_content(content):
    """Split a document into its parsed YAML front matter and the remaining text.

    The document is expected to have the form ``---\\n<yaml>---\\n<body>``.
    Only the first two ``---`` lines are treated as delimiters, so a later
    ``---`` line inside the body (for instance a horizontal rule) stays part
    of the body instead of cutting it short.

    Args:
        content: Full text of the document.

    Returns:
        Tuple ``(front_matter, body)``: the result of ``yaml.safe_load`` on
        the front-matter block, and everything after the closing delimiter.

    Raises:
        ValueError: if the opening or closing ``---`` line is missing.
    """
    pieces = content.split("---\n", 2)
    if len(pieces) < 3:
        raise ValueError("content has no '---' delimited front matter block")
    _, raw_front_matter, body = pieces
    return yaml.safe_load(raw_front_matter), body

In [178]:
# Replace the "## Evaluate..." section of the README with an `examples`
# entry in its front matter, built from the first suitable link in that section.
too_many = []  # READMEs with more than two candidate links

with open(path, "r", encoding="utf-8") as original:
    fm, content = split_frontmatter_content(original.read())

start_examples = content.find("## Evaluate")
if start_examples == -1:
    # Slicing with -1 below would silently mangle the file, so stop here
    raise ValueError(f"No '## Evaluate' section found in {path}")

# Skip the heading's own "##" so the partition stops at the NEXT heading;
# partition (unlike unpacking a split) also copes with this being the last section.
examples, next_heading, remaining_content = content[start_examples + 2:].partition("##")
# Everything between parentheses, i.e. the targets of markdown links
urls = re.findall(r"(?<=\()(.*?)(?=\))", examples)
to_convert = get_urls_to_convert(urls)
if not to_convert:
    raise ValueError(f"No links found in the '## Evaluate' section of {path}")
if len(to_convert) > 2:
    too_many.append(path)

# Use the filtered list so a playground link is only taken when nothing else exists
definition = parse_url(to_convert[0])
if definition is None:
    raise ValueError(f"Unsupported example link in {path}: {to_convert[0]}")
example = definition.dict()
fm["examples"] = [example]
new_fm = yaml.dump(fm, sort_keys=False)
# The body was split off right after "---\n", so put that newline back when reassembling
new_content = "---\n" + new_fm + "---\n" + content[:start_examples] + next_heading + remaining_content

with open(path, "w", encoding="utf-8") as fs:
    fs.write(new_content)


In [174]:
# EOB provided with evalscript directly, single date
eob_eval = "https://apps.sentinel-hub.com/eo-browser/?zoom=10&lat=42.76703&lng=11.22847&themeId=DEFAULT-THEME&visualizationUrl=https://services.sentinel-hub.com/ogc/wms/bd86bcc0-f318-402b-a145-015f85b9427e&evalscript=Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5NjM1XSwKICAgIFswLjQsIDB4NGY4OTJkXSwKICAgIFswLjQ1LCAweDNmN2MyM10sCiAgICBbMC41LCAweDMwNmQxY10sCiAgICBbMC41NSwgMHgyMTYwMTFdLAogICAgWzAuNiwgMHgwZjU0MGFdLAogICAgWzEsIDB4MDA0NDAwXSwKXTsKCmNvbnN0IHZpc3VhbGl6ZXIgPSBuZXcgQ29sb3JSYW1wVmlzdWFsaXplcihyYW1wKTsKCmZ1bmN0aW9uIGV2YWx1YXRlUGl4ZWwoc2FtcGxlcykgewogICAgbGV0IHZhbCA9IGluZGV4KHNhbXBsZXMuQjA4LCBzYW1wbGVzLkIwNCk7CiAgICAvLyBUaGUgbGlicmFyeSBmb3IgdGlmZnMgd29ya3Mgd2VsbCBvbmx5IGlmIHRoZXJlIGlzIG9ubHkgb25lIGNoYW5uZWwgcmV0dXJuZWQuCiAgICAvLyBTbyB3ZSBlbmNvZGUgdGhlICJubyBkYXRhIiBhcyBOYU4gaGVyZSBhbmQgaWdub3JlIE5hTnMgb24gZnJvbnRlbmQuCgogICAgLy8gVE9ETzogQ0hFQ0sgSUYgVEhJUyBpbmRleFZhbCBJUyBTVElMTCBORUNFU1NBUlkKICAgIGNvbnN0IGluZGV4VmFsID0gc2FtcGxlcy5kYXRhTWFzayA9PT0gMSA/IHZhbCA6IE5hTjsKICAgIGNvbnN0IGltZ1ZhbHMgPSB2aXN1YWxpemVyLnByb2Nlc3ModmFsKTsKCiAgICByZXR1cm4gewogICAgICAgIGRlZmF1bHQ6IGltZ1ZhbHMuY29uY2F0KHNhbXBsZXMuZGF0YU1hc2spLAogICA
gICAgIGluZGV4OiBbaW5kZXhWYWxdLAogICAgICAgIGVvYnJvd3NlclN0YXRzOiBbdmFsLCBpc0Nsb3VkKHNhbXBsZXMpID8gMSA6IDBdLAogICAgICAgIGRhdGFNYXNrOiBbc2FtcGxlcy5kYXRhTWFza10KICAgIH07Cn0KCmZ1bmN0aW9uIGlzQ2xvdWQoc2FtcGxlcykgewogICAgY29uc3QgTkdEUiA9IGluZGV4KHNhbXBsZXMuQjAzLCBzYW1wbGVzLkIwNCk7CiAgICBjb25zdCBiUmF0aW8gPSAoc2FtcGxlcy5CMDMgLSAwLjE3NSkgLyAoMC4zOSAtIDAuMTc1KTsKICAgIHJldHVybiBiUmF0aW8gPiAxIHx8IChiUmF0aW8gPiAwICYmIE5HRFIgPiAwKTsKfQo%3D&datasetId=S2L2A&fromTime=2020-07-12T00:00:00.000Z&toTime=2020-07-12T23:59:59.999Z&demSource3D=%22MAPZEN%22#custom-script"
# Take the sample URL apart and strip the parameters that are not needed;
# `del` (rather than pop) fails loudly if one of them is absent.
parts = urlparse(eob_eval)
query = parse_qs(parts.query)
del query["demSource3D"], query["themeId"], query["visualizationUrl"]
query

{'zoom': ['10'],
 'lat': ['42.76703'],
 'lng': ['11.22847'],
 'evalscript': ['Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5NjM1XSwKIC

In [119]:
# Run the sample URL through parse_sh and show the resulting fields as a plain dict
parse_sh(eob_eval).dict()

{'zoom': '10',
 'lat': '42.76703',
 'lng': '11.22847',
 'datasetId': 'S2L2A',
 'fromTime': '2020-07-12T00:00:00.000Z',
 'toTime': '2020-07-12T23:59:59.999Z',
 'platform': ['CDSE', 'EOB'],
 'evalscript': 'Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEz

In [175]:
# Reassemble the URL from `parts` with the trimmed query; doseq=True because
# parse_qs stores every value as a list.
build_url = parts._replace(query=urlencode(query, doseq=True)).geturl()

In [176]:
# Display the rebuilt URL
build_url

'https://apps.sentinel-hub.com/eo-browser/?zoom=10&lat=42.76703&lng=11.22847&evalscript=Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5