# Semantic metalayer

This notebook demonstrates how queries that use the semantic metalayer operate.

**For more information:** 

- @GitLab: https://jeodpp.jrc.ec.europa.eu/apps/gitlab/jeodpp-services/training-sets-for-earth-observation-applications/-/wikis/home
- @Connected: https://connected.cnect.cec.eu.int/groups/bigdataeoss 
- @Internet: https://jeodpp.jrc.ec.europa.eu/home/

**Contacts:**  jrc-jeodpp@ec.europa.eu

**Source data:** http://bigearth.net/

<img src="https://cidportal.jrc.ec.europa.eu/services/shared/html/JRClogo2.png" width="200" height="200" /> <img src="https://cidportal.jrc.ec.europa.eu/services/shared/html/JRCBigDataPlatform_512.png" width="200" height="200" /> 

In [None]:
import os, json, urllib.request
from copy import copy

In [None]:
# Please download the script Query.py into the working directory before running this cell
from Query import Query

In [None]:
def SearchLayer(layer, term, flag=1, elastic=0):
    """Search a nested hierarchy of terms and return the labels related to ``term``.

    Parameters
    ----------
    layer : dict, list or str
        A dict maps a label to a sub-layer (searched recursively), a list holds
        the leaf terms of one level, and a bare string is treated as a level
        holding that single term.
    term : str
        The term to look for. It is lowercased before comparison; the labels
        stored in ``layer`` are compared as they are.
    flag : int
        1: exact match; when ``term`` is found at a level, every label of that
           level is returned.
        2: only the matching label(s) of a level are returned.
        Any other value matches nothing.
    elastic : int
        Only used with ``flag=2``. 0 requires an exact match, 1 accepts any
        label that contains ``term`` as a substring.

    Returns
    -------
    list
        Labels without duplicates, in order of first occurrence. For both
        values of ``flag``, every dict key whose sub-layer produced a match is
        included right before the matches found below it. The list is empty
        when nothing matches.
    """
    term = term.lower()
    is_branch = isinstance(layer, dict)

    if is_branch:
        keys = list(layer)
    elif isinstance(layer, str):
        # A bare string leaf would otherwise be scanned character by character.
        keys = [layer]
    else:
        keys = layer

    out = []
    if flag == 1:
        if term in keys:
            out = list(keys)
    elif flag == 2:
        if elastic == 0 and term in keys:
            out = [term]
        elif elastic == 1:
            out = [item for item in keys if term in item]

    if is_branch:
        for k in keys:
            res = SearchLayer(layer[k], term, flag, elastic)
            if res:
                # Record the parent key ahead of whatever was found beneath it.
                out = out + [k] + res

    # dict.fromkeys removes duplicates while keeping first-occurrence order.
    return list(dict.fromkeys(out))

## Definition of the hierarchy of the terms

In [None]:
# Hierarchy of terms: top-level category -> sub-category -> list of leaf terms.
# A few leaf terms appear more than once (e.g. "damaged building", "storage tank",
# "ground track field"); the entries are kept exactly as listed.
metalayer = {
    "built-up": {
        "residential": [
            "building", "damaged building", "cottage", "duplex", "hut", "tent",
            "shed", "damaged building", "residential building",
        ],
        "industrial": [
            "factory", "cotton mill", "gas house", "damaged building",
            "industrial building",
        ],
        "facilities": [
            "baseball court", "tennis court", "basketball court",
            "ground track field", "baseball diamond", "ground track field",
            "soccer ball field", "swimming pool", "damaged building", "facility",
            "vehicle lot",
        ],
        "infrastructure": [
            "harbor", "bridge", "roundabout", "storage tank", "container-crane",
            "tower crane", "aircraft hangar", "damaged building", "helipad",
            "storage tank", "shipping container lot", "pylon", "highway",
        ],
        "construction": ["construction site", "tower"],
        "areas": ["continuous urban fabric", "discontinuous urban fabric"],
    },
    "transport means": {
        "vehicle": [
            "large vehicle", "small vehicle", "passenger vehicle", "small car",
            "bus", "pickup truck", "utility truck", "truck", "cargo truck",
            "truck tractor", "box trailer", "trailer", "flatbed trailer",
            "liquid tank", "crane truck", "railway vehicle", "passenger car",
            "cargo car", "container car", "flat car", "tank car", "locomotive",
            "reach stacker", "straddle carrier", "mobile crane", "dump truck",
            "haul truck", "scraper", "tractor", "front loader", "bulldozer",
            "excavator", "cement mixer", "ground grader",
        ],
        "flying": [
            "helicopter", "plane", "fixed-wing aircraft", "small aircraft",
            "passenger plane", "cargo plane",
        ],
        "vessel": [
            "ship", "maritime vessel", "motorboat", "sailboat", "tugboat", "barge",
            "fishing vessel", "ferry", "yacht", "container ship", "oil tanker",
            "engineering vehicle",
        ],
    },
    "object": {
        "man-made": ["shipping container", "pylon", "tower"],
    },
    "natural areas": {
        "air": ["cloud"],
        "land": [
            "permanently irrigated land", "sclerophyllous vegetation", "beaches",
            "dunes", "sands", "estuaries", "vineyards", "coniferous forest",
            "mixed forest", "non-irrigated arable land", "fruit trees",
            "berry plantations", "agro-forestry areas", "transitional woodland",
            "shrub", "land principally occupied by agriculture",
            "broad-leaved forest", "annual crop", "permanent crop",
            "herbaceous vegetation", "pasture", "forest",
        ],
        "water": ["sea", "ocean", "water courses", "water bodies", "river", "lake"],
    },
}

### Store the metalayer structure

In [None]:
# Uncomment in case you would like to store the metalayer structure
#with open(os.path.join('metalayer.json'), 'w', encoding='utf-8') as f:
#    json.dump(metalayer, f, indent=4)

### Load the metalayer structure

In [None]:
# Base URL from which the following cells download metalayer.json and each collection's content.json
mainfolder = 'https://jeodpp.jrc.ec.europa.eu/ftp/public/MachineLearning/SatImNet/'

In [None]:
%%time
with urllib.request.urlopen(os.path.join(mainfolder, 'metalayer.json')) as f:
    metalayer = json.loads(f.read().decode())

In [None]:
# Example: show the leaf terms stored under 'built-up' -> 'residential'
metalayer['built-up']['residential']

# Search inside the metalayer

In [None]:
# Partial-match search for 'building': SearchLayer lowercases the term, and with
# flag=2 / elastic=1 it returns every label containing it plus the parent keys
SearchLayer(metalayer, 'Building', flag=2, elastic=1)

In [None]:
# Partial-match search for every label containing 'court', plus the parent keys
SearchLayer(metalayer, 'court', flag=2, elastic=1)

## Example: Search files in DOTA dataset

In [None]:
# Name of the sub-folder of mainfolder whose content.json is loaded in the next cell
collection = 'DOTA'

In [None]:
# Download the content description of the selected collection.
# The URL is assembled with '/' explicitly: os.path.join inserts the operating
# system's path separator (a backslash on Windows), which is not valid in a URL.
content_url = '/'.join([mainfolder.rstrip('/'), collection, 'content.json'])
with urllib.request.urlopen(content_url) as f:
    content = json.loads(f.read().decode())

In [None]:
# Collect every label containing 'court' (plus parent keys), then add spelling
# variants: hyphens turned into spaces and spaces turned into hyphens
keywords = SearchLayer(metalayer, 'COURT', flag=2, elastic=1)
nkeywords = list(keywords)  # snapshot: the loop below appends to `keywords`
for item in nkeywords:
    for old, new in (('-', ' '), (' ', '-')):
        if old in item:
            keywords.append(item.replace(old, new))
keywords

In [None]:
# Run one query per keyword against the collection tree and gather all results in Q.
# NOTE(review): Query comes from Query.py; its result is presumably an iterable
# of matching paths — confirm against that script.
Q = []
for k in keywords:
    print('Key:', k)
    criteria = {'genre': 'png', 'class': [k]}
    query = Query(content['tree'], criteria, 'path')
    Q.extend(query)
Q