Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XML sitemap used by robots.txt in market #677

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -58,6 +58,7 @@ Aquarius is a simple, lightweight scanner and API. It is built using Python, usi
- `POST api/v1/aquarius/assets/ddo/encrypt` and `POST api/v1/aquarius/assets/ddo/encryptashex`: encrypts the asset using the `EVENTS_ECIES_PRIVATE_KEY` env var. Unencrypted assets can be read by any Aquarius instance, but if you are running a private Aquarius, this makes your assets private.
- `GET api/v1/aquarius/chains/list`: lists all chains indexed by the Aquarius version
- `GET api/v1/aquarius/chains/status/<chain_id>`: shows the status of the chain corresponding to the given `chain_id`
- `GET api/v1/aquarius/assets/sitemap`: outputs an XML sitemap used by the [market](https://github.com/oceanprotocol/market)

### The EventsMonitor

Expand Down
83 changes: 83 additions & 0 deletions aquarius/app/assets.py
Expand Up @@ -5,6 +5,7 @@
import elasticsearch
import json
import logging
import html

from flask import Blueprint, jsonify, request, Response
from aquarius.ddo_checker.ddo_checker import (
Expand Down Expand Up @@ -507,3 +508,85 @@ def encrypt_ddo_as_hex():
jsonify(error=f"Encountered error when encrypting asset as hex: {str(e)}."),
500,
)

def _sitemap_url_entry(loc, changefreq, priority, lastmod=None):
    """Return a single sitemap ``<url>`` element as a string.

    :param loc: already XML-escaped absolute URL of the page
    :param changefreq: value for the ``<changefreq>`` tag
    :param priority: value for the ``<priority>`` tag
    :param lastmod: already XML-escaped value for the ``<lastmod>`` tag;
        the tag is omitted when this is empty or None
    """
    lastmod_tag = f"<lastmod>{lastmod}</lastmod>" if lastmod else ""
    return (
        f"<url><loc>{loc}</loc>{lastmod_tag}"
        f"<changefreq>{changefreq}</changefreq>"
        f"<priority>{priority}</priority></url>"
    )


@assets.route("/sitemap", methods=["GET"])
def sitemap():
    """Runs a static native ES query, returns results as XML sitemap
    ---
    tags:
      - ddo
    parameters:
      - name: base
        in: query
        description: base URL of the market
        required: false
        type: string
    responses:
      200:
        description: successful action
      500:
        description: elasticsearch exception
    """
    # Static query: newest-first assets on the listed chains that are not
    # flagged as being in purgatory. Only the fields needed for the sitemap
    # are fetched.
    query = {
        "size": 9999,
        "query": {
            "query_string": {
                "query": "(chainId:1 OR chainId:137 OR chainId:56 OR chainId:1285 OR chainId:246) -isInPurgatory:true"
            }
        },
        "_source": ["id", "created"],
        "sort": {"created": "desc"},
    }

    # (path, changefreq, priority) of the market's static pages.
    static_pages = (
        ("/", "hourly", "1.0"),
        ("/publish", "monthly", "0.5"),
        ("/terms", "monthly", "0.4"),
        ("/privacy/en", "monthly", "0.4"),
        ("/privacy/fr", "monthly", "0.4"),
        ("/privacy/es", "monthly", "0.4"),
    )

    # `base` comes from the query string, so it is escaped before being
    # embedded in the XML document.
    base = request.args.get("base") or "https://market.oceanprotocol.com"
    base = html.escape(base.strip("/"))

    # https://www.sitemaps.org/protocol.html
    parts = [
        """<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"""
    ]
    parts.extend(
        _sitemap_url_entry(base + path, changefreq, priority)
        for path, changefreq, priority in static_pages
    )

    try:
        results = es_instance.es.search(query)
        hits = (results.get("hits") or {}).get("hits") or []

        for hit in hits:
            source = hit.get("_source") or {}
            asset_id = source.get("id")
            if not asset_id:
                # A URL cannot be built without an id; skip the entry rather
                # than failing the whole sitemap.
                continue

            created = source.get("created")
            # Indexed values are escaped as well so that a stray '&' or '<'
            # cannot produce malformed XML.
            parts.append(
                _sitemap_url_entry(
                    base + "/asset/" + html.escape(str(asset_id)),
                    "monthly",
                    "0.8",
                    lastmod=html.escape(str(created)) if created else None,
                )
            )

        parts.append("</urlset>")

        resp = Response("".join(parts))
        resp.headers["Content-Type"] = "application/xml"
        return resp

    except elasticsearch.exceptions.TransportError as e:
        error = e.error if isinstance(e.error, str) else str(e.error)
        info = e.info if isinstance(e.info, dict) else ""
        logger.info(
            f"Received elasticsearch TransportError: {error}, more info: {info}."
        )
        return (
            jsonify(error=error, info=info),
            e.status_code,
        )
    except Exception as e:
        logger.error(f"Received elasticsearch Error: {str(e)}.")
        return jsonify(error=f"Encountered Elasticsearch Exception: {str(e)}"), 500