Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proposal - HTML Document Querying (Independent Publisher) #3478

Draft
wants to merge 4 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
{
"swagger": "2.0",
"info": {
"title": "HTML Document Querying",
"description": "This connector can query HTML documents. It used the HTML Agility Pack to interact with the provided HTML and uses XPath queries to extract information from the provided HTML document. The HTML document can also be downloaded from a HTTP address first.",
"version": "1.0",
"contact": {
"name": "Remy Blok",
"url": "https://rjb.solutions",
"email": "remyblok@remyblok.nl"
}
},
"host": "none.invalid",
"basePath": "/",
"schemes": [
"https"
],
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"paths": {
"/Internal-GetSchema": {
"post": {
"x-ms-visibility": "internal",
"operationId": "Internal-GetSchema",
"description": "Get dynamic schema",
"summary": "Get dynamic schema",
"parameters": [
{
"name": "body",
"in": "body",
"required": true,
"schema": {
"type": "object",
"properties": {
"queries": {
"$ref": "#/definitions/Queries"
}
},
"required": [
"queries"
]
}
}
],
"responses": {
"200": {
"description": "default",
"schema": {
"type": "object",
"properties": {}
}
}
}
}
},
"/QueryDocumentFromString": {
"post": {
"summary": "Execute Query to HTML document",
"description": "Executes the queries against de supplied HTML document",
"operationId": "QueryDocumentFromString",
"x-ms-visibility": "important",
"parameters": [
{
"name": "body",
"in": "body",
"required": true,
"schema": {
"type": "object",
"properties": {
"html": {
"type": "string",
"description": "The HTML document to query against",
"x-ms-summary": "HTML Document"
},
"queries": {
"$ref": "#/definitions/Queries"
}
},
"required": [
"html",
"queries"
]
}
}
],
"responses": {
"200": {
"$ref": "#/responses/dynamicSchema"
},
"400": {
"$ref": "#/responses/error"
}
}
}
},
"/QueryDocumentFromUrl": {
"post": {
"summary": "Execute Query to downloaded HTML document",
"description": "Executes the queries against the HTML document retrieved from the URL",
"operationId": "QueryDocumentFromUrl",
"x-ms-visibility": "important",
"parameters": [
{
"name": "body",
"in": "body",
"required": true,
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "URL of the HTML document to query against",
"x-ms-summary": "Url to HTML document"
},
"queries": {
"$ref": "#/definitions/Queries"
}
},
"required": [
"url",
"queries"
]
}
}
],
"responses": {
"200": {
"$ref": "#/responses/dynamicSchema"
},
"400": {
"$ref": "#/responses/error"
}
}
}
}
},
"definitions": {
"Queries": {
"type": "array",
"items": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The Query to execute. Can be an XPath query or a Power Automate Desktop CSS path",
"x-ms-summary": "Query",
"x-ms-visibility": "important"
},
"id": {
"type": "string",
"description": "Query ID for unique identification",
"x-ms-summary": "Query ID"
},
"selectMultiple": {
"type": "boolean",
"description": "Select a single result node or a list of result nodes",
"x-ms-summary": "Multiple results"
},
"resultMode": {
"type": "string",
"description": "Specify what result is retrured from the result nodes",
"x-ms-summary": "Result Mode",
"enum": [
"innerHtml",
"outerHtml",
"text",
"attribute"
],
"x-ms-visibility": "advanced",
"x-ms-enum": {
"name": "Result Mode",
"values": [
{
"value": "innerHtml",
"desciption": "Select the HTML inside the queried node",
"name": "Inner HTML"
},
{
"value": "outerHtml",
"desciption": "Select the full queried node",
"name": "Outer HTML"
},
{
"value": "text",
"desciption": "Select all text inside the queried node and child nodes",
"name": "Text Only"
},
{
"value": "attribute",
"desciption": "Select specific attribute from the node",
"name": "Specific Attribute"
}
]
}
},
"attribute": {
"type": "string",
"description": "Select the specified attribute of the queried nodes. Needs Result Mode 'Attribute'.",
"x-ms-summary": "Attribute name",
"x-ms-visibility": "advanced"
}
},
"required": [
"query"
]
},
"x-ms-summary": "Queries",
"description": "List of all queries to execute"
}
},
"parameters": {},
"responses": {
"dynamicSchema": {
"description": "Query Results",
"schema": {
"type": "object",
"x-ms-dynamic-schema": {
"operationId": "Internal-GetSchema",
"parameters": {
"queries": {
"parameter": "queries"
}
}
},
"x-ms-dynamic-properties": {
"operationId": "Internal-GetSchema",
"parameters": {
"body/queries": {
"parameterReference": "body/queries"
}
}
}
}
},
"error": {
"description": "Error",
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string"
}
}
}
}
},
"securityDefinitions": {},
"security": [],
"tags": [],
"x-ms-connector-metadata": [
{
"propertyName": "Website",
"propertyValue": "https://html-agility-pack.net/"
},
{
"propertyName": "Privacy policy",
"propertyValue": "https://html-agility-pack.net/privacy-policy"
},
{
"propertyName": "Categories",
"propertyValue": "IT Operations;Website"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"properties": {
"connectionParameters": {},
"iconBrandColor": "#da3b01",
"scriptOperations": [
"Internal-GetSchema",
"QueryDocumentFromString",
"QueryDocumentFromUrl"
],
"capabilities": [],
"policyTemplateInstances": [],
"publisher": "Remy Blok",
"stackOwner": "HTML Agility Pack"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
## HTML Document Querying
This connector can query HTML documents. It used the HTML Agility Pack to interact with the provided HTML and uses XPath queries to extract information from the provided HTML document. The HTML document can also be downloaded from a HTTP address first.

## Publisher: Remy Blok

## Prerequisites
There are no prerequisities for this connector

## Supported Operations

### Execute Query to HTML document
Queries the provided HTML document with the provided xpath queries.

### Execute Query to downloaded HTML document
Downloads a HTML document from the provided URL. Then queries the downloaded HTML document with the provided xpath queries.

## Obtaining Credentials
Anonymous authentication

## Known Issues and Limitations
When downloading an HTML document is needs to be accessable without credentials. Just a plain HTTP get request need to result in the document.