Skip to content

Commit

Permalink
[text analytics] add bing_id property to LinkedEntity class (Azure#13446)
Browse files Browse the repository at this point in the history
  • Loading branch information
iscai-msft authored and rakshith91 committed Sep 4, 2020
1 parent 4357c10 commit f8f6c2a
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 7 deletions.
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient
- `offset` is the offset of the text from the start of the document
- We have now added support for opinion mining. To use this feature, make sure you are using the service's
v3.1-preview.1 API. To get this support, pass `show_opinion_mining` as True when calling the `analyze_sentiment` endpoint
- Add property `bing_id` to the `LinkedEntity` class. This property is only available for v3.1-preview.2 and up. Use it
in conjunction with the Bing Entity Search API to fetch additional relevant information about the returned entity.

## 5.0.0 (2020-07-27)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class TextAnalyticsApiVersion(str, Enum):

#: this is the default version
V3_1_PREVIEW_1 = "v3.1-preview.1"

# v3.1-preview.2 is not yet the default version because a reliable
# endpoint for it is not available yet
V3_1_PREVIEW_2 = "v3.1-preview.2"
V3_0 = "v3.0"

def _authentication_policy(credential):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# coding=utf-8
# coding=utf-8 pylint: disable=too-many-lines
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
Expand Down Expand Up @@ -616,6 +616,11 @@ class LinkedEntity(DictMixin):
:ivar data_source: Data source used to extract entity linking,
such as Wiki/Bing etc.
:vartype data_source: str
:ivar str bing_id: Bing unique identifier of the recognized entity. Use in conjunction
with the Bing Entity Search SDK to fetch additional relevant information. Only
available for API version v3.1-preview.2 and up.
.. versionadded:: v3.1-preview.2
The *bing_id* property.
"""

def __init__(self, **kwargs):
Expand All @@ -625,22 +630,32 @@ def __init__(self, **kwargs):
self.data_source_entity_id = kwargs.get("data_source_entity_id", None)
self.url = kwargs.get("url", None)
self.data_source = kwargs.get("data_source", None)
self.bing_id = kwargs.get("bing_id", None)

@classmethod
def _from_generated(cls, entity):
    """Build an instance of this class from a generated linked-entity model.

    Each item of ``entity.matches`` is converted with
    ``LinkedEntityMatch._from_generated``; ``entity.id`` is exposed as
    ``data_source_entity_id``. ``bing_id`` is read defensively: when the
    generated entity has no ``bing_id`` attribute, ``None`` is used.

    :param entity: The generated linked-entity object to convert.
    :return: A new instance of ``cls`` populated from ``entity``.
    """
    converted_matches = [
        LinkedEntityMatch._from_generated(match)  # pylint: disable=protected-access
        for match in entity.matches
    ]
    return cls(
        name=entity.name,
        matches=converted_matches,
        language=entity.language,
        data_source_entity_id=entity.id,
        url=entity.url,
        data_source=entity.data_source,
        # Falls back to None when the generated model lacks the attribute.
        bing_id=getattr(entity, "bing_id", None),
    )

def __repr__(self):
return "LinkedEntity(name={}, matches={}, language={}, data_source_entity_id={}, url={}, " \
"data_source={})".format(self.name, repr(self.matches), self.language, self.data_source_entity_id,
self.url, self.data_source)[:1024]
"data_source={}, bing_id={})".format(
self.name,
repr(self.matches),
self.language,
self.data_source_entity_id,
self.url,
self.data_source,
self.bing_id,
)[:1024]


class LinkedEntityMatch(DictMixin):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
headers:
apim-request-id:
- 34b34e81-fcc2-4c1e-85b2-116f85196a4c
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Mon, 31 Aug 2020 18:48:40 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '27'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
headers:
apim-request-id: 70ab796e-3da1-4a55-86b4-16c4b19a97a8
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Mon, 31 Aug 2020 18:48:41 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '26'
status:
code: 200
message: OK
url: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

import os
import pytest
import platform
import functools
Expand Down Expand Up @@ -586,3 +586,17 @@ def test_string_index_type_not_fail_v3(self, client):
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
# cause v3.0 calls to fail
client.recognize_linked_entities(["please don't fail"])

# currently only have this as playback since the dev endpoint is unreliable
@pytest.mark.playback_test_only
@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer(client_kwargs={
    "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
    "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
    "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
})
def test_bing_id(self, client):
    """Check that every linked entity returned for v3.1-preview.2 has a non-empty ``bing_id``."""
    result = client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
    # Guard against a vacuous pass: with no documents or no entities the
    # inner assertion would never execute and the test would still succeed.
    assert result
    for doc in result:
        assert doc.entities
        for entity in doc.entities:
            assert entity.bing_id  # fails when bing_id is None or empty
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

import os
import pytest
import platform
import functools
Expand Down Expand Up @@ -622,3 +622,17 @@ async def test_string_index_type_not_fail_v3(self, client):
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
# cause v3.0 calls to fail
await client.recognize_linked_entities(["please don't fail"])

# currently only have this as playback since the dev endpoint is unreliable
@pytest.mark.playback_test_only
@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer(client_kwargs={
    "api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
    "text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
    "text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
})
async def test_bing_id(self, client):
    """Check that every linked entity returned for v3.1-preview.2 has a non-empty ``bing_id``."""
    result = await client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
    # Guard against a vacuous pass: with no documents or no entities the
    # inner assertion would never execute and the test would still succeed.
    assert result
    for doc in result:
        assert doc.entities
        for entity in doc.entities:
            assert entity.bing_id  # fails when bing_id is None or empty
7 changes: 5 additions & 2 deletions sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,15 @@ def linked_entity(linked_entity_match):
language="English",
data_source_entity_id="Bill Gates",
url="https://en.wikipedia.org/wiki/Bill_Gates",
data_source="wikipedia"
data_source="wikipedia",
bing_id="12345678"
)
model_repr = (
"LinkedEntity(name=Bill Gates, matches=[{}, {}], "\
"language=English, data_source_entity_id=Bill Gates, "\
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)".format(linked_entity_match[1], linked_entity_match[1])
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia, bing_id=12345678)".format(
linked_entity_match[1], linked_entity_match[1]
)
)
assert repr(model) == model_repr
return model, model_repr
Expand Down

0 comments on commit f8f6c2a

Please sign in to comment.