# Create a custom named entity extraction solution

Build a custom entity recognition solution to extract entities from unstructured documents.

- Understand custom named entities and how they're labeled.
- Build a Language service project.
- Label data, train, and deploy an entity extraction model.
- Submit extraction tasks from your own app.

# Importing libraries

In [2]:
from dotenv import load_dotenv
import os

In [1]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

# Loading configurations

In [3]:
# Load settings via python-dotenv (typically from a local .env file) into the
# process environment before reading them below.
load_dotenv()

# Endpoint and key used to authenticate against the Language resource.
ai_endpoint = os.environ.get("AI_LANGUAGE_SERVICE_ENDPOINT")
ai_key = os.environ.get("AI_LANGUAGE_SERVICE_KEY")

# Project and deployment names passed to the custom entity recognition call.
project_name = os.environ.get("PROJECT_NAME")
deployment_name = os.environ.get("DEPLOYMENT_NAME")

True

# Creating SDK client

In [4]:
# Wrap the service key in the credential type the SDK client expects.
credential = AzureKeyCredential(ai_key)
# Shared client used by extract_custom_entities to submit documents.
ai_client = TextAnalyticsClient(endpoint=ai_endpoint, credential=credential)

# Main logic

In [10]:
def read_documents(folder_path):
  """Read every regular file in a folder as UTF-8 text.

  Args:
    folder_path: Directory whose files are read.

  Returns:
    A ``(file_names, documents)`` tuple of two parallel lists: the names of
    the files that were read (relative to ``folder_path``, sorted) and the
    text content of each one, in the same order.
  """
  file_names = []
  documents = []

  # os.listdir returns entries in arbitrary order; sort so the batch sent to
  # the service (and the printed report) is reproducible between runs.
  for file_name in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file_name)
    # Skip sub-directories (e.g. .ipynb_checkpoints); they cannot be opened
    # as text and would otherwise abort the whole batch.
    if not os.path.isfile(file_path):
      continue
    # The context manager closes the handle even if reading or decoding fails.
    with open(file_path, encoding='utf8') as handle:
      documents.append(handle.read())
    file_names.append(file_name)

  return file_names, documents

In [11]:

def extract_custom_entities(documents):
  """Submit a batch of documents for custom entity recognition.

  Uses the module-level ``ai_client`` together with the configured
  ``project_name`` and ``deployment_name``.

  Args:
    documents: Batch handed unchanged to
      ``ai_client.begin_recognize_custom_entities``.

  Returns:
    The value produced by the operation's ``result()`` call.
  """
  target = {
      "project_name": project_name,
      "deployment_name": deployment_name,
  }
  poller = ai_client.begin_recognize_custom_entities(documents, **target)
  # result() waits for the long-running operation to finish before returning.
  return poller.result()

In [12]:
def print_entities_results(file_names, results):
  """Print the recognized entities, or the error, for each document.

  Args:
    file_names: Names of the submitted documents.
    results: Per-document results, paired positionally with ``file_names``.
      Each item is read through ``kind`` and then either ``entities``
      (items with ``text``, ``category``, ``confidence_score``) or
      ``is_error`` / ``error`` (with ``code`` and ``message``).
  """
  for file_name, entities_result in zip(file_names, results):
    print(file_name)
    if entities_result.kind == "CustomEntityRecognition":
      for entity in entities_result.entities:
        # Keep every replacement field on a single line: a line break inside
        # the braces of a single-quoted f-string is a SyntaxError before
        # Python 3.12.
        print(
            f"\tEntity '{entity.text}' has category '{entity.category}'"
            f" with confidence score of '{entity.confidence_score}'"
        )
    elif entities_result.is_error is True:
      error = entities_result.error
      print(f"\tError with code '{error.code}' and message '{error.message}'")

In [13]:
# Read every test document from the local Testing-Data folder.
file_names, batched_documents = read_documents('./Testing-Data/')
# Send the whole batch to the deployed custom entity model.
entities_results = extract_custom_entities(batched_documents)
# Report the entities (or the error) for each file; results are paired with
# file names by position.
print_entities_results(file_names, entities_results)

test1.txt
	Entity 'Bluetooth earbuds' has category 'ItemForSale' with confidence score of '0.97'
	Entity '$100' has category 'Price' with confidence score of '0.98'
	Entity 'Sacramento, CA' has category 'Location' with confidence score of '0.99'
test2.txt
	Entity 'Dog harness' has category 'ItemForSale' with confidence score of '0.98'
	Entity '$20' has category 'Price' with confidence score of '0.99'
	Entity 'Tucson, AZ' has category 'Location' with confidence score of '0.99'
