## Custom Named Entity Recognition with Azure Language Service

### Setting Up the Environment

In [None]:
import os
from dotenv import load_dotenv
import requests
import json
import time

# Load settings via python-dotenv, then read the Azure Language Service
# configuration that the later cells rely on. Any variable that is not set
# ends up as None.
load_dotenv()

(
    language_endpoint,
    language_key,
    api_version,
    project_name,
    model_deployment_name,
) = (
    os.environ.get(setting_name)
    for setting_name in (
        "LANGUAGE_ENDPOINT",
        "LANGUAGE_KEY",
        "API_VERSION",
        "PROJECT_NAME",
        "MODEL_DEPLOYMENT_NAME",
    )
)

### Preparing the Dataset

In [None]:
# Build the list of documents for the analysis request: one entry per file
# in the test folder, each with a sequential id, a language tag and the text.
# NOTE: the spelling of `loan_agreeements` is kept as-is because the request
# cell refers to the list by this name.
test_directory = "LoanAgreements/Test"
loan_agreeements = []
count = 1

# os.listdir returns entries in arbitrary order; sorting keeps the
# id -> file mapping reproducible between runs.
for loan_agreement in sorted(os.listdir(test_directory)):
    document_path = os.path.join(test_directory, loan_agreement)

    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which would
    # otherwise make open() fail.
    if not os.path.isfile(document_path):
        continue

    # Explicit encoding so the result does not depend on the platform's
    # default locale. Assumes the agreements are UTF-8 text -- TODO confirm.
    with open(document_path, "r", encoding="utf-8") as f:
        dictionary = {
            # The analyze-text request schema documents `id` as a string.
            "id": str(count),
            "language": "en",
            "text": f.read(),
        }
    loan_agreeements.append(dictionary)
    count += 1

print(f"Total test documents: {len(loan_agreeements)}")

### Creating a Batch Prediction Task Using the NER Model

In [None]:
# Submit an asynchronous analyze-text job that runs the custom entity
# recognition deployment over every prepared document. The job is tracked
# through the URL returned in the "operation-location" response header.

# Fail early with a clear message instead of sending a request built from
# "None" placeholders when a setting is missing from the environment.
required_settings = {
    "LANGUAGE_ENDPOINT": language_endpoint,
    "LANGUAGE_KEY": language_key,
    "API_VERSION": api_version,
    "PROJECT_NAME": project_name,
    "MODEL_DEPLOYMENT_NAME": model_deployment_name,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise Exception(
        f"Missing required environment variables: {', '.join(missing_settings)}"
    )

# Tolerate an endpoint configured with a trailing slash so the path does not
# start with a double slash.
url = f"{language_endpoint.rstrip('/')}/language/analyze-text/jobs?api-version={api_version}"

headers = {
    "Ocp-Apim-Subscription-Key": language_key,
    "Content-Type": "application/json",
}

body = {
    "displayName": "Extracting Entities",
    "analysisInput": {
        "documents": loan_agreeements,
    },
    "tasks": [
        {
            "kind": "CustomEntityRecognition",
            "taskName": "Entity Recognition",
            "parameters": {
                "projectName": project_name,
                "deploymentName": model_deployment_name,
            },
        }
    ],
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
submit_response = requests.post(url, headers=headers, json=body, timeout=60)
submit_response.raise_for_status()

operation_location = submit_response.headers.get("operation-location")
if not operation_location:
    raise Exception("operation-location header missing in response. Cannot track the job.")

print(f"Operation-Location (job tracking URL):\n{operation_location}")

### Fetching the Results

In [None]:
# Poll the job tracking URL until the job succeeds, then print the raw
# response followed by every entity found in each document.
#
# Raises:
#   requests.HTTPError -- a polling request returned an error status.
#   RuntimeError       -- the job ended as "failed" or "cancelled".
#   TimeoutError       -- the job did not finish within max_wait_seconds.
url = operation_location

headers = {
    "Ocp-Apim-Subscription-Key": language_key,
    "Content-Type": "application/json",
}

poll_interval_seconds = 10
max_wait_seconds = 30 * 60  # upper bound so the cell cannot hang forever
failure_states = {"failed", "cancelled"}

deadline = time.monotonic() + max_wait_seconds
while True:
    predictions = requests.get(url, headers=headers, timeout=30)
    predictions.raise_for_status()
    job = predictions.json()  # parse once per poll and reuse below
    status = job.get("status")

    if status == "succeeded":
        break

    # A job that ended unsuccessfully never reaches "succeeded"; stop
    # instead of polling forever.
    if status in failure_states:
        raise RuntimeError(
            f"Job ended with status '{status}': {job.get('errors', [])}"
        )

    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Job did not complete within {max_wait_seconds} seconds "
            f"(last status: '{status}')."
        )

    print("Waiting for the job to complete...")
    time.sleep(poll_interval_seconds)

print("Job completed.")

print(job)

# Only the first task is read because exactly one task was submitted.
results = job["tasks"]["items"][0]["results"]

for document in results["documents"]:
    print(f"\nDocument ID: {document['id']}")
    for entity in document["entities"]:
        print(f"Text: {entity['text']}")
        print(f"Category: {entity['category']}, Offset: {entity['offset']}, Length: {entity['length']}, Confidence Score: {entity['confidenceScore']}")
        print("\n")
    print("-----------------------------------------------------")

# Surface documents the service could not process, if the response reports
# any, so they are not silently missing from the output above.
for document_error in results.get("errors", []):
    print(f"Document error: {document_error}")