# Create a custom text classification solution

The Azure AI Language service lets you process natural language in your own app. Learn how to build a custom text classification project.

- Understand types of classification projects
- Build a custom text classification project
- Tag data, train, and deploy a model
- Submit classification tasks from your own app

# Importing libraries

In [1]:
from dotenv import load_dotenv
import os

In [2]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

# Loading configurations

In [3]:
# Pull variables from a local .env file into the process environment, then
# read the service settings. Any variable that is not set comes back as None.
load_dotenv()
ai_endpoint = os.environ.get('AI_LANGUAGE_SERVICE_ENDPOINT')
ai_key = os.environ.get('AI_LANGUAGE_SERVICE_KEY')
project_name = os.environ.get('PROJECT_NAME')
deployment_name = os.environ.get('DEPLOYMENT_NAME')

True

# Creating SDK client

In [4]:
# Authenticate with the service key read from the environment and build the
# client that classify_documents() submits its batch through.
credential = AzureKeyCredential(ai_key)
ai_client = TextAnalyticsClient(endpoint=ai_endpoint, credential=credential)

# Main logic

In [11]:
def read_documents(folder_path):
  """Read every regular file in a folder as UTF-8 text.

  Args:
    folder_path: Directory whose files should be read.

  Returns:
    A ``(file_names, documents)`` tuple of two parallel lists: the file
    names (sorted, without the folder prefix) and the text of each file.

  Raises:
    OSError: If the folder cannot be listed or a file cannot be opened.
    UnicodeDecodeError: If a file is not valid UTF-8.
  """
  file_names = []
  documents = []

  # os.listdir() returns entries in arbitrary order; sorting keeps the batch
  # order (and therefore the printed report) reproducible between runs.
  for file_name in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file_name)
    # Skip sub-directories and other non-file entries: open() would raise on
    # them, and the returned names must stay aligned with the documents.
    if not os.path.isfile(file_path):
      continue
    # The context manager closes the handle even if read() fails.
    with open(file_path, encoding='utf8') as document_file:
      documents.append(document_file.read())
    file_names.append(file_name)

  return file_names, documents

In [10]:
def classify_documents(documents):
  """Classify a batch of documents with the custom single-label model.

  The request goes through the module-level ``ai_client`` and targets the
  project and deployment named by ``project_name`` / ``deployment_name``.

  Args:
    documents: The batch of document texts to classify.

  Returns:
    Whatever ``result()`` of the started operation yields; the caller
    iterates it to get one entry per submitted document.
  """
  # begin_* starts the operation; result() is called on the returned object
  # to obtain the per-document results.
  poller = ai_client.begin_single_label_classify(
      documents, project_name=project_name, deployment_name=deployment_name)
  classification_results = poller.result()
  return classification_results

In [12]:
def print_classification_results(file_names, results):
  """Print one report line for each classified or failed document.

  ``file_names`` and ``results`` are paired by position; pairing stops at the
  shorter of the two.

  Args:
    file_names: Names of the submitted documents, in submission order.
    results: Per-document results. An entry whose ``kind`` is
      ``"CustomDocumentClassification"`` is reported with its first
      classification; an entry whose ``is_error`` is ``True`` is reported
      with its error code and message. Any other entry prints nothing.
  """
  for file_name, classification_result in zip(file_names, results):
    if classification_result.kind == "CustomDocumentClassification":
      # Only the first classification is reported.
      classification = classification_result.classifications[0]
      # Adjacent f-string literals keep each replacement field on one line;
      # a line break inside the braces of a single-quoted f-string is a
      # SyntaxError before Python 3.12.
      print(
          f"{file_name} was classified as '{classification.category}' "
          f"with confidence score {classification.confidence_score}."
      )
    elif classification_result.is_error is True:
      error = classification_result.error
      print(
          f"{file_name} has an error with code '{error.code}' "
          f"and message '{error.message}'"
      )

In [15]:
# Read every file under ./Testing-Data/, classify them as a single batch,
# and print one report line per file.
file_names, batched_documents = read_documents('./Testing-Data/')
document_results = classify_documents(batched_documents)
print_classification_results(file_names, document_results)

test1.txt was classified as 'Entertainment' with confidence score 0.33.
test2.txt was classified as 'Sports' with confidence score 0.33.
test3.txt was classified as 'Sports' with confidence score 0.32.
test4.txt was classified as 'Entertainment' with confidence score 0.34.
