# Document Analysis
Compare two versions of the same document, or two similar documents, and report the differences between them.

Navigate to the files you want to analyse in the Azure portal - they must already be uploaded to Blob Storage.  
On the far right of each file there is a three-dot menu; from there you can generate a SAS URL for the file.
In your .env file, populate BLOB_SAS_URL and BLOB_SAS_URL_1 with the SAS URLs of the two files.

### 1. Install required libraries

In [None]:
%pip install azure-storage-blob azure-ai-formrecognizer azure-identity openai

### 2. Import helper libraries and load credentials from .env file

In [None]:
# Helper libraries used by the cells below.
import os
from openai import AzureOpenAI
import numpy as np
from dotenv import load_dotenv
# Load the credentials stored in the .env file so the os.environ / os.getenv
# lookups in the following cells can find them.
load_dotenv()

### 3. Create the DocumentComparisonController class
Designed to compare two PDF documents using Azure AI Document Intelligence and Azure OpenAI services. It extracts text content from the documents and uses OpenAI to analyze and compare the documents based on a user-provided prompt.

In [None]:
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
from openai import AzureOpenAI
import datetime

class DocumentComparisonController:
    """Compare two documents using Azure AI Document Intelligence and Azure OpenAI.

    The text content of each document is extracted with the
    ``prebuilt-layout`` model, then both extracts are sent to an Azure OpenAI
    chat deployment together with the user's prompt.
    """

    def __init__(self):
        """Read the service settings from environment variables.

        Raises:
            KeyError: If any of the required environment variables is not set.
        """
        self.FormRecogEndpoint = os.environ['FORM_RECOGNIZER_ENDPOINT']
        self.FormRecogKey = os.environ['FORM_RECOGNIZER_KEY']
        self.AOAIendpoint = os.environ['OPENAI_API_ENDPOINT']
        self.AOAIKey = os.environ['OPENAI_API_KEY']
        self.AOAIDeploymentName = os.environ['GPT4_MODEL_NAME']
        self.AOAIAPIVersion = os.environ['OPENAI_API_VERSION']
        # Result of the most recent comparison; replaced on every call.
        self.model = {}

    def document_comparison_post(self, document_urls, prompt):
        """Extract the text of two documents and ask the chat model about them.

        Args:
            document_urls: Sequence of exactly two document URLs (for example
                blob SAS URLs) that are passed to the analysis service.
            prompt: The user's question or instruction about the documents.

        Returns:
            A dict with the keys ``'PdfUrl1'``, ``'PdfUrl2'`` and
            ``'Message'`` (the chat model's reply). The same dict is also
            stored on ``self.model``.

        Raises:
            ValueError: If ``document_urls`` does not contain exactly two
                entries. Errors raised by the Azure clients propagate
                unchanged.
        """
        # Validate before any service call so a bad argument list does not
        # trigger document analysis requests that would be thrown away.
        if len(document_urls) != 2:
            raise ValueError(
                f"document_urls must contain exactly two URLs, got {len(document_urls)}"
            )
        first_url, second_url = document_urls

        # Start from a fresh dict: a result returned by an earlier call must
        # not be mutated, and a stale 'Message' must not survive a failure.
        self.model = {'PdfUrl1': first_url, 'PdfUrl2': second_url}

        ocr_texts = self._extract_texts(document_urls)
        self.model['Message'] = self._ask_model(ocr_texts, prompt)

        return self.model

    def _extract_texts(self, document_urls):
        """Return the extracted text content of each URL, in input order."""
        # One client is enough for every document; it does not depend on the URL.
        client = DocumentAnalysisClient(
            endpoint=self.FormRecogEndpoint,
            credential=AzureKeyCredential(self.FormRecogKey),
        )
        texts = []
        for url in document_urls:
            poller = client.begin_analyze_document_from_url("prebuilt-layout", url)
            # result() blocks until the analysis of this document has finished.
            texts.append(poller.result().content)
        return texts

    def _ask_model(self, ocr_texts, prompt):
        """Send both OCR texts plus the user prompt to the chat deployment and return its reply."""
        aoai_client = AzureOpenAI(
            azure_endpoint=self.AOAIendpoint,
            api_key=self.AOAIKey,
            api_version=self.AOAIAPIVersion,
        )

        messages = [
            # Here you can play around and set the system context to GPT models
            {"role": "system", "content": f"You are specialized in analyzing different versions of the same PDF document. The first Document OCR result is: <<<{ocr_texts[0]}>>> and the second Document OCR result is: <<<{ocr_texts[1]}>>>"},
            {"role": "user", "content": f"User question: {prompt}"},
        ]

        response = aoai_client.chat.completions.create(
            model=self.AOAIDeploymentName,
            messages=messages,
            max_tokens=1000,
            temperature=0.7,
            frequency_penalty=0,
            presence_penalty=0,
            top_p=0.95,
        )
        return response.choices[0].message.content

controller = DocumentComparisonController()

# Example usage: compare the two documents whose SAS URLs are set in the .env file.
doc_url1 = os.getenv("BLOB_SAS_URL")
doc_url2 = os.getenv("BLOB_SAS_URL_1")
document_urls = [doc_url1, doc_url2]

# os.getenv returns None for an unset variable; stop here with a clear message
# instead of sending a missing URL to the analysis service.
if not all(document_urls):
    raise RuntimeError("BLOB_SAS_URL and BLOB_SAS_URL_1 must both be set in the .env file")

prompt = "Compare the documents and provide similary and difference as table format"

result = controller.document_comparison_post(document_urls, prompt)
print("result----->",result)