In [74]:
# import libraries
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient


In [83]:
# Load environment variables (e.g. from a local .env file)
load_dotenv()

# Service settings are taken from the environment; a value is None when its variable is unset.
# Endpoint and key used for the document analysis client:
endpoint = os.environ.get("AZURE_COGNITIVE_ENDPOINT")
key = os.environ.get("AZURE_COGNITIVE_KEY")
# Endpoint and key used for the Azure OpenAI client:
gpt4_endpoint = os.environ.get("OPENAI_ENDPOINT")
gpt4_key = os.environ.get("OPENAI_KEY")

# Step 1: Azure Document Intelligence

Input: a PDF document

Output: the locations of the identified text, tables, and figures, together with their corresponding text

In [16]:
# Sample document: a balance-sheet PDF stored under ../data (resolved against the working directory)
base_path = os.path.abspath('../data')
formPath = os.path.join(base_path, "10k/plya_consolidated_balance_sheets.pdf")

print(formPath)

/Users/lingruiz/Documents/capstone/data/10k/plya_consolidated_balance_sheets.pdf


In [20]:
def open_pdf(path):
    """Return the complete contents of the file at ``path`` as bytes.

    The file is opened in binary mode and closed again before returning.
    """
    with open(path, mode="rb") as pdf_file:
        raw_bytes = pdf_file.read()
    return raw_bytes


def get_result(formPath, client):
    """Analyze the document at ``formPath`` with the "prebuilt-layout" model.

    Args:
        formPath: path of the file to analyze; its bytes are read with ``open_pdf``.
        client: object exposing ``begin_analyze_document(model_id, document)``
            that returns a poller.

    Returns:
        Whatever ``result()`` on the returned poller yields.
    """
    analysis_poller = client.begin_analyze_document("prebuilt-layout", open_pdf(formPath))
    return analysis_poller.result()

In [21]:
# Document analysis client, authenticated with the endpoint and key loaded earlier
document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

In [22]:
# Analyze the sample PDF and keep the outcome in `result` for the cells below
result = get_result(formPath=formPath, client=document_analysis_client)


## Explore result structure

In [42]:
# result.tables is a list with one entry per table found in the PDF document.
# Each entry exposes its dimensions as row_count and column_count.
for index, table in enumerate(result.tables):
    print(
        f"table #{index}: \nrow count:{table.row_count}",
        f"column count:{table.column_count} \n",
        sep="\n",
    )

table #0: 
row count:32
column count:3 

table #1: 
row count:32
column count:4 



# Convert result to df/csv

In [45]:
def convert_azdoc_todf(tables):
    """Turn each analyzed table into a row-major grid of cell text.

    Args:
        tables: iterable of table objects exposing ``row_count``, ``column_count``
            and ``cells``; every cell exposes ``row_index``, ``column_index`` and
            ``content``.

    Returns:
        A list with one grid per table. A grid is a list of rows and each row is
        a list of cell contents; positions that no cell maps to stay as "".
        Despite the function name, no DataFrame is built here.
    """
    grids = []
    for table in tables:
        n_rows, n_cols = table.row_count, table.column_count
        grid = [[""] * n_cols for _ in range(n_rows)]
        for cell in table.cells:
            r, c = cell.row_index, cell.column_index
            # Ignore cells whose coordinates fall past the declared table size.
            if r >= n_rows or c >= n_cols:
                continue
            grid[r][c] = cell.content
        grids.append(grid)
    return grids

In [90]:
def convert_azdoc_tostring(tables):
    """Render the analyzed tables as plain text with tab-separated columns.

    Args:
        tables: iterable of table objects exposing ``row_count``, ``column_count``
            and ``cells``; every cell exposes ``row_index``, ``column_index`` and
            ``content``.

    Returns:
        One string. Each table contributes a "Table N:" line (N counts from 1),
        one line per row with the cells joined by tabs, and a trailing blank
        line. An empty ``tables`` gives "".
    """
    pieces = []
    for position, table in enumerate(tables, start=1):
        grid = [[""] * table.column_count for _ in range(table.row_count)]
        for cell in table.cells:
            inside = cell.row_index < table.row_count and cell.column_index < table.column_count
            if inside:
                grid[cell.row_index][cell.column_index] = cell.content

        pieces.append(f"Table {position}:\n")
        # One tab-separated line per table row
        pieces.extend("\t".join(row) + "\n" for row in grid)
        # Blank line between consecutive tables
        pieces.append("\n")
    return "".join(pieces)

def convert_azdoc_to_markdown(tables):
    """Render the analyzed tables as Markdown tables.

    The first row of every table is used as the Markdown header row.

    Args:
        tables: iterable of table objects exposing ``row_count``, ``column_count``
            and ``cells``; every cell exposes ``row_index``, ``column_index`` and
            a string ``content``.

    Returns:
        One string. Each table contributes a "### Table N" heading (N counts
        from 1), the header row, a "---" divider row, the remaining rows, and
        two trailing newlines. A table without rows contributes only its
        heading, so the numbering of the other tables is unaffected. An empty
        ``tables`` gives "".
    """

    def _md_cell(text):
        # A raw "|" would open an extra column and a line break would end the
        # row early, so both are neutralised before the text enters a row.
        flattened = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        return flattened.replace("|", "\\|")

    def _md_row(values):
        # Format one list of cell strings as a single Markdown table line.
        return "| " + " | ".join(values) + " |\n"

    sections = []
    for table_idx, table in enumerate(tables):
        # Start from an all-empty grid so positions without a cell render as blanks.
        matrix = [["" for _ in range(table.column_count)] for _ in range(table.row_count)]

        for cell in table.cells:
            row_index = cell.row_index
            column_index = cell.column_index
            # Skip cells whose coordinates fall past the declared table size.
            if row_index < table.row_count and column_index < table.column_count:
                matrix[row_index][column_index] = _md_cell(cell.content)

        heading = f"### Table {table_idx + 1}\n"
        if not matrix:
            # No rows means there is no header row to build a table from.
            sections.append(heading + "\n\n")
            continue

        header = matrix[0]
        lines = [_md_row(header), _md_row(["---" for _ in header])]
        lines.extend(_md_row(row) for row in matrix[1:])
        sections.append(heading + "".join(lines) + "\n\n")

    return "".join(sections)



In [91]:
# Markdown rendering of every table found in the analyzed document
table_md = convert_azdoc_to_markdown(tables=result.tables)

## Step 2: Azure OpenAI Studio 

In [84]:
# Display the endpoint read from OPENAI_ENDPOINT to confirm it was loaded.
# NOTE(review): the rendered output exposes the resource hostname; consider clearing it before sharing.
gpt4_endpoint

'https://ai-lingruiz7765ai414752438778.openai.azure.com'

In [80]:
from openai import AzureOpenAI

# Azure OpenAI client built from the key and endpoint loaded from the environment
client = AzureOpenAI(api_key=gpt4_key, api_version="2024-08-01-preview", azure_endpoint=gpt4_endpoint)

In [87]:
# Instruction text for the model, split across lines only for readability
SYSTEM_PROMPT = (
    "given the balance sheet above, calculate below ratios: "
    "Current Ratio = Current Assets / Current Liabilities "
    "and compute  a credit score to evaluate the financial status of the company"
)

In [85]:
# Name of the Azure OpenAI deployment to call; it is passed as `model` in the request below.
deployment_name = 'gpt-4'

# To prime the model with earlier turns, add them to `messages` ahead of the
# final user message, for example:
#   {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
#   {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
response = client.chat.completions.create(
    model=deployment_name,  # use the variable so the deployment is configured in one place
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": table_md},
    ],
)

# Show the text of the first returned choice
print(response.choices[0].message.content)


Yes, many Azure AI services provide support for customer managed keys as part of the security and compliance features. Services such as Azure Cognitive Services allow you to use customer managed keys to control and manage the encryption of your data at rest. This feature is implemented to enhance the security measures, ensuring that data handling complies with strict privacy and regulatory requirements. Always check the specific documentation for each Azure AI service to confirm the availability and specific implementation details of customer managed keys for that service.
