In [6]:
# Install required libraries
!pip install azure-ai-formrecognizer azure-core python-dotenv google-generativeai pandas

Collecting azure-ai-formrecognizer
  Using cached azure_ai_formrecognizer-3.3.3-py3-none-any.whl.metadata (64 kB)
Collecting azure-core
  Using cached azure_core-1.35.0-py3-none-any.whl.metadata (44 kB)
Collecting python-dotenv
  Using cached python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting google-generativeai
  Using cached google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Collecting pandas
  Downloading pandas-2.3.2-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting msrest>=0.6.21 (from azure-ai-formrecognizer)
  Using cached msrest-0.7.1-py3-none-any.whl.metadata (21 kB)
Collecting azure-common>=1.1 (from azure-ai-formrecognizer)
  Using cached azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting typing-extensions>=4.0.1 (from azure-ai-formrecognizer)
  Using cached typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting requests>=2.21.0 (from azure-core)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
C

In [7]:
# If running in Jupyter, restart the kernel after installation if needed

In [8]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
import os
from dotenv import load_dotenv
import google.generativeai as genai
import pandas as pd


def analyze_general_documents():
    path_to_sample_documents = os.path.abspath(r"C:\Users\HP\OneDrive\Desktop\DigiGov\Certificatet Data Extraction\Certificatet Data Extraction\10th_long_memo.pdf")

    endpoint = "https://form-recognition-project.cognitiveservices.azure.com/"
    key = "59268cc137cf4ae891e344c114ac7eeb"

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    with open(path_to_sample_documents, "rb") as f:
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-document", document=f
        )
    result = poller.result()

    # Initialize multi-line string to store all data
    output = ""

    # Extract key-value pairs
    output += "----Key-value pairs found in document----\n"
    for kv_pair in result.key_value_pairs:
        if kv_pair.key:
            output += f"'{kv_pair.key.content}': '{kv_pair.value.content if kv_pair.value else None}'\n"

    # Extract tables
    output += "\n----Tables found in document----\n"
    for table_idx, table in enumerate(result.tables):
        output += f"Table: {table_idx + 1}\n"
        output += f"Row Count: {table.row_count}, Column Count: {table.column_count}\n"
        output += "Cells:\n"
        for cell in table.cells:
            output += f"Row Index: {cell.row_index}, Column Index: {cell.column_index}, Content: {cell.content}\n"
        output += "----------------------------------------\n"

    return output


if __name__ == "__main__":
    import sys
    from azure.core.exceptions import HttpResponseError

    try:
        document_data = analyze_general_documents()
        # Do not print, just store in a variable
    except HttpResponseError as error:
        print(
            "For more information about troubleshooting errors, see the following guide: "
            "https://aka.ms/azsdk/python/formrecognizer/troubleshooting"
        )
        # Examples of how to check an HttpResponseError
        # Check by error code:
        if error.error is not None:
            if error.error.code == "InvalidImage":
                print(f"Received an invalid image error: {error.error}")
            if error.error.code == "InvalidRequest":
                print(f"Received an invalid request error: {error.error}")
            # Raise the error again after printing it
            raise
        # If the inner error is None and then it is possible to check the message to get more information:
        if "Invalid request".casefold() in error.message.casefold():
            print(f"Uh-oh! Seems there was an invalid request: {error}")
        # Raise the error again
        raise

# ----------------------------------------------------------------------------------------------------------------------------------

# Loading environment variables
load_dotenv()

# Configuring Google Generative AI with API key
genai.configure(api_key='AIzaSyCDGhmT6kSuZxqj1Di35UspQ8wFHVIhF2I')

# Function to load Gemini Pro model and get responses
def get_gemini_response(question):
    model = genai.GenerativeModel("gemini-pro")
    chat = model.start_chat(history=[])
    response = chat.send_message(question, stream=True)
    return response


answer = []
# Ask a query and print the response
# query = input("Enter your query: ")
query = "Tell me the name of the person only without any extra words to whom the details are reffering to? If not found give output as NULL only"
query = query + document_data
response = get_gemini_response(query)
print("Response:")
for chunk in response:
    answer.append(chunk.text)
full_answer = ''.join(answer)
print(full_answer)
answer1 = []
query = "Tell me the father name of the person only without any extra words to whom the details are reffering to?If not found give output as NULL only"
query = query + document_data
response1 = get_gemini_response(query)
print("Response1:")
for chunk in response1:
    answer1.append(chunk.text)
full_answer1 = ''.join(answer1)
print(full_answer1)
answer2 = []
query = "Tell me the mother name of the person only without any extra words to whom the details are reffering to?If not found give output as NULL only"
query = query + document_data
response2 = get_gemini_response(query)
print("Response2:")
for chunk in response2:
    answer2.append(chunk.text)
full_answer2 = ''.join(answer2)
print(full_answer2)
answer3 = []
query = "Tell me the only CGPA of the person without any extra words to whom the details are reffering to?If not found give output as NULL only"
query = query + document_data
response3 = get_gemini_response(query)
print("Response3:")
for chunk in response3:
    answer3.append(chunk.text)
full_answer3 = ''.join(answer3)
print(full_answer3)

import pandas as pd

# Read the original DataFrame
df = pd.read_csv('Data.csv')

# Create a new DataFrame with the data to append
new_data = pd.DataFrame({'Name': [full_answer],
                         'Father Name': [full_answer1],
                         'Mother Name': [full_answer2],
                         'CGPA': [full_answer3]})

# Concatenate the original DataFrame with the new data
df = pd.concat([df, new_data], ignore_index=True)
df.to_csv('Data.csv', index=False)

  from .autonotebook import tqdm as notebook_tqdm


ServiceRequestError: <urllib3.connection.HTTPSConnection object at 0x0000020281200680>: Failed to resolve 'form-recognition-project.cognitiveservices.azure.com' ([Errno 11001] getaddrinfo failed)

In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
import os
from dotenv import load_dotenv
import google.generativeai as genai
import pandas as pd


def analyze_general_documents():
    path_to_sample_documents = os.path.abspath(r"C:\Users\HP\OneDrive\Desktop\DigiGov\Certificatet Data Extraction\Certificatet Data Extraction\10th_long_memo.pdf")

    endpoint = "https://form-recognition-project.cognitiveservices.azure.com/"
    key = "59268cc137cf4ae891e344c114ac7eeb"

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    with open(path_to_sample_documents, "rb") as f:
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-document", document=f
        )
    result = poller.result()

    # Initialize multi-line string to store all data
    output = ""

    # Extract key-value pairs
    output += "----Key-value pairs found in document----\n"
    for kv_pair in result.key_value_pairs:
        if kv_pair.key:
            output += f"'{kv_pair.key.content}': '{kv_pair.value.content if kv_pair.value else None}'\n"

    # Extract tables
    output += "\n----Tables found in document----\n"
    for table_idx, table in enumerate(result.tables):
        output += f"Table: {table_idx + 1}\n"
        output += f"Row Count: {table.row_count}, Column Count: {table.column_count}\n"
        output += "Cells:\n"
        for cell in table.cells:
            output += f"Row Index: {cell.row_index}, Column Index: {cell.column_index}, Content: {cell.content}\n"
        output += "----------------------------------------\n"

    return output


if __name__ == "__main__":
    import sys
    from azure.core.exceptions import HttpResponseError

    try:
        document_data = analyze_general_documents()
        # Do not print, just store in a variable
    except HttpResponseError as error:
        print(
            "For more information about troubleshooting errors, see the following guide: "
            "https://aka.ms/azsdk/python/formrecognizer/troubleshooting"
        )
        # Examples of how to check an HttpResponseError
        # Check by error code:
        if error.error is not None:
            if error.error.code == "InvalidImage":
                print(f"Received an invalid image error: {error.error}")
            if error.error.code == "InvalidRequest":
                print(f"Received an invalid request error: {error.error}")
            # Raise the error again after printing it
            raise
        # If the inner error is None and then it is possible to check the message to get more information:
        if "Invalid request".casefold() in error.message.casefold():
            print(f"Uh-oh! Seems there was an invalid request: {error}")
        # Raise the error again
        raise

# ----------------------------------------------------------------------------------------------------------------------------------

# Loading environment variables
load_dotenv()

# Configuring Google Generative AI with API key
genai.configure(api_key='AIzaSyCDGhmT6kSuZxqj1Di35UspQ8wFHVIhF2I')

# Function to load Gemini Pro model and get responses
def get_gemini_response(question):
    model = genai.GenerativeModel("gemini-pro")
    chat = model.start_chat(history=[])
    response = chat.send_message(question, stream=True)
    return response


answer = []
# Ask a query and print the response
# query = input("Enter your query: ")
query = "Tell me the name of the person only without any extra words to whom the details are reffering to?"
query = query + document_data
response = get_gemini_response(query)
print("Response:")
for chunk in response:
    answer.append(chunk.text)
full_answer = ''.join(answer)
print(full_answer)
answer1 = []
query = "Tell me the father name of the person only without any extra words to whom the details are reffering to?"
query = query + document_data
response1 = get_gemini_response(query)
print("Response1:")
for chunk in response1:
    answer1.append(chunk.text)
full_answer1 = ''.join(answer1)
print(full_answer1)
answer2 = []
query = "Tell me the mother name of the person only without any extra words to whom the details are reffering to?"
query = query + document_data
response2 = get_gemini_response(query)
print("Response2:")
for chunk in response2:
    answer2.append(chunk.text)
full_answer2 = ''.join(answer2)
print(full_answer2)
answer3 = []
query = "Tell me the only CGPA of the person without any extra words to whom the details are reffering to?"
query = query + document_data
response3 = get_gemini_response(query)
print("Response3:")
for chunk in response3:
    answer3.append(chunk.text)
full_answer3 = ''.join(answer3)
print(full_answer3)

import pandas as pd

# Read the original DataFrame
df = pd.read_csv('Data.csv')

# Create a new DataFrame with the data to append
new_data = pd.DataFrame({'Name': [full_answer],
                         'Father Name': [full_answer1],
                         'Mother Name': [full_answer2],
                         'CGPA': [full_answer3]})

# Concatenate the original DataFrame with the new data
df = pd.concat([df, new_data], ignore_index=True)
df.to_csv('Data.csv', index=False)

ModuleNotFoundError: No module named 'azure'