# Install pip dependencies

In [58]:
!pip install autogen-agentchat==0.4.2 --user
!pip install PyMuPDF --user



In [2]:
# configurations
# Path of the PDF that the conversion agent reads (relative path; presumably
# resolved against the notebook's working directory).
pdf_path = "original/outlier-or-laggard-divergence-and-convergence-in-the-uks-recent-inflation-performance.pdf"
# Directory that receives the rendered page images.
output_dir = "output/PyMuPDF/outlier-or-laggard-divergence-and-convergence-in-the-uks-recent-inflation-performance/"
# Configure Google Cloud project
# Both values are passed to vertexai.init() before any Gemini call is made.
PROJECT_ID = "cryptic-skyline-411516"
REGION = "us-central1"

In [15]:
import os
from typing import AsyncGenerator, List, Sequence, Tuple

import fitz  # PyMuPDF
from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage
from autogen_core import CancellationToken

# define custom agent - convert pdf to pngs
class ConverPDFAgent(BaseChatAgent):
    """Chat agent that renders every page of a PDF to a PNG file.

    The content of incoming messages is not inspected: each invocation renders
    the PDF at ``input_path`` into ``output_path`` and reports progress as one
    ``TextMessage`` per page, followed by a final ``Response`` whose chat
    message is ``"Done!"``.

    Args:
        name: Agent name; also used as the ``source`` of every emitted message.
        input_path: Path of the PDF to convert.
        output_path: Directory that receives the ``page_<n>.png`` files. It is
            created if it does not exist yet.
        zoom: Scale factor applied to both axes when rendering a page; higher
            values increase the resolution of the images.
    """

    def __init__(self, name: str, input_path: str, output_path: str, zoom: float = 2.0):
        super().__init__(name, "An agent that convers PDFs to Images.")
        self._input_path = input_path
        self._output_path = output_path
        self._zoom = zoom

    @property
    def produced_message_types(self) -> Sequence[type[ChatMessage]]:
        """Message types this agent can produce (text only)."""
        return (TextMessage,)

    async def on_messages(self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken) -> Response:
        """Run the conversion to completion and return only the final ``Response``.

        Drains ``on_messages_stream`` and discards the per-page progress
        messages (they remain available in ``Response.inner_messages``).
        """
        response: Response | None = None
        async for message in self.on_messages_stream(messages, cancellation_token):
            if isinstance(message, Response):
                response = message
        assert response is not None
        return response

    async def on_messages_stream(
        self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken
    ) -> AsyncGenerator[AgentEvent | ChatMessage | Response, None]:
        """Render the PDF page by page, yielding a progress message per page.

        Yields:
            One ``TextMessage`` (``"processing page_<n>.."``) after each page
            has been written to disk, then a final ``Response`` carrying
            ``"Done!"`` and the list of all progress messages.

        Note:
            ``messages`` and ``cancellation_token`` are accepted for interface
            compatibility but are not used. Rendering and saving are
            synchronous calls, so the event loop is blocked while a page is
            being processed.
        """
        inner_messages: List[AgentEvent | ChatMessage] = []
        # The zoom matrix is the same for every page, so build it once.
        zoom_matrix = fitz.Matrix(self._zoom, self._zoom)
        with fitz.open(self._input_path) as pdf_document:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(self._output_path, exist_ok=True)
            # Pages are numbered from 1 in file names and progress messages.
            for page_num, page in enumerate(pdf_document, start=1):
                pix = page.get_pixmap(matrix=zoom_matrix)  # Render page to an image
                output_file = os.path.join(self._output_path, f"page_{page_num}.png")
                pix.save(output_file)
                msg = TextMessage(content=f"processing page_{page_num}..", source=self.name)
                inner_messages.append(msg)
                yield msg
        yield Response(chat_message=TextMessage(content="Done!", source=self.name), inner_messages=inner_messages)

    async def on_reset(self, cancellation_token: CancellationToken) -> None:
        """Nothing to reset: the agent keeps no conversation state."""
        pass


async def run_countdown_agent() -> None:
    """Convert the configured PDF to images and print each progress message.

    Builds a ``ConverPDFAgent`` from the module-level ``pdf_path`` and
    ``output_dir`` settings, streams its output and prints the text of every
    item: the content of intermediate messages, and the chat message content
    of the final ``Response``.
    """
    # The agent is still named "countdown"; it performs the PDF conversion.
    converter = ConverPDFAgent("countdown", pdf_path, output_dir)
    stream = converter.on_messages_stream([], CancellationToken())
    async for item in stream:
        is_final = isinstance(item, Response)
        print(item.chat_message.content if is_final else item.content)


# Use asyncio.run(run_countdown_agent()) when running in a script.
await run_countdown_agent()

processing page_1..
processing page_2..
processing page_3..
processing page_4..
processing page_5..
processing page_6..
processing page_7..
processing page_8..
processing page_9..
processing page_10..
processing page_11..
processing page_12..
processing page_13..
processing page_14..
Done!


In [106]:
import os
import asyncio
from autogen_core import CancellationToken
from autogen_agentchat.agents import UserProxyAgent
from autogen_agentchat.messages import TextMessage
import autogen
import vertexai
import traceback
import base64
from vertexai.generative_models import GenerativeModel, Part, Image

# Initialize Vertex AI client
vertexai.init(project=PROJECT_ID, location=REGION)
# Function to encode image to base64
def encode_image(image_path):
    """Return the content of the file at ``image_path`` as base64 text.

    Prints ``encoding...`` before reading. The file is read in binary mode and
    the base64 output is returned as a ``str``.
    """
    print("encoding...")
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return str(encoded_bytes, "utf-8")
from pathlib import Path
def encode_pdf(pdf_path):
    """Return the raw bytes of the file at ``pdf_path``.

    Despite the name, no encoding is applied: the file content is returned
    unchanged as ``bytes``.
    """
    with open(pdf_path, "rb") as pdf_file:
        return pdf_file.read()

# Function to call Vertex AI Gemini for image-to-text summarization
async def call_vertex_gemini_document_to_text(doc_path: str, mime_type: str | None = None) -> str:
    """Ask Gemini to describe an image, or to summarise the images in a PDF.

    Args:
        doc_path: Path of the file to send to the model.
        mime_type: Pass ``"application/pdf"`` to upload the file as a PDF.
            Any other value, including the default ``None``, loads the file
            as an image.

    Returns:
        The text of the model's reply. If anything fails, the traceback is
        printed and a string starting with ``"Error calling Vertex AI: "`` is
        returned instead of raising.
    """
    try:
        model = GenerativeModel("gemini-1.5-flash-002")
        if mime_type == "application/pdf":
            print("summarise pdf...")
            attachment_file = Part.from_data(
                mime_type="application/pdf",
                data=encode_pdf(doc_path),
            )
            prompt = "Can you  summarise images from the attached pdf?"
        else:
            print("summarise image...")
            attachment_file = Part.from_image(Image.load_from_file(doc_path))
            prompt = "Describe this image?"

        contents = [attachment_file, prompt]
        # count_tokens() and generate_content() are synchronous network calls.
        # Running them in a worker thread keeps this coroutine from blocking
        # the event loop while waiting for Vertex AI.
        token_count = await asyncio.to_thread(model.count_tokens, contents)
        print("total_tokens: ", token_count)
        response = await asyncio.to_thread(model.generate_content, contents)
        print(response.usage_metadata)
        return response.text
    except Exception as e:
        # Deliberate best-effort boundary: callers receive the error as text.
        print(traceback.format_exc())
        return f"Error calling Vertex AI: {str(e)}"

# Custom UserProxyAgent to interact with Gemini
class GeminiUserProxyAgent(UserProxyAgent):
    """Agent that answers with Gemini's description of a fixed document.

    The document is chosen at construction time; incoming messages are not
    inspected.

    Args:
        name: Agent name.
        input_path: Path of the image or PDF sent to Gemini.
        mime_type: ``"application/pdf"`` to send the file as a PDF; any other
            value (including ``None``) sends it as an image.
    """

    def __init__(self, name: str, input_path: str, mime_type: str | None = None):
        super().__init__(name=name, description="An agent that converts images to text.")
        self._input_path = input_path
        self._mime_type = mime_type

    async def on_messages(self, messages=None, cancellation_token: CancellationToken | None = None) -> TextMessage:
        """Send the configured document to Gemini and wrap the reply.

        Args:
            messages: Accepted so the method can be called with the usual
                ``on_messages(messages, cancellation_token)`` argument order;
                the value is ignored.
            cancellation_token: Accepted for interface compatibility; ignored.

        Returns:
            A ``TextMessage`` with source ``"gemini"`` holding Gemini's reply
            (or the error text produced by the helper).
        """
        # NOTE(review): the framework's on_messages normally returns a Response;
        # a bare TextMessage is kept here because existing callers read
        # `.content` directly from the result.
        gemini_response = await call_vertex_gemini_document_to_text(
            doc_path=self._input_path, mime_type=self._mime_type
        )
        return TextMessage(content=gemini_response, source="gemini")

# Main function to run the agent
async def document_summary_agent(input_path: str, mime_type: str | None = None) -> None:
    """Describe one document with Gemini and print the reply.

    Args:
        input_path: Path of the image or PDF to describe.
        mime_type: ``"application/pdf"`` for a PDF; leave as ``None`` for an
            image.
    """
    agent = GeminiUserProxyAgent(name="image_summariser", input_path=input_path, mime_type=mime_type)
    # The agent works from the document path given at construction, so no
    # user message needs to be built or sent.
    response = await agent.on_messages(cancellation_token=CancellationToken())
    print(f"Gemini's response: {response.content}")

# Run the async function
await document_summary_agent(output_dir+"//page_8.png")
await document_summary_agent("C:\\Code\\ml\\playground\\original\\outlier-or-laggard-divergence-and-convergence-in-the-uks-recent-inflation-performance.pdf","application/pdf")

summarise image...
total_tokens:  total_tokens: 262
total_billable_characters: 18

prompt_token_count: 262
candidates_token_count: 280
total_token_count: 542

Gemini's response: This image contains two charts from a Bank of England report, focusing on inflation and pay growth. 


**Chart 5:** This line chart shows the one-year-ahead inflation expectations from three sources (Bank/Ipsos, Citi/YouGov, and Decision Maker Panel) from 2008 to 2024.  The chart highlights a significant drop in inflation expectations from mid-2022 to early 2024. Dashed lines indicate the average inflation expectations from 2006 to 2019.


**Chart 6:** This stacked bar chart displays the components of private sector regular pay growth from 2018 to 2024. The components include: inflation expectations, productivity, unexplained factors (residual), and slack. The chart shows a decline in pay growth from 2023 to 2024, accompanied by the decrease in inflation expectations. 


**Overall Report Narrative:** The report