## SPLIT INTO PDFS

In [7]:
from pypdf import PdfReader, PdfWriter

def split_pdf(input_pdf, output_folder):
    """Write every page of `input_pdf` to its own single-page PDF file.

    Pages are saved as ``<output_folder>/page_<n>.pdf`` with ``n`` starting
    at 1, and each saved path is printed.

    Args:
        input_pdf: Path of the PDF to split.
        output_folder: Directory that receives the per-page files. It is
            created (including missing parents) when it does not exist yet.
    """
    import os  # local import: this cell only imports pypdf

    # Without this, open() below raises FileNotFoundError when the folder has
    # not been created by hand beforehand.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf)

    for page_number, page in enumerate(reader.pages, start=1):
        # A fresh writer per page so every output file holds exactly one page.
        writer = PdfWriter()
        writer.add_page(page)

        output_path = f"{output_folder}/page_{page_number}.pdf"
        with open(output_path, "wb") as output_file:
            writer.write(output_file)

        print(f"Saved: {output_path}")



In [8]:
# Example usage: write each page of "ml.pdf" to extracted/page_<n>.pdf
split_pdf("ml.pdf", "extracted")

Saved: extracted/page_1.pdf
Saved: extracted/page_2.pdf
Saved: extracted/page_3.pdf
Saved: extracted/page_4.pdf
Saved: extracted/page_5.pdf
Saved: extracted/page_6.pdf
Saved: extracted/page_7.pdf
Saved: extracted/page_8.pdf
Saved: extracted/page_9.pdf
Saved: extracted/page_10.pdf
Saved: extracted/page_11.pdf
Saved: extracted/page_12.pdf
Saved: extracted/page_13.pdf
Saved: extracted/page_14.pdf
Saved: extracted/page_15.pdf
Saved: extracted/page_16.pdf
Saved: extracted/page_17.pdf
Saved: extracted/page_18.pdf
Saved: extracted/page_19.pdf
Saved: extracted/page_20.pdf
Saved: extracted/page_21.pdf
Saved: extracted/page_22.pdf
Saved: extracted/page_23.pdf
Saved: extracted/page_24.pdf
Saved: extracted/page_25.pdf
Saved: extracted/page_26.pdf
Saved: extracted/page_27.pdf
Saved: extracted/page_28.pdf
Saved: extracted/page_29.pdf
Saved: extracted/page_30.pdf
Saved: extracted/page_31.pdf
Saved: extracted/page_32.pdf
Saved: extracted/page_33.pdf
Saved: extracted/page_34.pdf


## SPLIT INTO JPEGS

In [12]:
from pdf2image import convert_from_path
import os

def pdf_to_jpeg(input_pdf, output_folder, dpi=300):
    """Render every page of `input_pdf` to a JPEG file.

    Images are saved as ``<output_folder>/page_<n>.jpg`` with ``n`` starting
    at 1, and each saved path is printed.

    Args:
        input_pdf: Path of the PDF to render.
        output_folder: Directory that receives the images. It is created
            (including missing parents) when it does not exist yet.
        dpi: Resolution passed to `convert_from_path`.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    images = convert_from_path(input_pdf, dpi=dpi)

    for page_number, image in enumerate(images, start=1):
        image_path = f"{output_folder}/page_{page_number}.jpg"
        image.save(image_path, "JPEG")
        print(f"Saved: {image_path}")

In [13]:


# Example usage: render each page of "ml.pdf" to extracted-images/page_<n>.jpg
# at the default 300 dpi
pdf_to_jpeg("ml.pdf", "extracted-images")

Saved: extracted-images/page_1.jpg
Saved: extracted-images/page_2.jpg
Saved: extracted-images/page_3.jpg
Saved: extracted-images/page_4.jpg
Saved: extracted-images/page_5.jpg
Saved: extracted-images/page_6.jpg
Saved: extracted-images/page_7.jpg
Saved: extracted-images/page_8.jpg
Saved: extracted-images/page_9.jpg
Saved: extracted-images/page_10.jpg
Saved: extracted-images/page_11.jpg
Saved: extracted-images/page_12.jpg
Saved: extracted-images/page_13.jpg
Saved: extracted-images/page_14.jpg
Saved: extracted-images/page_15.jpg
Saved: extracted-images/page_16.jpg
Saved: extracted-images/page_17.jpg
Saved: extracted-images/page_18.jpg
Saved: extracted-images/page_19.jpg
Saved: extracted-images/page_20.jpg
Saved: extracted-images/page_21.jpg
Saved: extracted-images/page_22.jpg
Saved: extracted-images/page_23.jpg
Saved: extracted-images/page_24.jpg
Saved: extracted-images/page_25.jpg
Saved: extracted-images/page_26.jpg
Saved: extracted-images/page_27.jpg
Saved: extracted-images/page_28.jpg
S

## INITIAL VERSION

In [None]:
import os

# Read the OpenRouter API key from the environment rather than pasting it into
# the notebook, so the secret is never saved with the file. An empty string
# means the variable is not set and requests will be rejected by the API.
API_TOKEN = os.environ.get("OPENROUTER_API_KEY", "")

In [2]:
class State:
    """In-memory chat transcript stored as a list of role/content dicts."""

    def __init__(self):
        # Oldest message first; every entry has the keys "role" and "content".
        self.messages = list()

    def append_human(self, text):
        """Record `text` as a message with role "human"."""
        entry = dict(role="human", content=text)
        self.messages.append(entry)

    def append_ai(self, text):
        """Record `text` as a message with role "ai"."""
        entry = dict(role="ai", content=text)
        self.messages.append(entry)

In [3]:
def create_prompt(state):
    """Build the message list for the model: a system message, then the history.

    A new list is returned on every call; `state.messages` is not modified.
    """
    # Fixed instructions that always lead the prompt.
    instructions = (
        "You are an assistant that answers questions based on the given context. "
        "Here is some context from previous conversations, and you should try to provide an accurate answer."
    )
    header = {"role": "system", "content": instructions}

    # System message first, followed by every human/AI message recorded so far.
    return [header, *state.messages]

In [4]:
state = State()
# NOTE: create_prompt returns a new list built from state.messages at call
# time, so messages appended to `state` after this line do not appear in
# `prompt`.
prompt = create_prompt(state)

In [5]:
# Seed the history with a task description and an acknowledgement.
state.append_human("You are an AI agent that will be used to extract the words in a pdf file split into multiple pages")
state.append_ai("Ok Got it")

In [6]:
# Inspect the history recorded so far.
state.messages

[{'role': 'human',
  'content': 'You are an AI agent that will be used to extract the words in a pdf file split into multiple pages'},
 {'role': 'ai', 'content': 'Ok Got it'}]

In [9]:
import requests
import json
import base64

# Function to encode the image
def encode_image(image_path):
    """Return the bytes of the file at `image_path` as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Path to your image
image_path = "extracted-images/page_3.jpg"

# Getting the base64 string
base64_image = encode_image(image_path)

question = "transcribe the text?"


url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
  "Authorization": f"Bearer {API_TOKEN}",
  "Content-Type": "application/json"
}

# Build the context at request time. create_prompt returns a new list, so a
# prompt built before messages were appended to `state` would not contain them.
context = create_prompt(state)

payload = {
  "model": "meta-llama/llama-3.2-90b-vision-instruct",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": question + " Here is some context to previous conversation: " + str(context)
        },
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}"
          }
        }
      ]
    }
  ],

  "provider": {
      "order": [
          "DeepInfra",
          "SambaNova",
          "Fireworks"
      ]
  },
  "stream": True
}

buffer = ""
full_message = []
stream_finished = False
with requests.post(url, headers=headers, json=payload, stream=True) as r:
  # Fail loudly on HTTP errors (bad token, rate limit, ...) instead of
  # silently recording an empty answer.
  r.raise_for_status()
  # Pin the encoding: without it requests may fall back to ISO-8859-1 or hand
  # back raw bytes, which garbles non-ASCII text or breaks `buffer += chunk`.
  r.encoding = "utf-8"
  for chunk in r.iter_content(chunk_size=1024, decode_unicode=True):
    buffer += chunk
    # Consume every complete line currently buffered; a trailing partial line
    # stays in `buffer` until a later chunk completes it.
    while "\n" in buffer:
      line, buffer = buffer.split("\n", 1)
      line = line.strip()
      if not line.startswith("data: "):
        continue

      data = line[6:]
      if data == "[DONE]":
        stream_finished = True
        break

      try:
        data_obj = json.loads(data)
      except json.JSONDecodeError:
        # Skip lines that are not valid JSON.
        continue

      # Events without "choices" (e.g. an error object) carry no text.
      choices = data_obj.get("choices") or [{}]
      content = choices[0].get("delta", {}).get("content")
      if content:
        print(content, end="", flush=True)
        full_message.append(content)
    if stream_finished:
      break

# Streamed fragments already contain their own whitespace; joining them with a
# space would insert spaces in the middle of words.
joined_text = "".join(full_message)
state.append_human(question)
state.append_ai(joined_text)

I'm sorry, but I don't have the capability to transcribe text from an image. However, I can provide some general information about the context of the conversation.

context appears to be related to a discussion about machine learning or data science, as evidenced by the mention of terms such as "MAE", "MSE", "RMSE", "RAE", "ASE", and "R^2". These terms are commonly used in the field of machine learning to evaluate the performance of models.

context or clarify what specific question you would like me to answer, I would be happy to try and assist you.

## FINAL VERSION

In [1]:
import os

# Read the OpenRouter API key from the environment rather than pasting it into
# the notebook, so the secret is never saved with the file. An empty string
# means the variable is not set and requests will be rejected by the API.
API_TOKEN = os.environ.get("OPENROUTER_API_KEY", "")

In [2]:
import requests
import json
import base64

class ChatAgent:
    """Chat client for the OpenRouter chat-completions endpoint with history.

    Each call to `process_prompt` sends the question (together with the
    stringified history and an optional image), prints the streamed answer,
    and then records the question and the answer in `self.state`.
    """

    def __init__(self, api_token, model="google/gemini-2.0-flash-exp:free"):
        """
        Args:
            api_token (str): API key sent as a Bearer token.
            model (str, optional): Model identifier placed in the request
                payload. Defaults to the model this notebook was written for.
        """
        self.state = State()
        self.api_token = api_token
        self.model = model

    def append_message(self, role, text):
        """Add a message to the chat history."""
        self.state.append(role, text)

    def get_messages(self):
        """Extract and return the entire chat history."""
        return self.state.messages

    def encode_image(self, image_path):
        """Encode an image to base64."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _iter_stream_content(response):
        """Yield the text fragments carried by a streamed completion response.

        `response` must provide `iter_content(chunk_size=..., decode_unicode=...)`
        yielding text chunks. Only lines starting with ``data: `` are parsed;
        iteration stops at the ``[DONE]`` marker.
        """
        buffer = ""
        for chunk in response.iter_content(chunk_size=1024, decode_unicode=True):
            buffer += chunk
            # Consume every complete line currently buffered; a trailing
            # partial line stays in `buffer` until a later chunk completes it.
            while "\n" in buffer:
                line, buffer = buffer.split("\n", 1)
                line = line.strip()
                if not line.startswith('data: '):
                    continue

                data = line[6:]
                if data == '[DONE]':
                    # End of stream: stop reading instead of only leaving
                    # the inner loop.
                    return

                try:
                    data_obj = json.loads(data)
                except json.JSONDecodeError:
                    # Skip lines that are not valid JSON.
                    continue

                # Events without "choices" (e.g. an error object) carry no
                # text and must not raise KeyError.
                choices = data_obj.get("choices") or [{}]
                content = choices[0].get("delta", {}).get("content")
                if content:
                    yield content

    def process_prompt(self, question, image_path=None):
        """
        Process a prompt with an optional image.

        The answer is printed as it streams in; afterwards the question and
        the complete answer are appended to the chat history.

        Args:
            question (str): The question or prompt to process.
            image_path (str, optional): Path to the image file. Defaults to None.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        # Prepare the payload
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question + " Here is some context to previous conversation: " + str(self.get_messages())}
                    ]
                }
            ],
            "provider": {"order": ["DeepInfra", "SambaNova", "Fireworks"]},
            "stream": True
        }

        # Add image to payload if provided
        if image_path:
            base64_image = self.encode_image(image_path)
            payload["messages"][0]["content"].append(
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
            )

        # Send request to the API
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json"
        }

        full_message = []
        with requests.post(url, headers=headers, json=payload, stream=True) as r:
            # Fail loudly on HTTP errors (bad token, rate limit, ...) instead
            # of silently recording an empty answer.
            r.raise_for_status()
            # Pin the encoding: without it requests may fall back to
            # ISO-8859-1 or hand back raw bytes, which garbles non-ASCII text.
            r.encoding = "utf-8"
            for content in self._iter_stream_content(r):
                print(content, end="", flush=True)
                full_message.append(content)

        # Update state with the response. Streamed fragments already contain
        # their own whitespace, so they are concatenated without a separator.
        joined_text = "".join(full_message)
        self.append_message("human", question)
        self.append_message("ai", joined_text)

In [3]:
class State:
    """Chat history container: an ordered list of role/content dicts."""

    def __init__(self):
        # Oldest message first.
        self.messages = list()

    def append(self, role, text):
        """Add one message with the given `role` and `text` to the history."""
        entry = dict(role=role, content=text)
        self.messages.append(entry)



In [4]:

# Create the agent; its history starts out empty.
agent = ChatAgent(API_TOKEN)

# Add initial messages: a task description and an acknowledgement, stored in
# the history under the roles "human" and "ai".
agent.append_message("human", "You are an AI agent that will be used to extract the words in a pdf file split into multiple pages")
agent.append_message("ai", "Ok Got it")


In [None]:
# Process a prompt with an image: the streamed answer is printed and the
# question/answer pair is appended to the agent's history.
question = "Try to Transcribe the text and extract every single word"
image_path = "extracted-images/page_5.jpg"  # Replace with your actual image path
agent.process_prompt(question, image_path)

In [None]:
# Loop through every page image. The split_pdf run in this notebook produced
# 34 pages, so the range has to end at TOTAL_PAGES + 1 to include the last one
# (range(1, 34) stopped at page 33).
TOTAL_PAGES = 34
question = "Try to Transcribe the text and extract every single word."
for page_number in range(1, TOTAL_PAGES + 1):  # Pages 1 to TOTAL_PAGES
    image_path = f"extracted-images/page_{page_number}.jpg"  # Replace with your actual image path format
    print(f"\nProcessing page {page_number}...")
    agent.process_prompt(question, image_path)

In [None]:
# Show the full chat history accumulated by the agent.
agent.get_messages()