# Settings

In [8]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the environment, then read the Mistral
# API key from it (the value is None when the variable is not set).
load_dotenv()
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")

# Image conversion to markdown

In [None]:
import base64
from PIL import Image
from io import BytesIO

def load_image_as_base64(image_path):
    """
    Re-encodes an image file as JPEG and returns it as a Base64 string.

    Inputs:
        image_path (str): Path to the image file (any format PIL can open).

    Outputs:
        str: Base64 (UTF-8 text) encoding of the JPEG bytes. The payload is
        always JPEG, whatever the format of the input file.
    """

    with Image.open(image_path) as img:
        # JPEG can only store grayscale, RGB or CMYK pixels. Saving any other
        # mode (RGBA, LA, P, ...) raises OSError, so convert those to RGB
        # first; this drops the alpha channel / expands the palette.
        jpeg_ready = img if img.mode in ("L", "RGB", "CMYK") else img.convert("RGB")

        # Encode into an in-memory buffer rather than a temporary file.
        buffered = BytesIO()
        jpeg_ready.save(buffered, format="JPEG")

    # Encode the JPEG bytes to Base64 text.
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

In [None]:
from mistralai import Mistral
import re

def _markdown_path_for(image_filepath):
    """Maps an image path to its markdown path, e.g. 'data/jpg/a.jpg' -> 'data/md/a.md'."""
    stem, _extension = os.path.splitext(image_filepath)
    # Split on path separators while keeping them, so that only whole
    # directory components named 'jpg' are renamed -- never a substring of
    # another directory or of the file name itself.
    *dir_parts, file_stem = re.split(r"([\\/])", stem)
    dir_parts = ["md" if part == "jpg" else part for part in dir_parts]
    return "".join(dir_parts) + file_stem + ".md"


def _strip_markdown_fence(content):
    """Returns the text inside the first ```markdown fence, or the whole text if there is none."""
    # Non-greedy: the capture ends at the first closing ``` after the opener.
    match = re.search(r"```markdown(.*?)```", content, re.DOTALL)
    return (match.group(1) if match else content).strip()


def img_to_markdown(image_filepath, prompt, model):
    """
    Converts an image to a markdown file using Mistral API.

    The markdown is written next to the image tree: the file extension is
    replaced by '.md' and any directory named 'jpg' in the path becomes 'md'
    (e.g. 'data/jpg/tdf.jpg' -> 'data/md/tdf.md'). The output directory is
    created if it does not exist.

    Inputs:
        image_filepath (str): Path to the image file.
        prompt (str): Prompt to be used for the Mistral API.
        model (str): Model name to be used for the Mistral API.

    Outputs:
        str: The generated markdown content (with any surrounding
        ```markdown fence removed), which is also saved to disk.
    """

    markdown_saved_filepath = _markdown_path_for(image_filepath)

    # Read the image first so a bad input path fails before anything is
    # created on disk or sent to the API.
    img_base64 = load_image_as_base64(image_filepath)

    # Make sure the destination exists *before* the API call, so the response
    # is not lost to a missing directory afterwards.
    output_dir = os.path.dirname(markdown_saved_filepath)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {
                        # load_image_as_base64 always re-encodes to JPEG, so
                        # the data URI must declare image/jpeg.
                        "url": f"data:image/jpeg;base64,{img_base64}"
                    },
                },
            ],
        }
    ]

    client = Mistral(api_key=MISTRAL_API_KEY)
    response = client.chat.complete(model=model, messages=messages, temperature=0.0)

    # Remove the ```markdown ... ``` wrapper if the reply is fenced.
    markdown = _strip_markdown_fence(response.choices[0].message.content)

    with open(markdown_saved_filepath, 'w', encoding='utf-8') as f:
        f.write(markdown)

    print(f"Markdown saved to {markdown_saved_filepath}")

    return markdown



In [11]:
# Instruction text sent to the model together with the image. Written as
# explicit literals so the leading newline and the trailing spaces before each
# line break stay visible.
prompt = (
    "\n"
    "You are an advanced language model. Your task is to read an image. \n"
    "The goal is to transform the content of this image into a description into Markdown format to later be able to build a graph from it. \n"
)

In [12]:
# Name of the Mistral model passed to img_to_markdown.
model = "mistral-small-latest"

In [13]:
# Image to convert (path relative to the notebook's working directory).
image_filepath = "data/jpg/tdf.jpg"

In [14]:
# Run the conversion; the function prints where the markdown was saved.
# The trailing ';' keeps the cell from echoing any return value.
img_to_markdown(image_filepath=image_filepath, prompt=prompt, model=model);

Markdown saved to data/md/tdf.md
