# Setup

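Check that the environment is set up correctly: library versions, API keys, local Docling models, and the OpenAI and Mistral clients.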

In [1]:
from pathlib import Path
import os
from dotenv import load_dotenv
from openai import OpenAI
from mistralai import Mistral

In [10]:
import sys
import torch
import pydantic

# Report the interpreter and key library versions; the `=` specifier prints
# each expression next to the repr of its value.
print(
    f"{sys.version=}",
    f"{pydantic.__version__=}",
    f"{torch.__version__=}",
    sep="\n",
)

sys.version='3.10.18 (main, Aug  6 2025, 22:52:47) [MSC v.1944 64 bit (AMD64)]'
pydantic.__version__='2.11.7'
torch.__version__='2.8.0+cpu'


# Environment Variables

In [3]:
# Run python-dotenv first so that values from a local .env file (if any) are
# visible through os.environ below.
load_dotenv()

# Index os.environ directly (not .get) so that a missing key raises KeyError
# in this cell instead of failing later inside an API client.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
MISTRAL_API_KEY = os.environ["MISTRAL_API_KEY"]

# Docling

In [4]:
# Docling models are stored in this per-user cache directory.
path_artifacts = Path.home().joinpath(".cache", "docling", "models")

# Last expression of the cell: displays whether that directory exists.
path_artifacts.exists()

True

In [5]:
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

# Small public sample PDF used as a smoke test for the conversion pipeline.
source = "https://hutchesonlab.fiu.edu/wp-content/uploads/sample-pdf.pdf"

# Hand the PDF pipeline the local model directory defined above
# (as a POSIX-style string).
pipeline_options = PdfPipelineOptions(
    artifacts_path=path_artifacts.as_posix(),
)

# Only the PDF format gets custom options here.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
    },
)

doc = converter.convert(source).document

# Last expression of the cell: display the converted document as Markdown.
doc.export_to_markdown()


2025-09-04 15:19:17,738 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-09-04 15:19:17,791 - INFO - Going to convert document batch...
2025-09-04 15:19:17,802 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 79a916694caff5f1b1502ac8de0fb857
2025-09-04 15:19:17,837 - INFO - Loading plugin 'docling_defaults'
2025-09-04 15:19:17,925 - INFO - Registered ocr engines: ['easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-09-04 15:20:05,337 - INFO - Accelerator device: 'cpu'
  from .autonotebook import tqdm as notebook_tqdm
2025-09-04 15:20:15,336 - INFO - Accelerator device: 'cpu'
2025-09-04 15:20:22,042 - INFO - Accelerator device: 'cpu'
2025-09-04 15:20:23,620 - INFO - Loading plugin 'docling_defaults'
2025-09-04 15:20:23,660 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-09-04 15:20:23,664 - INFO - Processing document sample-pdf.pdf
2025-09-04 15:20:26,949 - INFO - Finished converting document sample-pdf.pdf in 70.12 sec.


'## This is a Sample PDF file'

# OpenAI

In [6]:
# Build the OpenAI client with the key read from the environment.
client_openai = OpenAI(api_key=OPENAI_API_KEY)

# One embedding request is enough to confirm that the key and network access work.
response = client_openai.embeddings.create(
    model="text-embedding-3-small",
    input="Your text string goes here",
)

# Print the length of the returned vector (1536 in the recorded run).
print(len(response.data[0].embedding))

2025-09-04 15:20:37,045 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


1536


# MistralAI

In [7]:
# Build the Mistral client with the key read from the environment.
client_mistral = Mistral(api_key=MISTRAL_API_KEY)

# A single short chat completion confirms that the key and the model name are accepted.
chat_response = client_mistral.chat.complete(
    model="mistral-medium-2505",
    messages=[
        {"role": "user", "content": "Tell me a joke about devops"},
    ],
)

# Show the text of the first returned choice.
print(chat_response.choices[0].message.content)

2025-09-04 15:20:38,998 - INFO - HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"


Sure! Here's a DevOps-themed joke for you:

**Why did the DevOps engineer break up with their significant other?**

Because they kept saying, *"It works on my machine!"*

*(Bonus groan-worthy follow-up: And then they tried to fix the relationship with a `git commit --amend`.)*

Hope that gives you a chuckle! 😄
