In [1]:
# Jina AI Reader

import requests
import json 

def call_jina_reader(url, headers=None, respond_with="text", timeout=60):
    """Fetch a page through the Jina AI Reader endpoint and return the parsed JSON.

    Args:
        url: Address of the page to read. The ``https://r.jina.ai/`` prefix is
            prepended unless the URL already starts with it.
        headers: Optional mapping of extra request headers. Entries here
            override the defaults built below (``Accept`` and
            ``x-respond-with``).
        respond_with: Value sent in the ``x-respond-with`` header
            (for example ``"text"`` or ``"html"``).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            one, a stalled connection would block the kernel forever. Pass
            ``None`` to wait indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    reader_prefix = "https://r.jina.ai/"
    if not url.startswith(reader_prefix):
        url = f"{reader_prefix}{url}"

    # Defaults first, so caller-supplied headers can override them.
    request_headers = {
        # "x-with-generated-alt": "true", # Enable image captioning
        "Accept": "application/json",
        "x-respond-with": respond_with,
    }
    if headers:
        request_headers.update(headers)

    response = requests.get(url, headers=request_headers, timeout=timeout)

    # Surface HTTP errors before attempting to decode the body.
    response.raise_for_status()

    return json.loads(response.text)

In [2]:
# Ollama Local

from openai import OpenAI

# OpenAI-compatible client pointed at the local server on port 11434.
# NOTE(review): api_key='ollama' is presumably just a placeholder — confirm.
client = OpenAI(api_key='ollama', base_url='http://localhost:11434/v1/')

In [2]:
# Request the article through the Jina reader in "html" mode and keep the
# HTML string found under data -> html in the JSON reply.
url = "https://www.xda-developers.com/hacked-nintendo-3ds-better-than-gaming-handheld/"
response = call_jina_reader(url=url, respond_with="html")
html_content = response['data']['html']

In [None]:
# Inspect the HTML returned by the reader (prints the whole string).
print(html_content)

In [13]:
# Query LLM w/ Ollama: send the fetched HTML as a single user message to the
# reader-lm model and keep the text of the first choice.
# Notes: doesn't work very well with verbose html data

chat_completion = client.chat.completions.create(
    model='reader-lm:1.5b-fp16',
    temperature=0.1,
    messages=[{'role': 'user', 'content': html_content}],
)
result_markdown = chat_completion.choices[0].message.content

In [None]:
# Query LLM w/ Hugging Face

# pip install transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "jinaai/reader-lm-1.5b"

# Use the GPU when CUDA is available, otherwise fall back to the CPU so this
# cell still runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

# example html content
# html_content = "<html><body><h1>Hello, world!</h1></body></html>"

# Wrap the HTML in a single user turn and render it with the tokenizer's chat
# template as plain text (tokenize=False), so the prompt can be inspected.
messages = [{"role": "user", "content": html_content}]
input_text=tokenizer.apply_chat_template(messages, tokenize=False)

print(input_text)

# Encode the rendered prompt and generate with sampling disabled
# (do_sample=False), capped at 1024 new tokens.
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
outputs = model.generate(inputs, max_new_tokens=1024, temperature=0, do_sample=False, repetition_penalty=1.08)

# Decode and show the first sequence in the generated batch.
print(tokenizer.decode(outputs[0]))


In [None]:
# Show the text stored in result_markdown, which is assigned in the Ollama
# chat-completion cell (not by the Hugging Face cell).
print(result_markdown)