# pydantic parser

In [None]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel

# Target schema for the parser experiment in this cell: one string field
# with no default. (Kept as a comment on purpose so nothing is added to the
# model itself.)
class NameModel(BaseModel):
    name: str


# After Python's own escape processing the literal below contains two
# backslash characters before "up", i.e. a single JSON-escaped backslash.
# NOTE: this rebinds the builtin name `input` for the rest of the session.
input = '{"name" : "Harry $\\\\up$"}'
print(input)

# Parse the JSON text into a NameModel instance.
parser = PydanticOutputParser(pydantic_object=NameModel)
output = parser.parse(input)
# Both prints show only the parsed `name` field, not the whole model.
# NOTE(review): with standard JSON unescaping the field would hold a single
# backslash before "up" -- confirm against the parser's real output.
print(repr(output.name))  # repr of the field: escapes are shown doubled
print(output.name)  # the field as plain text




# regex to identify backslashes

In [None]:
import re

def escape_latex_backslashes(text: str) -> str:
    """Double every lone backslash in *text* and return the result.

    A backslash is left untouched when it is immediately preceded by another
    backslash, or immediately followed by ``n`` or by another backslash.
    """
    lone_backslash = re.compile(r'(?<!\\)\\(?!(n|\\))')
    # In a replacement template this escape sequence yields two backslashes.
    doubled = r'\\\\'
    return lone_backslash.sub(doubled, text)

# Raw string: two backslash characters followed by "cup".
input1 = r"\\cup"
# Neither backslash is "lone" (the first is followed by a backslash, the
# second is preceded by one), so the text comes back unchanged.
output = escape_latex_backslashes(input1)

print(output)

In [None]:
import pypandoc

# Smoke test: convert a one-line Markdown heading to HTML through pypandoc
# and print whatever comes back.
output = pypandoc.convert_text('# Hello', 'html', format='md')
print(output)

In [None]:
import requests

# Send a small Markdown/LaTeX sample to the service listening on
# localhost:8080 and report whether it answered with a success status.
data = "$$x^2 + y^2$$ = z^2"
# requests applies no timeout by default, so an unreachable or stalled
# service would block this cell forever. Allow 5 s to connect and 60 s
# between received bytes before giving up with a requests Timeout error.
resp = requests.post("http://localhost:8080/process", data=data, timeout=(5, 60))

if resp.ok:
    print("✅ Processed:", resp.text)
else:
    print("❌ Error:", resp.text)

In [None]:
import re

def extract_images(text: str) -> list[str]:
    """
    Extract image targets from Markdown text.

    Matches inline images written as ``![alt](target)`` and returns each
    target in document order, stripped of surrounding whitespace. When the
    target carries a quoted title, as in ``![alt](url 'title')``, the title
    is dropped so that only the URL/path is returned.

    Args:
        text: Markdown source to scan.

    Returns:
        A list of image URLs/paths; empty when the text contains no image.
    """
    image_pattern = r'!\[.*?\]\((.*?)\)'
    # Whitespace followed by a single- or double-quoted string at the very
    # end of the target is a Markdown image title, not part of the URL.
    title_suffix = r'''\s+(?:"[^"]*"|'[^']*')$'''
    return [
        re.sub(title_suffix, '', target.strip())
        for target in re.findall(image_pattern, text)
    ]

# Two inline images: both targets come back, in document order.
result = extract_images("![alt text](http://example.com/image.png) and ![another](http://example.com/another.png)")
print(result)  # Output: ['http://example.com/image.png', 'http://example.com/another.png']

# A bare file name without the ![...](...) syntax is not an image.
result = extract_images("No images here, text.png")
print(result)  # Output: []

# Relative paths are returned exactly as written.
result = extract_images("1 images here, ![yap](text.png)")
print(result)  # Output: ['text.png']

In [None]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Load variables from a .env file into the process environment so that
# OPENAI_API_KEY can be read below.
load_dotenv()

# Chat model under test: gpt-5-mini with reasoning effort set to "high".
# os.environ[...] raises KeyError when OPENAI_API_KEY is not set.
llm_mini = ChatOpenAI(
            model="gpt-5-mini",
            api_key=os.environ["OPENAI_API_KEY"],
            reasoning_effort="high"
        )
# Candidate prompts for the experiment. Each assignment overwrites the one
# before it, so only the last prompt is actually sent to the model.
prompt = "how many letters are in this prompt, only return the number."
prompt = "return and only return the prompt exactly"
prompt = "waeuifgiufaiu liaisofeoidob ofbea df kdb vboae beoihffewafne nod In this prompt, where does the first p occur, using 0 indexing? only return the answer"

response = llm_mini.invoke(prompt).content

# Ground truth computed locally (0-based index of the first "p" in the
# prompt), printed just before the model's answer for comparison.
print(list(prompt).index("p"))
print(response)
# Disabled check that belonged to the "how many letters" prompt above:
# print(len(response) == int(response))

In [None]:
import os
from pathlib import Path

# Report where this code is running from.
print("Current working directory:", os.getcwd())
if '__file__' in globals():
    # Only defined when the code runs as a regular module/script.
    print("Notebook file location:", Path(__file__).parent)
else:
    print("Notebook file location:", "Not available in notebook")
print("VS Code workspace root:", os.environ.get('VSCODE_CWD', 'Not set'))

# Directories probed for a .env file: the working directory, its parent and
# grandparent, plus one hard-coded absolute project path.
locations = [".", "..", "../..", "/home/jimbo/lambda/in2lambda"]

for loc in locations:
    env_path = Path(loc, ".env")
    if not env_path.exists():
        continue
    print(f"Found .env at: {env_path.resolve()}")

In [None]:
import pypandoc
import json

# Sample Markdown mixing a display-math block, inline math and item markers
# such as "(b)", "2." and "(c)"; used to inspect how pandoc parses it.
# NOTE: this rebinds the builtin name `input` for the rest of the session.
input = r"""
(b)

$$
\begin{aligned}
f_{X}(x) & =\int_{y=-\infty}^{\infty} f_{X Y}(x, y) d y=\frac{1}{8} \int_{y=0}^{2}(x+y) d y=\frac{1}{8}\left[x y+\frac{y^{2}}{2}\right]_{y=0}^{2} \\
& =\frac{1}{4}(x+1)
\end{aligned}
$$
2.
for $0<x<2$, and 0 otherwise. Identically for $f_{Y}(y)$.
(c) Since $f(x, y) \neq f(x) f(y), X$ and $Y$ are not independent.
"""

# Markdown -> pandoc AST, returned by pypandoc as JSON text.
ast = pypandoc.convert_text(input, to='json', format='md')

# Decode the JSON text; `ast` is reused for the decoded object.
ast = json.loads(ast)
print(input)
print("===================================================================================================================================================")
# Pretty-print the whole AST, then the number of top-level blocks followed
# by each block on its own line.
print(json.dumps(ast, indent=2))

print(len(ast['blocks']))
for a in ast['blocks']:
    print(a)


print("===================================================================================================================================================")

# Round trip: serialize the AST back to JSON text and have pandoc render it
# as Markdown again. `ast` ends up holding the rendered Markdown string.
ast = json.dumps(ast)
ast = pypandoc.convert_text(ast, to='md', format='json')
print(ast)


In [None]:
import json

# Using line_parser.lua
def parse_with_custom_reader(markdown_file: str):
    """Parse a Markdown file with pandoc using the ``line_parser.lua`` reader.

    Runs ``pandoc --from=line_parser.lua <markdown_file> -t json`` and decodes
    the JSON document that pandoc writes to stdout.

    Args:
        markdown_file: Path of the Markdown file handed to pandoc.

    Returns:
        The decoded pandoc JSON AST, or ``None`` when the ``pandoc``
        executable cannot be found or pandoc exits with a non-zero status.
        In both failure cases a line starting with ``Error:`` is printed.
    """
    import subprocess

    command = ['pandoc', '--from=line_parser.lua', markdown_file, '-t', 'json']
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # The pandoc executable itself is missing. Report it the same way as
        # a pandoc failure instead of letting the exception escape.
        print(f"Error: {exc}")
        return None

    if result.returncode != 0:
        print(f"Error: {result.stderr}")
        return None
    return json.loads(result.stdout)

# Usage: parse testing.md (path relative to the working directory).
# `ast` is None when pandoc reports a failure.
ast = parse_with_custom_reader("testing.md")

In [None]:
# intermediate representation of the markdown
class Markdown:
    """Base node of the intermediate Markdown representation.

    Stores the value it is constructed with in the ``content`` attribute.
    """

    def __init__(self, content):
        self.content = content

class DisplayMath(Markdown):
    """Display-math node: renders as its content between two ``$$`` lines."""

    # Class-level default; each instance gets its own value on construction.
    content = ""

    # Construction is inherited unchanged from Markdown.

    def __repr__(self):
        return f"DisplayMath({self.content!r})"

    def __str__(self):
        return f"$$\n{self.content}\n$$"

class RegularText(Markdown):
    """Plain-text node: renders as its content, unchanged."""

    # Construction is inherited unchanged from Markdown.

    def __repr__(self):
        return f"RegularText({self.content!r})"

    def __str__(self):
        return self.content


def markdown_to_classes(markdown: str) -> list[Markdown]:
    """Split Markdown text into display-math and regular-text nodes.

    The text is processed line by line. A line consisting solely of ``$$``
    toggles display-math mode: the lines between an opening and a closing
    ``$$`` become one ``DisplayMath`` node (delimiters excluded), and every
    other line becomes its own ``RegularText`` node.

    If the text ends inside an unterminated ``$$`` block, the opening
    delimiter and the lines after it are emitted as ``RegularText`` nodes
    instead of being dropped, so no input line is lost.

    Args:
        markdown: Markdown source text.

    Returns:
        The nodes in document order.
    """
    nodes: list[Markdown] = []
    math_buffer: list[str] = []
    in_display_math = False
    for line in markdown.split("\n"):
        if line == "$$":
            if in_display_math:
                # Closing delimiter: flush the collected math lines.
                nodes.append(DisplayMath("\n".join(math_buffer)))
                math_buffer = []
            in_display_math = not in_display_math
        elif in_display_math:
            math_buffer.append(line)
        else:
            nodes.append(RegularText(line))
    if in_display_math:
        # No closing delimiter was found: keep the block verbatim as text.
        nodes.append(RegularText("$$"))
        nodes.extend(RegularText(line) for line in math_buffer)
    return nodes

def classes_to_markdown(classes: list[Markdown]) -> str:
    """Render nodes as text: ``str()`` of each node, joined by newlines."""
    return "\n".join(str(node) for node in classes)


# Round-trip check on a sample that contains one display-math block: parse
# the text into nodes, render the nodes back, and print whether the result
# equals the original text.
# NOTE: this rebinds the builtin name `input` for the rest of the session.
input = r"""
(b)

$$
\begin{aligned}
f_{X}(x) & =\int_{y=-\infty}^{\infty} f_{X Y}(x, y) d y=\frac{1}{8} \int_{y=0}^{2}(x+y) d y=\frac{1}{8}\left[x y+\frac{y^{2}}{2}\right]_{y=0}^{2} \\
& =\frac{1}{4}(x+1)
\end{aligned}
$$
2.
for $0<x<2$, and 0 otherwise. Identically for $f_{Y}(y)$.
(c) Since $f(x, y) \neq f(x) f(y), X$ and $Y$ are not independent.
"""

print(classes_to_markdown(markdown_to_classes(input)) == input)