In [1]:
from langchain_pymupdf4llm import PyMuPDF4LLMLoader
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
import os
from pydantic import BaseModel, Field
from typing import Literal
from rich.pretty import pprint

# Load variables from a local .env file (if any) before reading the settings below.
load_dotenv()

# Connection settings for the chat endpoint; only MODEL has a fallback value.
BASE_URL = os.environ.get("BASE_URL")
API_KEY = os.environ.get("API_KEY")
MODEL = os.environ.get("MODEL", "gemini-2.5-flash")

# Shared chat client reused by every later cell.
llm = ChatOpenAI(
    model=MODEL,
    api_key=API_KEY,  # type: ignore
    base_url=BASE_URL,
    temperature=0.1,
)

# Smoke test: one round-trip to confirm the credentials and endpoint work.
smoke_reply = llm.invoke("Hi")
print(smoke_reply.content)

Hi there! How can I help you today?


In [2]:
# PDF to process, given relative to the notebook's working directory.
file_path = "./test_dir/Deep Learning Assignment 1.pdf"

# Parse the PDF; later cells read `.page_content` from each loaded item.
# presumably the loader yields one item per PDF page — TODO confirm its default mode
loader = PyMuPDF4LLMLoader(file_path)
data = loader.load()

In [3]:
# Schema for the document-level metadata requested from the LLM.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into its JSON schema description, so a docstring here could
# change what the LLM is shown.
class DocumentInfo(BaseModel):
    # Kind of document; validation accepts only these two literal values.
    doc_type: Literal["experiment", "assignment"]
    # Name of the subject the document belongs to.
    subject: str
    # Required, strictly positive integer (gt=0), e.g. the 3 in "Assignment 3".
    number: int = Field(..., gt=0)

In [4]:
response = await llm.with_structured_output(DocumentInfo).ainvoke(f"""
Extract the following information from the document:
1. Document Type: Whether the document is an "experiment" or an "assignment".
2. Subject Name
3. Document Number: The number associated with the document (e.g., Experiment 5, Assignment 3).                                                          

# Document:
{data[0].page_content}
""")

pprint(response)

In [5]:
# Schema for a single question extracted from the document.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into its JSON schema description, so a docstring here could
# change what the LLM is shown.
class Question(BaseModel):
    # Text of the question.
    question: str
    # Marks allotted to the question; must be a strictly positive integer (gt=0).
    marks: int = Field(..., gt=0)
    # True when the question explicitly asks for a diagram.
    requires_diagram: bool
    # Question label, kept as a string — presumably to allow labels like "1a"; TODO confirm
    question_number: str

# Wrapper schema so a whole list of Question items can be requested in one call.
# (Comment-only documentation: a docstring would end up in the model's JSON schema.)
class Questions(BaseModel):
    # All extracted questions — presumably in document order; TODO confirm
    questions: list[Question]

In [6]:
# Concatenate the text of every loaded item into one newline-separated string.
content = "\n".join(page.page_content for page in data)

In [7]:
# Bind the chat model to the Questions schema for structured output.
questions_llm = llm.with_structured_output(Questions)

# The full document text (`content`) is embedded at the end of the prompt.
questions_prompt = f"""
Extract all the questions from the document along with their marks, whether they require a diagram, and their question number.

Only set requires_diagram to true if the question explicitly asks for a diagram.

# Document:
{content}
"""

response = questions_llm.invoke(questions_prompt)

# Validate the reply against Questions; downstream cells use `questions`.
questions = Questions.model_validate(response)

In [8]:
# Pretty-print each extracted question for a manual check.
for extracted_question in questions.questions:
    pprint(extracted_question)

In [9]:
def num_points(marks: int) -> int:
    """Return the number of answer points to request for a question.

    Args:
        marks: Marks allotted to the question.

    Returns:
        Twice ``marks`` (two points are budgeted per mark).
    """
    points_per_mark = 2
    return marks * points_per_mark

In [10]:
# Schema for a generated answer.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into its JSON schema description, so a docstring here could
# change what the LLM is shown.
class Answer(BaseModel):
    # Answer text.
    answer: str
    # Description of a diagram that illustrates the answer, or None when there is
    # none. Defaults to None so a reply that leaves the field out (e.g. for a
    # question needing no diagram) still validates instead of failing as a
    # missing required field.
    diagram_description: str | None = None

In [11]:
# Answer template; {points}, {question} and {requires_diagram} are filled in
# with str.format below.
prompt = """
For the following question answer in point format.
If the question requires a diagram, provide a brief description of the diagram that would best illustrate the answer.
The number of points must be {points}

# Question:
{question}

# Requires Diagram: {requires_diagram}
"""

# Bind the chat model to the Answer schema for structured output.
answer_llm = llm.with_structured_output(Answer)

# The second-to-last extracted question is the one answered here.
target_question = questions.questions[-2]

response = answer_llm.invoke(
    prompt.format(
        question=target_question.question,
        requires_diagram=target_question.requires_diagram,
        points=num_points(target_question.marks),
    )
)

In [12]:
# Show the generated answer text; print() renders its newlines rather than an escaped repr.
# NOTE(review): `response` is also assigned by earlier cells for other result types,
# so this relies on the answer-generation cell being the last one that set it.
print(response.answer)

An autoencoder is a type of artificial neural network used for unsupervised learning of efficient data codings. It works by compressing the input data into a lower-dimensional latent representation and then reconstructing the original data from this representation. The architecture typically consists of three main parts: an encoder, a bottleneck (or latent space), and a decoder.

1.  **Encoder:** This part of the autoencoder takes the input data and progressively reduces its dimensionality through a series of layers (e.g., dense, convolutional, recurrent). Each layer transforms the input into a more abstract and compressed representation.
2.  **Bottleneck (Latent Space):** This is the central layer of the autoencoder, representing the compressed, lower-dimensional encoding of the input data. It captures the most important features of the data.
3.  **Decoder:** This part takes the compressed representation from the bottleneck and progressively reconstructs the original input data. It us

In [13]:
# Show the diagram description of the same answer; the field is typed `str | None`,
# so this prints "None" when no description was produced.
print(response.diagram_description)

A diagram illustrating the architecture of an autoencoder. It shows an input layer on the left, followed by several progressively smaller hidden layers representing the encoder. These lead to a single, narrow bottleneck layer (the latent space). From the bottleneck, the layers expand in size, mirroring the encoder, representing the decoder. Finally, an output layer on the right reconstructs the input. Arrows indicate the flow of data from input through encoder to bottleneck, and then through decoder to output.
