# Ollama + LangChain quickstart

This notebook demonstrates connecting to a local Ollama server and answering a question using LangChain.

Prerequisites:
- Ollama installed and running (`ollama serve`), with a chat model pulled (e.g., `ollama pull llama3.1:8b`)
- Python 3.10+



In [1]:
import os
from pathlib import Path
from dotenv import load_dotenv

# Load variables from .env in project root if present
loaded = load_dotenv()
load_dotenv("../prompts.env")

# Validate key presence
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError(
        "OPENAI_API_KEY not set. Add it to .env (or prompts.env) or export it in your shell."
    )

print("OPENAI_API_KEY loaded from environment.")

OPENAI_API_KEY loaded from environment.


In [2]:
import os
from langchain_openai import ChatOpenAI

model_name = os.getenv("OPENAI_MODEL", "gpt-5-nano")

model = ChatOpenAI(model=model_name)

In [3]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [4]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [2]:
from pathlib import Path
import requests
from urllib.parse import urlparse
from langchain_community.document_loaders import PyPDFLoader

# Local cache directory and urls list file
documents_dir = (Path.cwd().parent / "documents").resolve()
documents_dir.mkdir(parents=True, exist_ok=True)
urls_file = documents_dir / "urls.txt"

# Read URLs from file (one per line, '#' for comments); fallback to default if empty
urls = []
for line in urls_file.read_text().splitlines():
    line = line.strip()
    if not line or line.startswith("#"):
        continue
    urls.append(line)

# Download/cache each URL into documents/
local_paths = []
for url in urls:
    name = Path(urlparse(url).path).name or "document.pdf"
    local_path = documents_dir / name
    if not local_path.exists():
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        local_path.write_bytes(resp.content)
    local_paths.append(local_path)

# Backward-compatible: keep a single loader for the first document
# loader = PyPDFLoader(str(local_paths[0]))

# Optional: loaders for all cached documents
loaders = [PyPDFLoader(str(p)) for p in local_paths]

In [9]:
loaders

[<langchain_community.document_loaders.pdf.PyPDFLoader at 0x10c286f50>]

In [None]:
# Load all documents from the loaders list
all_docs = []
for ldr in loaders:
    try:
        loaded = ldr.load()
        all_docs.extend(loaded)
    except Exception as e:
        print(f"Failed to load {ldr.file_path}: {e}")

# Use combined docs downstream
docs = all_docs
print(f"Loaded {len(docs)} pages/chunks across {len(loaders)} document(s).")
docs[0] if docs else None

Document(metadata={'producer': 'Adobe PDF Library 20.12.75', 'creator': 'Acrobat PDFMaker 20 for Word', 'creationdate': '2023-02-08T09:59:37-08:00', 'author': 'Gibler Andrew  N.', 'company': '', 'contenttypeid': '0x0101009B701E64863F8F48AC2F600775CFA4F9', 'created': 'D:20210527', 'lastsaved': 'D:20211123', 'moddate': '2023-02-08T09:59:44-08:00', 'sourcemodified': 'D:20230208175858', 'title': '', 'source': '../Narrative-Medication-Treatment-Algorithm-for-Adults-with-GAD.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='• Generalized Anxiety Disorder (GAD) frequently has a waxing and waning course, so medication treatment should continue for 6-\n12 months after remission to reduce risk of relapse.1 \n• It is useful to monitor for clinically meaning improvement of symptoms and function using the Hamilton Anxiety Scale (HAM-A), \nthe Generalized Anxiety Disorder-7 (GAD-7), or another validated grading scale routinely used in the provider’s practice. \n• At any point befo

In [None]:
total_chars = sum(len(d.page_content) for d in docs)
print(f"Total characters across all docs: {total_chars}")

Total characters: 3124


In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split pdf post into {len(all_splits)} sub-documents.")

Split pdf post into 15 sub-documents.


In [9]:
document_ids = vector_store.add_documents(documents=all_splits)

print(document_ids[:3])

['58d4675c-d82c-4679-babc-89f9c5f41ba5', '26370aa7-f5f0-4b48-8391-9e0033f0c1d0', 'c6375a46-c53b-426b-893d-46b2658fd623']


In [10]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [11]:
from langchain.agents import create_agent

def rag_agent(message, history):
    tools = [retrieve_context]
    prompt = os.getenv("SYSTEM_PROMPT")
    agent = create_agent(model, tools, system_prompt=prompt)

    final_text = None
    for event in agent.stream(
        {"messages": [{"role": "user", "content": message}]},
        stream_mode="values",
    ):
        # capture only the latest assistant content to return to gradio
        final_text = event["messages"][-1].content

    return final_text or "No response generated."

In [12]:
# query = (
#     "What adjunctive therapy methods are recommended for GAD?\n\n"
# )

# for event in agent.stream(
#     {"messages": [{"role": "user", "content": query}]},
#     stream_mode="values",
# ):
#     event["messages"][-1].pretty_print()

In [13]:
# event["messages"][-1].pretty_print()

In [14]:
import gradio as gr

app = gr.ChatInterface(
    fn=rag_agent,
    title="RAG Chatbot",
    description="Ask me anything about the loaded documents!",
)

app.launch()

  from .autonotebook import tqdm as notebook_tqdm
  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


