## imports

In [26]:
from langchain_ollama import ChatOllama
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import bs4
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

## basics

In [6]:
# Ollama-backed chat model. A temperature of 0 keeps sampling as deterministic
# as possible, which is what we want when asking for code.
llm = ChatOllama(model="llama3.2", temperature=0)

# Send one plain-string prompt straight to the model (no template, no chain).
response = llm.invoke("Write a Python function to check if a number is prime.")

# The generated text lives on `.content`; the second print shows the wrapper
# type of the reply (an AIMessage).
print(response.content)
print(type(response))

def is_prime(n):
    """
    Checks if a number is prime.

    Args:
        n (int): The number to check.

    Returns:
        bool: True if the number is prime, False otherwise.
    """
    if n <= 1:
        return False
    for i in range(2, int(n ** 0.5) + 1):
        if n % i == 0:
            return False
    return True
<class 'langchain_core.messages.ai.AIMessage'>


In [7]:
# Step 1 - model: reuse the `llm` created in the previous cell rather than
# constructing a second ChatOllama instance here.

# Step 2 - prompt template: a fixed system instruction plus a user slot that is
# filled from the `topic` key at invoke time. The system message plays the role
# of the "System Prompt" from the JetBrains Mellum discussion.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert software engineer. You only answer with code, no explanation."),
        ("user", "{topic}"),
    ]
)

# Step 3 - chain: `|` feeds each component's output into the next one, so the
# flow is prompt -> model -> plain string.
chain = prompt | llm | StrOutputParser()

# Step 4 - run the pipeline once and show the generated text.
result = chain.invoke({"topic": "Create a Python decorator that times execution speed"})
print(result)

```python
import time
from functools import wraps

def timer_decorator(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Function '{func.__name__}' executed in {end_time - start_time:.4f} seconds.")
        return result
    return wrapper

# Example usage:
@timer_decorator
def example_function():
    for i in range(10000000):
        pass

example_function()
```


In [8]:
import time
from functools import wraps

def timer_decorator(func):
    """Wrap ``func`` so that every successful call prints how long it took.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that keeps ``func``'s name and docstring (via
        ``functools.wraps``), forwards all positional and keyword arguments,
        prints the elapsed time in seconds with four decimals, and returns
        whatever ``func`` returned. If ``func`` raises, the exception
        propagates and nothing is printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() is wall-clock time and can jump
        # (NTP sync, manual clock changes), yielding wrong or negative durations.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"Function '{func.__name__}' executed in {elapsed:.4f} seconds.")
        return result
    return wrapper

# Demo: decorate a deliberately slow no-op loop, then call it once so the
# timing line gets printed.
@timer_decorator
def example_function():
    """Run ten million empty iterations purely to burn a measurable amount of time."""
    for _ in range(10_000_000):
        pass

example_function()

Function 'example_function' executed in 0.0799 seconds.


## RAG

In [29]:
# Step 1 - models: a chat model for answering and an embedding model for
# indexing, both using the same Ollama model name. `llme` is consumed by the
# chain-building cell further down.
llme = ChatOllama(model="llama3.2")
embeddings = OllamaEmbeddings(model="llama3.2")

# Step 2 - load: read the PDF into LangChain documents (stands in for
# "reading documentation").
loader = PyPDFLoader("~/Downloads/pytorch.pdf")
docs = loader.load()

# Step 3 - split: cut the documents on newlines into small, slightly
# overlapping chunks (target size 200, overlap 20) so each piece stays well
# within the model's context limits.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,
    chunk_overlap=20,
)
splits = text_splitter.split_documents(docs)

# Step 4 - index: embed every chunk into a Chroma vector store and expose it
# as a retriever with default settings.
# NOTE(review): the saved output shows this cell was stopped with a
# KeyboardInterrupt; presumably the embedding step is the slow part - confirm.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

KeyboardInterrupt: 

In [None]:
# Step 5 - answer prompt: instructions for the model, followed by the
# `{context}` slot where the retrieved chunks are inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Step 6 - chain: first a chain that stuffs the documents into the prompt and
# calls the chat model, then a retrieval chain that fetches those documents
# with `retriever` before answering.
question_answer_chain = create_stuff_documents_chain(llme, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Step 7 - ask a question; the result is indexed by "context" and "answer".
response = rag_chain.invoke({"input": "What are embedding models used for?"})

print("Context used:", len(response["context"]))  # number of documents retrieved
print("Answer:", response["answer"])

ConnectionError: HTTPSConnectionPool(host='ollama.com', port=443): Max retries exceeded with url: /blog/embedding-models (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f0dab58b550>: Failed to resolve 'ollama.com' ([Errno -2] Name or service not known)"))