# 01 - Ollama Basics

Learn how to use Ollama for LLM inference in the GenAI Vanilla Stack.

In [None]:
import os
from dotenv import load_dotenv
from ollama import Client

# Run python-dotenv's loader first so the lookup below can see any values it
# provides.
load_dotenv()

# Build the shared Ollama client used by the cells below. The host comes from
# the OLLAMA_BASE_URL environment variable and is None when it is unset.
client = Client(
    host=os.environ.get("OLLAMA_BASE_URL"),
)

## 1. List Available Models

In [None]:
# Fetch the model listing from the Ollama server and print one line per model
# with its size in gigabytes (size / 1e9).
models = client.list()
print("Available Models:")
for model in models["models"]:
    # ollama-python 0.4+ exposes the identifier under "model" and no longer
    # provides "name"; older releases return plain dicts that carry "name".
    # Try the new key first and fall back to the old one so both work; if
    # neither is present the subscript still fails loudly with KeyError.
    model_name = model.get("model") or model["name"]
    print(f"  • {model_name} ({model['size'] / 1e9:.1f}GB)")

## 2. Simple Chat

In [None]:
# Send a single user message to the llama3.2 model and print the text of the
# reply message.
response = client.chat(
    model="llama3.2",
    messages=[{"role": "user", "content": "Explain RAG in one sentence."}],
)
print(response["message"]["content"])

## 3. Streaming Responses

In [None]:
# Request the reply as a stream (stream=True) and iterate over the chunks
# instead of waiting for one complete response.
stream = client.chat(
    model="llama3.2",
    stream=True,
    messages=[{"role": "user", "content": "Write a haiku about AI."}],
)

# Print each chunk's text as it arrives: end="" keeps the pieces on one
# line and flush=True pushes them to the output immediately.
for chunk in stream:
    print(chunk["message"]["content"], end="", flush=True)

## 4. Generate Embeddings

In [None]:
# Request an embedding for a short prompt, then report the vector's length
# and its first five components.
# NOTE(review): llama3.2 is the same model used for chat above; a dedicated
# embedding model may be a better fit here — confirm against the stack's setup.
embedding = client.embeddings(
    model="llama3.2",
    prompt="GenAI Vanilla Stack",
)
print(
    f"Embedding dimension: {len(embedding['embedding'])}",
    f"First 5 values: {embedding['embedding'][:5]}",
    sep="\n",
)