# Setup

In [1]:
from openai import OpenAI
from openai import embeddings
from dotenv import load_dotenv
import os
import json

load_dotenv()


print(os.getenv("OLLAMA_BASE_URL")[:20], "...")


http://localhost:114 ...


In [2]:
client = OpenAI(
    base_url = os.getenv("OLLAMA_BASE_URL"),
    api_key='ollama', # required, but unused
)


In [3]:

response = client.chat.completions.create(
  model="gpt-oss",
  messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The LA Dodgers won in 2020."},
    {"role": "user", "content": "Where was it played?"}
  ]
)
print(response.choices[0].message.content)

The 2020 World Series was staged at **Globe Life Field in Arlington, Texas** (the home ballpark of the Texas Rangers). Because of COVID‑19 restrictions the series was moved from the Dodgers’ Dodger Stadium and the Mets’ Citi Field, and all games were played there.


In [4]:
print(response.to_json())


{
  "id": "chatcmpl-322",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The 2020 World Series was staged at **Globe Life Field in Arlington, Texas** (the home ballpark of the Texas Rangers). Because of COVID‑19 restrictions the series was moved from the Dodgers’ Dodger Stadium and the Mets’ Citi Field, and all games were played there.",
        "role": "assistant",
        "reasoning": "User asks: \"Where was it played?\" referring to location of 2020 World Series. Answer: at Globe Life Field in Arlington, Texas, because due to COVID. Also can mention: The series was played at Globe Life Field, home of Texas Rangers, in Arlington, Texas, because of pandemic restrictions, MLB moved games from Dodgers' Dodger Stadium and Mets' Citi Field. So answer accordingly."
      }
    }
  ],
  "created": 1758848713,
  "model": "gpt-oss",
  "object": "chat.completion",
  "system_fingerprint": "fp_ollama",
  "usage": {
    "completion_tok

# Embeddings



In [5]:
embedding_model = "nomic-embed-text"
embedding = client.embeddings.create(
    model=embedding_model,
    input=["Hello, world!", "Hola!"]
)

In [6]:
[type(e) for e in embedding]

[tuple, tuple, tuple, tuple]

In [7]:
for e in embedding:
    print(e)

('data', [Embedding(embedding=[0.015951632, -0.00066996424, -0.15885885, -0.012657387, -0.017753249, 0.061320245, -0.005491252, -0.010701116, -0.0055996445, -0.040793166, 0.013789049, 0.07319223, 0.019487698, 0.051192224, 0.027002448, -0.059568644, 0.0076662255, -0.06245029, -0.029461585, 0.025311636, -0.03167875, -0.08911716, 0.008606663, 0.01984472, 0.12291859, 0.009061198, -0.037374612, 0.07187734, 0.012613118, -0.0032542283, -0.005120477, 0.0077211387, -0.0016446928, 0.034060054, 0.056631163, 0.00035218656, 0.02197616, 0.0075445683, 0.024604717, -0.026079183, 0.012698903, -0.00062393147, 0.012742986, 0.005797232, 0.07582022, -0.017264228, -0.019319652, -0.034184746, 0.069988206, -0.03556641, -0.04616333, -0.0073607224, -0.0038881674, 0.05516038, 0.04708488, 0.01671669, 0.053503018, -0.035663106, 0.018357659, 0.05551932, 0.041776985, 0.054344818, 0.028155666, 0.03244801, 0.0007530078, -0.047532044, -0.01414573, 0.07351867, 0.023626907, 0.009264789, 0.0896961, 0.0011518894, 0.0261826

In [8]:
embedding.data[0].embedding[:20]

[0.015951632,
 -0.00066996424,
 -0.15885885,
 -0.012657387,
 -0.017753249,
 0.061320245,
 -0.005491252,
 -0.010701116,
 -0.0055996445,
 -0.040793166,
 0.013789049,
 0.07319223,
 0.019487698,
 0.051192224,
 0.027002448,
 -0.059568644,
 0.0076662255,
 -0.06245029,
 -0.029461585,
 0.025311636]

In [9]:
embedding.data[1].embedding[:20]

[0.035152327,
 0.03157345,
 -0.15702538,
 0.033572234,
 -0.008452722,
 -0.011450175,
 -0.038476005,
 0.0105022015,
 -0.0053734747,
 -0.017551042,
 0.02773594,
 0.03740205,
 0.05844796,
 -0.008119061,
 0.04867942,
 -0.05541964,
 0.07479316,
 -0.022522476,
 -0.08598193,
 0.07619329]

## Cosine similarity

In [10]:
import numpy as np
# input=["feline friends say", "meow"]
input=['Hello, world!', 'Hola!']

resp = client.embeddings.create(
    model=embedding_model,
    input=input,

)

embedding_a = resp.data[0].embedding
embedding_b = resp.data[1].embedding

similarity_score = np.dot(embedding_a, embedding_b)
print(similarity_score)


0.5300049915725503


In [11]:
input=[
    'Azure OpenAI is a cloud-based AI platform.',
    'The Eiffel Tower.'
    ]

resp = client.embeddings.create(
    model=embedding_model,
    input=input,

)

embedding_a = resp.data[0].embedding
embedding_b = resp.data[1].embedding

similarity_score = np.dot(embedding_a, embedding_b)
print(similarity_score)

0.3950775087825277


In [12]:
input=[
    'The Sydney Harbour Bridge.',
    'The Eiffel Tower.'
    ]

resp = client.embeddings.create(
    model=embedding_model,
    input=input,

)

embedding_a = resp.data[0].embedding
embedding_b = resp.data[1].embedding

similarity_score = np.dot(embedding_a, embedding_b)
print(similarity_score)

0.6044053292128053


# Responses API for gpt-oss


- use responsed api for gpt-oss
- use structured output


Ollama doesn’t (yet) support the Responses API natively.
https://cookbook.openai.com/articles/gpt-oss/run-locally-ollama

In [13]:
from pydantic import BaseModel, Field
from typing import List
class ItemPrice(BaseModel):
    item: str
    price: float

class ItemPriceResponse(BaseModel):
    items: List[ItemPrice]

user_input = "Extract product names and prices: apples, $4, bananas, $3, cherries, $5"


In [14]:


structured_output_response = client.chat.completions.create(
    model="gpt-oss",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_input},
        # {"role": "user", "content":
        # "List 3 items with prices in a JSON array, with fields item and price."}
    ],
    # response_format=ItemPriceResponse
)


In [15]:
print(structured_output_response.choices[0].message.content)

**Products and Prices**

- **apples** – $4  
- **bananas** – $3  
- **cherries** – $5


In [16]:
structured_output_response = client.chat.completions.parse(
    model="llama3.2",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_input},
        # {"role": "user", "content": "List 3 items with prices in a JSON array, with fields item and price."}
    ],
    response_format=ItemPriceResponse
)
print(structured_output_response.choices[0])

ParsedChoice[ItemPriceResponse](finish_reason='stop', index=0, logprobs=None, message=ParsedChatCompletionMessage[ItemPriceResponse](content='{"items": [\n    {"item": "apples", "price": 4},\n    {"item": "bananas", "price": 3},\n    {"item": "cherries", "price": 5}\n]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, parsed=ItemPriceResponse(items=[ItemPrice(item='apples', price=4.0), ItemPrice(item='bananas', price=3.0), ItemPrice(item='cherries', price=5.0)])))


In [17]:
print(structured_output_response.choices[0].message)

ParsedChatCompletionMessage[ItemPriceResponse](content='{"items": [\n    {"item": "apples", "price": 4},\n    {"item": "bananas", "price": 3},\n    {"item": "cherries", "price": 5}\n]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, parsed=ItemPriceResponse(items=[ItemPrice(item='apples', price=4.0), ItemPrice(item='bananas', price=3.0), ItemPrice(item='cherries', price=5.0)]))


In [18]:
choice_0 = structured_output_response.choices[0].message.parsed
print(type(choice_0))
for item in choice_0.items:
    print("item name: ", item.item, ", price: ", item.price)

<class '__main__.ItemPriceResponse'>
item name:  apples , price:  4.0
item name:  bananas , price:  3.0
item name:  cherries , price:  5.0


In [19]:
type(choice_0)

__main__.ItemPriceResponse

In [20]:
print(choice_0)

items=[ItemPrice(item='apples', price=4.0), ItemPrice(item='bananas', price=3.0), ItemPrice(item='cherries', price=5.0)]


In [22]:
choice_0.items

[ItemPrice(item='apples', price=4.0),
 ItemPrice(item='bananas', price=3.0),
 ItemPrice(item='cherries', price=5.0)]

In [23]:
structured_output_response

ParsedChatCompletion[ItemPriceResponse](id='chatcmpl-812', choices=[ParsedChoice[ItemPriceResponse](finish_reason='stop', index=0, logprobs=None, message=ParsedChatCompletionMessage[ItemPriceResponse](content='{"items": [\n    {"item": "apples", "price": 4},\n    {"item": "bananas", "price": 3},\n    {"item": "cherries", "price": 5}\n]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, parsed=ItemPriceResponse(items=[ItemPrice(item='apples', price=4.0), ItemPrice(item='bananas', price=3.0), ItemPrice(item='cherries', price=5.0)])))], created=1758848865, model='llama3.2', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=48, prompt_tokens=52, total_tokens=100, completion_tokens_details=None, prompt_tokens_details=None))

In [24]:
print(structured_output_response)

ParsedChatCompletion[ItemPriceResponse](id='chatcmpl-812', choices=[ParsedChoice[ItemPriceResponse](finish_reason='stop', index=0, logprobs=None, message=ParsedChatCompletionMessage[ItemPriceResponse](content='{"items": [\n    {"item": "apples", "price": 4},\n    {"item": "bananas", "price": 3},\n    {"item": "cherries", "price": 5}\n]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, parsed=ItemPriceResponse(items=[ItemPrice(item='apples', price=4.0), ItemPrice(item='bananas', price=3.0), ItemPrice(item='cherries', price=5.0)])))], created=1758848865, model='llama3.2', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=48, prompt_tokens=52, total_tokens=100, completion_tokens_details=None, prompt_tokens_details=None))
