# Ollama native API

In [4]:
# Generation settings handed to the model through the `options` argument of chat().
options = dict(
    num_ctx=4096,   # context window size (see the Ollama parameter docs)
    temperature=0,  # presumably chosen to keep answers as repeatable as possible
)

In [5]:
# Synchronous

from ollama import chat
from ollama import ChatResponse

# Non-streaming request: a single user message, sent with the shared `options`.
response: ChatResponse = chat(
    model='llama3.1',
    messages=[
        {'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'},
    ],
    options=options,
)

# The reply text can be read dict-style ...
print(response['message']['content'])
# ... or through attributes on the response object; both print the same text.
print(response.message.content)

Scattered sunlight by tiny molecules in Earth's atmosphere makes it blue.
Scattered sunlight by tiny molecules in Earth's atmosphere makes it blue.


In [6]:
# Synchronous with stream=True

from ollama import chat

# With stream=True the call hands back an iterable of partial responses
# rather than one finished reply.
# NOTE(review): unlike the previous cell, `options` is not passed here -- confirm intended.
stream = chat(
    stream=True,
    model='llama3.1',
    messages=[
        {
            'role': 'user',
            'content': 'Why is the sky blue? Answer with about 10 words in markdown with a title',
        },
    ],
)

# Print each fragment as it arrives: end='' glues the pieces into one
# continuous text, flush=True pushes every piece to the output right away.
for chunk in stream:
    print(chunk['message']['content'], end='', flush=True)

**The Color of the Sky**
Scattering of sunlight by atmospheric particles makes it blue.

In [7]:
# Synchronous client

from ollama import Client
# Explicit client: the server address and a custom header are set once here
# instead of relying on the module-level chat() helper.
client = Client(
    headers={'x-some-header': 'some-value'},
    host='http://localhost:11434',
)

response = client.chat(
    model='llama3.1',
    messages=[
        {'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'},
    ],
)

# Bare last expression: the notebook shows the repr of the whole response object.
response

ChatResponse(model='llama3.1', created_at='2025-04-11T12:42:16.632337956Z', done=True, done_reason='stop', total_duration=829041452, load_duration=18770417, prompt_eval_count=23, prompt_eval_duration=98000000, eval_count=14, eval_duration=710000000, message=Message(role='assistant', content='Scattered sunlight interacts with tiny molecules of atmospheric gases and particles.', images=None, tool_calls=None))

In [8]:
# Asynchronous client

import asyncio
from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'}
  return await AsyncClient().chat(model='llama3.1', messages=[message])

#asyncio.run(chat())
print(await chat())

model='llama3.1' created_at='2025-04-11T12:42:17.422704645Z' done=True done_reason='stop' total_duration=681027206 load_duration=17601899 prompt_eval_count=23 prompt_eval_duration=69000000 eval_count=12 eval_duration=592000000 message=Message(role='assistant', content='The sky appears blue due to scattering of sunlight particles.', images=None, tool_calls=None)


In [9]:
# Asynchronous with Semaphore limit to 1 and dynamic message addition with asyncio.Queue

import asyncio
from ollama import AsyncClient

# Define a semaphore with a limit of 1: at most one request is in flight at a time
semaphore = asyncio.Semaphore(1)

async def chat(queue):
    """Worker: consume messages from `queue` and print the model's reply to each.

    Runs until it receives the sentinel value ``None``. Every item taken from
    the queue -- including the sentinel and items whose request failed -- is
    marked done, so a caller blocked in ``queue.join()`` is not left waiting
    forever because one request raised.

    Args:
        queue: an ``asyncio.Queue`` holding chat message dicts, or ``None``
            to tell the worker to stop.
    """
    # One client serves the whole worker; no need to build one per message.
    client = AsyncClient()
    while True:
        message = await queue.get()
        try:
            if message is None:  # Sentinel value to stop the worker
                break
            async with semaphore:
                response = await client.chat(model='llama3.1', messages=[message])
                print(response)
        except Exception as exc:
            # Report the failure and keep serving: an uncaught error would
            # kill the worker and leave the remaining items unprocessed.
            print(f'Request failed for {message!r}: {exc!r}')
        finally:
            # Always balance queue.get() with task_done(), even on failure.
            queue.task_done()

async def main():
    queue = asyncio.Queue()
    
    # Start the chat worker
    worker = asyncio.create_task(chat(queue))
    
    # Add initial messages to the queue
    await queue.put({'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'})
    await queue.put({'role': 'user', 'content': 'What is the capital of France?'})
    
    # Dynamically add a third message after some delay
    await asyncio.sleep(1)
    await queue.put({'role': 'user', 'content': 'What is the speed of light?'})
    
    # Wait for all messages to be processed
    await queue.join()
    
    # Stop the worker
    await queue.put(None)
    await worker

# Run the main function
await main()

model='llama3.1' created_at='2025-04-11T12:42:18.1623914Z' done=True done_reason='stop' total_duration=639514893 load_duration=17534501 prompt_eval_count=23 prompt_eval_duration=71000000 eval_count=11 eval_duration=549000000 message=Message(role='assistant', content='The sky appears blue due to scattering of sunlight.', images=None, tool_calls=None)
model='llama3.1' created_at='2025-04-11T12:42:18.744962805Z' done=True done_reason='stop' total_duration=512966919 load_duration=18919492 prompt_eval_count=17 prompt_eval_duration=103000000 eval_count=8 eval_duration=389000000 message=Message(role='assistant', content='The capital of France is Paris.', images=None, tool_calls=None)
model='llama3.1' created_at='2025-04-11T12:42:27.47907397Z' done=True done_reason='stop' total_duration=8659086357 load_duration=19973295 prompt_eval_count=17 prompt_eval_duration=108000000 eval_count=154 eval_duration=8529000000 message=Message(role='assistant', content="The speed of light in a vacuum is approxi

In [10]:
# Asynchronous client with stream=True

from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'}
  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
    print(part['message']['content'],end='', flush=True)

#asyncio.run(chat())
await chat()

The sky appears blue due to scattered sunlight particles.

In [11]:
# Asynchronous client with stream=True and a "yield" instead of the "print"

import asyncio
from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue? Answer with about 10 words.'}
  async for part in await AsyncClient().chat(model='phi4-mini', messages=[message], stream=True):
    yield (part['message']['content'])

async for result in chat():
	print(result, end='',  flush=True)

Scattering of shorter wavelengths by atmospheric particles makes it appear blue to human eyes.

# OpenAI API

In [12]:
# Synchronous

from openai import OpenAI

# Point the OpenAI SDK at the local server (same port 11434 as above, under /v1).
client = OpenAI(
    api_key='dummy_key',  # required, but unused
    base_url='http://localhost:11434/v1',
)

# Multi-turn conversation: a system prompt, one earlier exchange, then the
# follow-up question the model has to answer.
response = client.chat.completions.create(
    model="llama3.1",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The LA Dodgers won in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
)

# Only the text of the first choice is printed.
print(response.choices[0].message.content)

Due to the COVID-19 pandemic, the 2020 World Series was played at Globe Life Field in Arlington, Texas, home of the Texas Rangers, and not at the Dodgers' typical home stadium, Dodger Stadium in Los Angeles. The games were held without a live audience due to health and safety protocols at that time.


In [13]:
# Asynchronous client

from openai import AsyncOpenAI

client = AsyncOpenAI(
    base_url = 'http://localhost:11434/v1',
    api_key='dummy_key', # required, but unused
)


async def main():
    chunks = await client.chat.completions.create(
        model="llama3.1",
        messages=[{"role": "user", "content": "Why is the sky blue? Answer with about 10 words."}],
    )
    print(chunks)


await main()

ChatCompletion(id='chatcmpl-474', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Scattering of sunlight by tiny molecules makes the sky blue.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1744375376, model='llama3.1', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=13, prompt_tokens=23, total_tokens=36, completion_tokens_details=None, prompt_tokens_details=None))


In [14]:
# Asynchronous client with stream=True

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    base_url = 'http://localhost:11434/v1',
    api_key='dummy_key', # required, but unused
)


async def main():
    stream = await client.chat.completions.create(
        model="llama3.1",
        messages=[{"role": "user", "content": "1+1=? Answer with just one number."}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="", flush=True)


await main()

2

In [15]:
# Asynchronous client with stream=True and a "yield" instead of the "print"

import asyncio
from openai import AsyncOpenAI

client = AsyncOpenAI(
    base_url = 'http://localhost:11434/v1',
    api_key='dummy_key', # required, but unused
)


async def chat():
    stream = await client.chat.completions.create(
        model="llama3.1",
        messages=[{"role": "user", "content": "Why is the sky blue? Answer with about 10 words."}],
        stream=True,
    )
    async for chunk in stream:
        yield chunk.choices[0].delta.content


async for result in chat():
	print(result or "", end="", flush=True)

Scattered sunlight by tiny molecules in the atmosphere creates blue color.