# Ollama REST API

In [3]:
!pip install requests

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)
Using cached charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl (196 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Using cached urllib3-2.3.0-py3-none-any.whl (128 kB)
Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
Successfully installed certifi-2024.12.14 charset-normali

## Ollama Base URL and Endpoints


In [56]:
# Root address of the Ollama server on this machine (no trailing slash)
ollama_url = "http://localhost:11434"

# REST paths; each is appended to `ollama_url` to form the full request URL
tags_endpoint = "/api/tags"          # tags: list the available models
show_endpoint = "/api/show"          # show: details for one model
chat_endpoint = "/api/chat"          # chat: message-based completion
generate_endpoint = "/api/generate"  # generate: prompt-based completion
embeddings_endpoint = "/api/embed"   # embeddings: vectors for input text


## Get Model List

In [51]:
# Import required libraries
import requests  # For making HTTP requests
import json      # For JSON data handling

# Make a GET request to the Ollama API tags endpoint to retrieve model list.
# The timeout keeps the cell from hanging indefinitely when the server is
# not running or stops responding (requests has no timeout by default).
response = requests.get(f"{ollama_url}{tags_endpoint}", timeout=10)

if response.status_code == 200:
    # Print the response in a formatted JSON structure with indentation
    print(json.dumps(response.json(), indent=2))
else:
    # Report the failure instead of trying to pretty-print an error body
    print(f"Error: {response.status_code} - {response.text}")


{
  "models": [
    {
      "name": "nomic-embed-text:latest",
      "model": "nomic-embed-text:latest",
      "modified_at": "2025-01-27T01:39:27.950679989+05:30",
      "size": 274302450,
      "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "nomic-bert",
        "families": [
          "nomic-bert"
        ],
        "parameter_size": "137M",
        "quantization_level": "F16"
      }
    },
    {
      "name": "qwen2.5-coder:14b",
      "model": "qwen2.5-coder:14b",
      "modified_at": "2025-01-25T15:48:17.044228948+05:30",
      "size": 8988124256,
      "digest": "3028237cc8c52fea4e77185d72cc997b2e90392791f7c82fe1c71995d56e642d",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "qwen2",
        "families": [
          "qwen2"
        ],
        "parameter_size": "14.8B",
        "quantization_level": "Q4_K_M"
      }


## Show Model Info

In [11]:
# Import required libraries
import requests  # For making HTTP requests
import json      # For handling JSON data

# Define the model name to query information about
model_name = "qwen2.5:latest"

# Make a POST request to the show endpoint to get model details.
# The request includes the model name in the JSON payload; the timeout keeps
# the cell from hanging indefinitely if the server is down or unresponsive.
response = requests.post(
    f"{ollama_url}{show_endpoint}",
    json={"model": model_name},
    timeout=30,
)

if response.status_code == 200:
    # Print the response in a formatted JSON structure with indentation.
    # The payload includes the model's license text; see the Ollama API
    # documentation for the other fields returned by this endpoint.
    print(json.dumps(response.json(), indent=2))
else:
    # Report failures (e.g. an unknown model name) explicitly
    print(f"Error: {response.status_code} - {response.text}")



{
  "license": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n    

## Generate Chat Response (stream=False)

In [48]:
# Import required libraries for HTTP requests and JSON data handling
import requests  # For making HTTP API calls to Ollama server
import json      # For parsing and formatting JSON data structures

# Define the model name to be used for the chat request.
# As the output below shows, this model's reply starts with a
# <think>...</think> section before the actual answer.
model_name = "deepseek-r1:1.5b"

# Define the user prompt/query to send to the model
# This is a simple greeting to test the model's basic conversation capabilities
prompt = "Hello, how are you?"

# Configure the parameters for the chat completion API call
parameters = {
    "model": model_name,  # Specify which model variant to use
    "messages": [
        # Define the conversation history as a list of message objects
        {"role": "user", "content": prompt}  # Add the user's message/prompt
    ],
    "stream": False,  # When False, wait for complete response before returning
    "options": {
        "num_predict": 100,    # Maximum number of tokens to generate in response
        "temperature": 0.5,    # Controls randomness (lower = more deterministic)
        "top_p": 0.9,          # Nucleus sampling - consider tokens with cumulative probability < top_p
        "top_k": 40,           # Consider only the top k most likely tokens at each step
    }
}

# Send the chat completion request to the Ollama API.
# timeout=(connect, read): fail fast if the server is unreachable, but allow
# generous time for the model to load and produce the full reply.
response = requests.post(
    f"{ollama_url}{chat_endpoint}",  # Construct the full API endpoint URL
    json=parameters,                 # Send parameters as JSON in request body
    timeout=(5, 300),
)

if response.status_code == 200:
    # Parse the body once and reuse it for both printouts
    response_data = response.json()

    # Print the complete API response for debugging/inspection
    print(json.dumps(response_data, indent=2))

    # Print a separator for better readability
    print("--------------------------------")

    # Extract and print just the generated message content from the response
    print(response_data['message']['content'])
else:
    # An error body has no 'message' key, so report it instead of indexing
    print(f"Error: {response.status_code} - {response.text}")

{
  "model": "deepseek-r1:1.5b",
  "created_at": "2025-01-26T20:06:44.766667Z",
  "message": {
    "role": "assistant",
    "content": "<think>\n\n</think>\n\nHello! I'm just a virtual assistant, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today? \ud83d\ude0a"
  },
  "done_reason": "stop",
  "done": true,
  "total_duration": 444663500,
  "load_duration": 26808292,
  "prompt_eval_count": 9,
  "prompt_eval_duration": 51000000,
  "eval_count": 44,
  "eval_duration": 365000000
}
--------------------------------
<think>

</think>

Hello! I'm just a virtual assistant, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today? 😊


## Generate Chat Response (stream=True)

In [47]:
# Import required libraries
import requests  # For making HTTP API calls to Ollama server
import json      # For parsing and formatting JSON data structures

# Define the model name to be used for the chat request
model_name = "deepseek-r1:1.5b"

# Define the user prompt/query to send to the model
# This is a simple greeting to test the model's basic conversation capabilities
prompt = "Hello, how are you?"

# Configure the parameters for the chat completion API call with streaming enabled
parameters = {
    "model": model_name,      # Specify which model variant to use
    "messages": [
        # Define the conversation history as a list of message objects
        {"role": "user", "content": prompt}  # Add the user's message/prompt
    ],
    "stream": True            # Ask the server to send the reply in incremental chunks
}

# Initialize empty string to store the complete response
final_response = ""

# Send the chat completion request with streaming enabled.
# The `with` block guarantees the streamed connection is released even if
# parsing fails part-way through; timeout=(connect, read) prevents the cell
# from hanging indefinitely on an unresponsive server.
with requests.post(
    f"{ollama_url}{chat_endpoint}",  # Construct the full API endpoint URL
    json=parameters,                 # Send parameters as JSON in request body
    stream=True,                     # Do not download the whole body up front
    timeout=(5, 300),
) as response:
    if response.status_code == 200:
        # Each non-empty line of the body is one JSON object (one chunk)
        for line in response.iter_lines():
            if not line:
                continue
            # Decode and parse the chunk once, then reuse the parsed object
            chunk = json.loads(line.decode('utf-8'))
            # Accumulate the content; chunks lacking a 'message' (e.g. an
            # error object) contribute nothing instead of raising KeyError
            final_response += chunk.get('message', {}).get('content', '')
            # Print each chunk for debugging/monitoring
            print(chunk)
    else:
        # Handle any errors that occurred during the API call
        print(f"Error: {response.status_code} - {response.text}")

# Print a separator for better readability
print("--------------------------------")
# Print the complete accumulated response
print(final_response)

{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.867227Z', 'message': {'role': 'assistant', 'content': '<think>'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.878781Z', 'message': {'role': 'assistant', 'content': '\n\n'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.88876Z', 'message': {'role': 'assistant', 'content': '</think>'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.898252Z', 'message': {'role': 'assistant', 'content': '\n\n'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.90741Z', 'message': {'role': 'assistant', 'content': 'Hello'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.916717Z', 'message': {'role': 'assistant', 'content': '!'}, 'done': False}
{'model': 'deepseek-r1:1.5b', 'created_at': '2025-01-26T20:06:39.925593Z', 'message': {'role': 'assistant', 'content': ' I'}, 'done': False

## Generate Text Response (stream=False)

In [58]:
# Import required libraries for making HTTP requests and handling JSON data
import requests
import json

# Define the model name to use for text generation
model_name = "deepseek-r1:1.5b"
# Set a simple greeting prompt to test the model
prompt = "Hello, how are you?"

# Configure the parameters for the text generation API call
parameters = {
    "model": model_name,      # Specify which model to use
    "prompt": prompt,         # The input text prompt
    "stream": False           # Disable streaming mode - get complete response at once
}

# Send a POST request to the Ollama generate endpoint with the parameters.
# timeout=(connect, read): fail fast if the server is unreachable, but allow
# generous time for the complete (non-streamed) reply to be produced.
response = requests.post(
    f"{ollama_url}{generate_endpoint}",
    json=parameters,
    timeout=(5, 300),
)

if response.status_code == 200:
    # Print the API response formatted as indented JSON for readability
    print(json.dumps(response.json(), indent=2))
else:
    # Report the failure instead of pretty-printing an error body
    print(f"Error: {response.status_code} - {response.text}")


{
  "model": "deepseek-r1:1.5b",
  "created_at": "2025-01-26T20:14:48.170575Z",
  "response": "<think>\n\n</think>\n\nHello! I'm just a virtual assistant, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today? \ud83d\ude0a",
  "done": true,
  "done_reason": "stop",
  "context": [
    151644,
    9707,
    11,
    1246,
    525,
    498,
    30,
    151645,
    151648,
    271,
    151649,
    271,
    9707,
    0,
    358,
    2776,
    1101,
    264,
    4108,
    17847,
    11,
    773,
    358,
    1513,
    944,
    614,
    15650,
    11,
    714,
    358,
    2776,
    1588,
    323,
    5527,
    311,
    1492,
    498,
    448,
    8820,
    498,
    1184,
    13,
    2585,
    646,
    358,
    7789,
    498,
    3351,
    30,
    26525,
    232
  ],
  "total_duration": 450474583,
  "load_duration": 27623417,
  "prompt_eval_count": 9,
  "prompt_eval_duration": 56000000,
  "eval_count": 44,
  "eval_duration": 366000000
}

## Generate Embeddings


In [54]:
# Import required libraries for making HTTP requests and handling JSON data
import requests
import json

# Define the model name for text embeddings
model_name = "nomic-embed-text:latest"

# Configure the parameters for the embeddings API call
parameters = {
    "model": model_name,  # Specify the embedding model to use
    "input": [
        "Why is the sky blue?",    # First text to get embeddings for
        "Why is the grass green?"  # Second text to get embeddings for
    ]  # List of text inputs that will be converted to vector embeddings
}

# Send a POST request to the Ollama embeddings endpoint.
# The timeout keeps the cell from hanging indefinitely if the server is
# down or unresponsive (requests has no timeout by default).
response = requests.post(
    f"{ollama_url}{embeddings_endpoint}",
    json=parameters,
    timeout=(5, 60),
)

if response.status_code == 200:
    # Print the API response as formatted JSON.
    # The "embeddings" field holds one vector per input text; such vectors
    # can be used for semantic similarity comparisons, clustering, etc.
    print(json.dumps(response.json(), indent=2))
else:
    # Report the failure instead of pretty-printing an error body
    print(f"Error: {response.status_code} - {response.text}")
    

{
  "model": "nomic-embed-text:latest",
  "embeddings": [
    [
      0.00972523,
      0.044499267,
      -0.14063331,
      0.0013221583,
      0.032130003,
      0.10730237,
      -0.008445876,
      0.010107326,
      0.000524774,
      -0.03553925,
      0.03363763,
      0.062057853,
      0.1026143,
      0.08572545,
      0.023684796,
      0.03353076,
      -0.033604786,
      -0.018617978,
      0.047974523,
      -0.027137836,
      -0.05642295,
      -0.043552052,
      0.016541155,
      -0.03508129,
      0.06365866,
      0.0432306,
      0.03346612,
      -0.00039299962,
      0.0001672867,
      -0.018887838,
      0.058006234,
      0.0023211385,
      0.0181633,
      -0.03738371,
      0.03320825,
      -0.05963872,
      0.06684471,
      0.026961554,
      0.0063460255,
      -0.016740182,
      0.0018997231,
      -0.035354227,
      -0.010952537,
      0.008628692,
      0.02293588,
      -0.049730465,
      0.015360829,
      0.050267164,
      -0.022713734,
  