<a href="https://colab.research.google.com/github/ljkrajewski/jupyter_notebooks/blob/main/ollama/ollama_w_gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Defined globals
#@markdown **Common models**
#@markdown - "llama3"
#@markdown - "llama2-uncensored"
#@markdown - "dolphin-mistral"
#@markdown - "codellama:34b"
#@markdown - "deepseek-r1:14b"

#@markdown **Model search/lookups**
#@markdown   - [ollama model library](https://ollama.com/library)
#@markdown   - [ollama model search](https://ollama.com/search)

# Model tag: used by the `ollama pull` step and sent as the "model" field of every request.
model_name="llama3.2:1b" #@param {type: "string"}  The name of the LLM.
#@markdown Remember to put the name of the model in quotes. E.g., "llama3.2:1b"
#debug=True #@param {type: "boolean"}
# URL that query_llm() POSTs prompts to (the /api/generate route on localhost:11434).
model_endpoint="http://localhost:11434/api/generate" #The endpoint for the LLM's API.

# Install and start ollama

In [2]:
#@title Install and start ollama

# Fetch the install script from ollama.com and pipe it straight into sh
# (the cell output shows it installing ollama under /usr/local).
!curl -fsSL https://ollama.com/install.sh | sh

import multiprocessing
import os
import time
import requests
import json

def run_ollama():
    """Run ``ollama serve`` in the current process and wait for it to exit.

    Used as the target of a background ``multiprocessing.Process`` so the
    notebook kernel is not blocked while the server runs.  Prints the PID of
    the process hosting the server and, if the command returns a non-zero
    status, reports it instead of silently discarding it.

    Returns:
        None.
    """
    print(f"Running ollama on PID {os.getpid()}")
    status = os.system('ollama serve')
    if status != 0:
        # os.system does not raise on failure; without this check a server
        # that could not start would go unnoticed.
        print(f"ollama serve exited with status {status}")

ollama_process = multiprocessing.Process(target=run_ollama)
ollama_process.start()
time.sleep(10)

!ollama pull $model_name

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Running ollama on PID 665
[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest 
pulling 74701a8c35f6...   0% ▕▏    0 B/1.3 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 74701a8c35f6...   0% ▕▏    0 B/1.3 GB                  [?25h[?25l[2K[1G[A[2

In [3]:
#@title Defined functions
# prompt: Write a function that takes a dictionary prompt and sends a request to an LLM's API. The output is given in dictionary.

def query_llm(prompt):
  """Send ``prompt`` to the configured LLM endpoint and return the generated text.

  Reads the module-level ``model_endpoint`` and ``model_name``, POSTs a
  non-streaming JSON request, and returns the ``"response"`` field of the reply.

  Args:
    prompt: Text sent as the ``"prompt"`` field of the request.

  Returns:
    The value of the reply's ``"response"`` field, or ``None`` when the
    request fails: the server cannot be reached, the status is not 200, the
    body is not valid JSON, or the reply has no ``"response"`` field.  Every
    failure is reported with ``print``.
  """
  headers = {"Content-Type": "application/json"}
  data = {"model": model_name, "prompt": prompt, "stream": False}

  try:
    answer = requests.post(model_endpoint, headers=headers, json=data)
  except requests.RequestException as e:
    # Treat an unreachable server like every other failure: report, return None.
    print(f"Error: request to {model_endpoint} failed: {e}")
    return None

  if answer.status_code != 200:
    print(f"Error: {answer.status_code}")
    return None

  try:
    answer_dict = json.loads(answer.content)
  except ValueError as e:
    # ValueError covers json.JSONDecodeError and the UnicodeDecodeError that
    # json.loads raises for bytes that are not valid UTF-8/16/32.
    print(f"Error decoding JSON: {e}")
    return None

  if not isinstance(answer_dict, dict) or "response" not in answer_dict:
    print("Error: reply has no 'response' field")
    return None
  return answer_dict["response"]

# Main routine

In [4]:
#@title Test ollama connection
# Request the server root; a running server answers with a short status line (see the cell output).
!curl http://localhost:11434

Ollama is running

In [6]:
#@title Start gradio
!pip install -q gradio

import gradio as gr
import re
#from google.colab import files

def separate_sections(text_stream):
    """Split model output into its ``<think>`` part and the remaining answer.

    Looks for the first ``<think>...</think>`` pair.  When one is found,
    returns ``(thinking, answer)`` where ``thinking`` is the text inside the
    tags and ``answer`` is everything after the closing tag, both stripped of
    surrounding whitespace.  When no pair is found, returns ``("", text)``
    with the whole input stripped.
    """
    found = re.search(r"<think>(.*?)</think>(.*)", text_stream, re.DOTALL)
    if found is None:
        # No <think> block: the whole input is the answer.
        return "", text_stream.strip()
    thinking_section, results_section = (part.strip() for part in found.groups())
    return thinking_section, results_section

def generate(prompt,current):
  """Append ``prompt`` to the conversation, query the model, and return the update.

  The prompt is wrapped in ``**`` markers and appended to ``current``; the
  combined text is sent to ``query_llm``.  The reply is split by
  ``separate_sections`` into thinking and answer parts.

  Args:
    prompt: The new user prompt.
    current: Conversation text so far; ``None`` is treated as empty.

  Returns:
    A ``(conversation, thinking)`` tuple: the conversation extended with the
    wrapped prompt and the answer, and the model's thinking text.

  Raises:
    gr.Error: If ``query_llm`` returned ``None`` (it reports the cause itself).
  """
  # Guard against a missing conversation value so concatenation cannot fail.
  current = current or ""
  new_prompt = "**" + prompt + "**\n"
  reply = query_llm(current + new_prompt)
  if reply is None:
    # Passing None on to separate_sections would die inside re.search with an
    # unhelpful TypeError; surface a readable error to the UI instead.
    raise gr.Error("The model did not return a response; check the notebook output for details.")
  thinking, answer = separate_sections(reply)
  return current + new_prompt + answer + "\n", thinking

def clear_outputs():
  """Return four empty strings, one for each UI component being reset."""
  return ("",) * 4

def save_log(current, logdir="/content/logs"):
  """Write the conversation text to a timestamped Markdown file.

  Args:
    current: Conversation text to save; ``None`` is written as an empty file.
    logdir: Directory that receives the log; created if it does not exist.

  Returns:
    A status message naming the file that was written.
  """
  # exist_ok avoids the check-then-create race of a separate exists() test.
  os.makedirs(logdir, exist_ok=True)
  logname = time.strftime("%Y%m%d-%H%M%S") + ".md"
  logpath = os.path.join(logdir, logname)
  # Explicit UTF-8 so non-ASCII model output is written the same way
  # regardless of the platform's default encoding.
  with open(logpath, "w", encoding="utf-8") as f:
    f.write(current or "")
  return f"{logname} created. Download from files browser."

# Build the chat UI: prompt box and action buttons in the top row,
# result and thought-process panes in the bottom row.
with gr.Blocks(analytics_enabled=False) as demo:
  with gr.Column():
    with gr.Row():
      prompt_box = gr.Textbox(lines=10, interactive=True, value="", label="Prompt")
      with gr.Column():
        generate_button = gr.Button("Generate")
        clear_button = gr.Button("Clear chat")
        savelog_button = gr.Button("Save log")
        # Read-only status line; written by save_log() and reset by clear_outputs().
        log_box = gr.Textbox(lines=1, interactive=False, label="System Messages")
    with gr.Row():
      # Earlier Textbox-based versions of the two panes, kept for reference.
      #result_box = gr.Textbox(lines=20, interactive=False, label="Result")
      #think_box = gr.Textbox(lines=20, interactive=False, label="Thought Process (DeepSeek-R1 only)")
      # NOTE(review): the saved cell output shows a TypeError about Textbox and
      # 'height', which does not match these Markdown components; the output
      # looks stale -- re-run the cell to confirm.
      result_box = gr.Markdown(height=100, label="Result")
      think_box = gr.Markdown(height=100, label="Thought Process (DeepSeek-R1 only)")

  # result_box doubles as the conversation state: generate() receives its
  # current text along with the prompt and returns the extended conversation.
  generate_button.click(fn=generate, inputs=[prompt_box,result_box], outputs=[result_box,think_box])
  # clear_outputs() returns four empty strings, one per component listed here.
  clear_button.click(fn=clear_outputs, inputs=[], outputs=[result_box,think_box,prompt_box,log_box])
  # save_log() writes the conversation to disk and reports the file name in log_box.
  savelog_button.click(fn=save_log, inputs=[result_box], outputs=[log_box])

# NOTE(review): share=True is expected to expose the app through a public
# Gradio link and debug=True to keep this cell running -- confirm against the
# Gradio launch() documentation that both are intended.
demo.queue().launch(inline=False, share=True, debug=True)

TypeError: Textbox.__init__() got an unexpected keyword argument 'height'