<a href="https://colab.research.google.com/github/ljkrajewski/jupyter_notebooks/blob/main/ollama/ollama_w_gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Defined globals
#@markdown **Common models**
#@markdown - llama3
#@markdown - llama2-uncensored
#@markdown - dolphin-mistral
#@markdown - codellama:34b
#@markdown - deepseek-r1:14b

#@markdown **Model search/lookups**
#@markdown   - [ollama model library](https://ollama.com/library)
#@markdown   - [ollama model search](https://ollama.com/search)

# Name of the Ollama model; it is pulled with `ollama pull` and sent as the "model" field of each request.
model_name="llama3" #@param {type: "string"}  The name of the LLM.
# Boolean form toggle. NOTE(review): no cell visible here reads this flag — confirm it is still needed.
debug=True #@param {type: "boolean"}
# URL that query_llm POSTs generation requests to (the local server's /api/generate route).
model_endpoint="http://localhost:11434/api/generate" #The endpoint for the LLM's API.

# Install and start ollama

In [None]:
#@title Install ollama
# Download the install script from ollama.com and pipe it straight into sh.
# curl flags: -f fail on HTTP errors, -sS quiet but still show errors, -L follow redirects.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
#@title Start ollama
import multiprocessing
import os
import time
import requests
import json

def run_ollama():
    """Print the current process ID, then run `ollama serve` through the shell.

    Intended as the target of a separate process: `os.system` does not return
    until the command it launched exits.
    """
    pid = os.getpid()
    print(f"Running ollama on PID {pid}")
    os.system('ollama serve')

ollama_process = multiprocessing.Process(target=run_ollama)
ollama_process.start()
time.sleep(10)

!ollama pull $model_name

In [None]:
#@title Test ollama connection
# Request the server's root URL; any HTTP reply shows that something is listening on port 11434.
!curl http://localhost:11434

In [None]:
#@title Defined functions
# prompt: Write a function that takes a dictionary prompt and sends a request to an LLM's API. The output is given in dictionary.

def query_llm(prompt):
  """Send `prompt` to the configured LLM endpoint and return the reply text.

  Reads the module-level `model_endpoint` (URL that is POSTed to) and
  `model_name` (sent as the "model" field). Streaming is disabled in the
  request body, and the reply is parsed as a single JSON document.

  Args:
    prompt: Text placed in the "prompt" field of the request body.

  Returns:
    The reply's "response" field with every newline replaced by a space, or
    None when the request itself fails, the status code is not 200, the body
    is not valid JSON, or the body has no "response" field. Each failure
    prints a short message before returning.
  """
  headers = {
      "Content-Type": "application/json",
  }
  data = {
      "model": model_name,
      "prompt": prompt,
      "stream": False
  }

  # An unreachable server surfaces as an exception rather than a status code;
  # report it the same way as the other failure paths instead of crashing.
  try:
    answer = requests.post(model_endpoint, headers=headers, json=data)
  except requests.exceptions.RequestException as e:
    print(f"Error: request to {model_endpoint} failed: {e}")
    return None

  if answer.status_code != 200:
    print(f"Error: {answer.status_code}")
    return None

  try:
    answer_dict = json.loads(answer.content)
  except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    return None

  # Guard the lookup: a 200 reply without "response" would otherwise raise KeyError.
  response_text = answer_dict.get("response") if isinstance(answer_dict, dict) else None
  if response_text is None:
    print('Error: reply has no "response" field')
    return None
  return response_text.replace("\n", " ")

# Main routine

In [None]:
#@title Start gradio
!pip install -q gradio

import gradio as gr

def generate(prompt,current):
  """Append one user turn and the model's reply to the running transcript.

  Args:
    prompt: The text the user just entered.
    current: The transcript so far; it is sent to the model together with
      the new prompt so earlier turns are part of the request.

  Returns:
    The transcript extended with "User: <prompt>", a "-----" separator, the
    model's answer (or an error note when no answer was obtained), and a
    "=====" terminator.
  """
  # Treat a missing textbox value as empty text so concatenation cannot fail.
  prompt = prompt or ""
  current = current or ""

  new_prompt = "User: " + prompt + "\n-----\n"
  answer = query_llm(current + new_prompt)
  # query_llm returns None on failure; concatenating None would raise
  # TypeError and break the UI callback, so show a note in the transcript.
  if answer is None:
    answer = "[Error: no response from the model; see the notebook output for details.]"
  return current + new_prompt + answer + "\n=====\n"

# Build the chat UI. Components are attached to the layout by being created
# inside the nested `with` blocks, so statement order here determines the layout.
with gr.Blocks(analytics_enabled=False) as demo:
  with gr.Column():
    # Top row: editable prompt field next to the button that submits it.
    with gr.Row():
      prompt_box = gr.Textbox(lines=3, interactive=True, value="Put prompt here.", label="Prompt")
      generate_button = gr.Button("Generate")
    # Read-only box that holds the accumulated conversation text.
    result_box = gr.Textbox(lines=20, interactive=False, label="Result")

  # On click, generate() receives the prompt plus the current contents of
  # result_box, and its return value replaces those contents, so the
  # transcript grows with every turn.
  generate_button.click(fn=generate, inputs=[prompt_box,result_box], outputs=[result_box])

# Enable queuing, then start the app.
# NOTE(review): share=True requests a public link to this app — confirm that exposing the model this way is intended.
# NOTE(review): inline=False / debug=True presumably keep the UI out of the cell output and keep the cell attached for error output — verify against the Gradio docs.
demo.queue().launch(inline=False, share=True, debug=True)