In [7]:
!sudo apt update
!sudo apt install -y pciutils
!echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
!sudo apt install -y cuda-drivers
!curl https://ollama.ai/install.sh | sh
!pip install ollama
!pip install gradio

import os

[33m0% [Working][0m            Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
[33m0% [Connecting to archive.ubuntu.com (91.189.91.83)] [Connecting to security.ub[0m                                                                               Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 https://cli.github.com/packages stable InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Do

In [13]:
import os
# Read LD_LIBRARY_PATH from the environment, falling back to a placeholder
# when the variable is not defined, and report it.
ld_library_path = os.environ.get('LD_LIBRARY_PATH', 'Not Set')
print(f"Current LD_LIBRARY_PATH: {ld_library_path}")

Current LD_LIBRARY_PATH: /usr/lib64-nvidia


In [18]:
import subprocess
import time
import urllib.request
import os

# Start a fresh `ollama serve` process in the background and wait until its
# HTTP endpoint answers. These variables are set before the spawn so that the
# child process inherits them.
os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'
os.environ['OLLAMA_ORIGINS'] = '*'

# Stop any server left over from an earlier run of this cell. pkill's exit
# status is deliberately ignored: "nothing to kill" is not an error here.
subprocess.run(['pkill', 'ollama'])
time.sleep(2)

# The child receives its own copy of the log descriptor, so the notebook's
# handle can be closed as soon as the process has been spawned.
with open('ollama.log', 'w') as log_file:
    process = subprocess.Popen(['ollama', 'serve'], stdout=log_file, stderr=log_file)

print("Starting Ollama server...")
url = "http://127.0.0.1:11434"
server_ready = False
for _ in range(60):
    # Stop waiting as soon as the server process has died.
    if process.poll() is not None:
        break
    try:
        # The timeout keeps a half-open connection from stalling the cell.
        with urllib.request.urlopen(url, timeout=5) as response:
            if response.status == 200:
                server_ready = True
                break
    except Exception:
        # Best effort: the server is simply not accepting connections yet.
        pass
    time.sleep(1)

if server_ready:
    print("Ollama server is running and ready!")
else:
    if process.poll() is not None:
        print(f"Ollama server exited early (exit code {process.returncode}).")
    else:
        print("Timed out waiting for Ollama server to start.")
    print("--- Server Logs ---")
    with open('ollama.log', 'r') as f:
        print(f.read())

Starting Ollama server...
Ollama server is running and ready!


In [None]:
import gradio as gr
import ollama

def chat_stream(message, history, system_prompt, model_name, temperature, top_p, top_k):
    """Stream one assistant reply from Ollama into the chat history.

    Generator event handler: every yielded pair is ``("", history)``. In this
    notebook the first element is routed to the message textbox (clearing it)
    and the second to the chatbot component.

    Args:
        message: Text the user just submitted.
        history: Prior conversation as a list of ``{'role', 'content'}`` dicts,
            or ``None``/empty for a new conversation. It is never modified; a
            copy is extended and yielded instead.
        system_prompt: Content of the system message placed before the history.
        model_name: Model name passed to ``ollama.chat``.
        temperature: Sampling temperature forwarded in ``options``.
        top_p: Forwarded in ``options`` unchanged.
        top_k: Forwarded in ``options`` after conversion to ``int``.

    Yields:
        ``("", history)`` once after the user turn is recorded, then once per
        received content chunk with the assistant message grown by that chunk.
        If the request fails, the assistant message is replaced by
        ``"Error: <details>"`` and yielded once.
    """
    # Work on a copy so the caller's list is left untouched.
    history = list(history) if history else []

    # Nothing to send: keep the conversation as it is and skip the model call.
    if not message or not message.strip():
        yield "", history
        return

    history.append({'role': 'user', 'content': message})
    # Show the user's turn immediately instead of waiting for the first token.
    yield "", history

    # Built before the placeholder is appended, so the request ends with the
    # user's message rather than an empty assistant turn.
    ollama_messages = [{'role': 'system', 'content': system_prompt}] + history

    # Placeholder that is filled in as chunks arrive.
    history.append({'role': 'assistant', 'content': ""})

    options = {
        'temperature': temperature,
        'top_p': top_p,
        'top_k': int(top_k),
    }

    try:
        stream = ollama.chat(
            model=model_name,
            messages=ollama_messages,
            stream=True,
            options=options
        )

        partial_message = ""
        for chunk in stream:
            # Chunks without message content are skipped.
            if 'message' in chunk and 'content' in chunk['message']:
                partial_message += chunk['message']['content']
                history[-1]['content'] = partial_message
                yield "", history
    except Exception as e:
        # Surface the failure in the chat instead of raising into the UI.
        history[-1]['content'] = f"Error: {e}"
        yield "", history

def pull_model_func(model_name, progress=gr.Progress()):
    """Pull a model through ``ollama.pull`` and report progress.

    Args:
        model_name: Name of the model to pull. Surrounding whitespace is
            ignored; an empty name is rejected without calling Ollama.
        progress: Callable used to report progress. It is called with a
            fraction (or ``None`` when no fraction is known) and a ``desc``
            text.

    Returns:
        A status string: a prompt to enter a name, a success message, or a
        failure message containing the error text. Exceptions raised during
        the pull are caught and reported in the returned string.
    """
    # Strip stray whitespace from the textbox value before using it as a name.
    model_name = (model_name or "").strip()
    if not model_name:
        return "Please enter a model name."

    try:
        progress(0, desc=f"Starting pull for {model_name}...")
        for status in ollama.pull(model_name, stream=True):
            # Only compute a fraction when a non-zero total is known; a total
            # of 0 would otherwise raise ZeroDivisionError and be reported as
            # a failed pull.
            if 'completed' in status and 'total' in status and status['total']:
                val = (status['completed'] or 0) / status['total']
                progress(val, desc=f"Pulling {model_name}: {status.get('status', '')}")
            elif 'status' in status:
                progress(None, desc=f"{status['status']}")

        return f"Successfully pulled '{model_name}'"
    except Exception as e:
        return f"Failed to pull '{model_name}': {str(e)}"

# Dashboard layout: a wide chat column on the left and a narrow settings
# panel on the right, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Ollama Dashboard")

    with gr.Row():
        # Conversation pane
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(type="messages", height=600)

            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Type here...",
                    autofocus=True,
                    scale=4,
                )
                send_btn = gr.Button(value="Send", variant="primary", scale=1)

            clear = gr.Button(value="Clear Context")

        # Settings pane
        with gr.Column(variant="panel", scale=1):
            gr.Markdown("### ⚙️ Configuration")

            model_input = gr.Textbox(
                value="dolphin3",
                label="Model Name",
                info="Enter model name (e.g., dolphin3, llama3, mistral) to chat or pull.",
            )
            pull_btn = gr.Button(value="Pull Model")
            pull_status = gr.Textbox(label="Pull Status", max_lines=2, interactive=False)

            gr.Markdown("---")

            system_input = gr.Textbox(
                value="You are a helpful AI assistant.",
                label="System Instructions",
                lines=5,
            )
            temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
            top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
            top_k_slider = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top K")

    # Pull button: download the model named in the settings pane.
    pull_btn.click(fn=pull_model_func, inputs=[model_input], outputs=[pull_status])

    # Pressing Enter in the textbox and clicking Send run the same handler
    # with the same inputs and outputs.
    chat_inputs = [
        msg,
        chatbot,
        system_input,
        model_input,
        temp_slider,
        top_p_slider,
        top_k_slider,
    ]
    chat_outputs = [msg, chatbot]
    for trigger in (msg.submit, send_btn.click):
        trigger(fn=chat_stream, inputs=chat_inputs, outputs=chat_outputs)

    # Reset the chatbot to an empty conversation.
    clear.click(fn=lambda: [], inputs=None, outputs=chatbot, queue=False)

# share=True serves the app on a public URL; debug=True keeps the cell
# running so that errors and logs stay visible.
demo.launch(share=True, debug=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot(height=600, type="messages")


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://1df25e52a17a58cd03.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
