<a href="https://colab.research.google.com/github/preunpatching/preunpatching.github.io/blob/main/Ollama_Runtime_for_Google_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ollama Runtime for Google Colab
This notebook allows you to quickly run Ollama on Google servers, which can be useful for most people who do not have a powerful enough PC to run certain models.
## How to run
### Start notebook
It's recommended that the runtime is configured to use a T4 GPU. To do that, go to Runtime > Change runtime type, then set the hardware accelerator to "T4 GPU". The runtime can run with just a CPU, but this will cause serious performance degradation compared to running with a GPU.
Once ready, press Ctrl+F9, or go to Runtime > Run all.
### Start model
When the notebook asks you to select a compatible Ollama model to use, you may go to https://ollama.com/models to see the library of available models, or use the built-in model search function by leaving the model selection blank. **Be aware that not all models will run on the best free version of the runtime (16 GB RAM, T4 GPU with 16 GB VRAM), so pick a model that fits within those limits!**
## Bugs
*   None

Once done, you're ready to chat with your selected model!
Have fun!

In [None]:
# Check if we're using an Nvidia GPU.
import os
if os.path.isfile("/opt/bin/nvidia-smi"):
  print("Nvidia GPU detected.")
  # Install pciutils so that Ollama can auto-detect the Nvidia GPU.
  !sudo apt install pciutils
else:
  print("Couldn't detect Nvidia GPU. This will cause serious performance degredations when using a CPU only. It is recommended that you switch to a Nvidia GPU to prevent performance degredations.")
  input("Press ENTER to continue . . .")
# Install Ollama, as well as its API and BS4 for model search function.
!pip install ollama bs4
!curl -fsSL https://ollama.com/install.sh | sh
print("\033[0m", end='')
# Start Ollama server in a seperate process.
import subprocess
import signal
process = subprocess.Popen("ollama serve", shell=True)
# Wait for Ollama server to initialize.
import time
time.sleep(0.5)
# Prompt user for which Ollama model to use.
# The loop repeats until a non-empty model name has been entered. An empty
# answer switches to the search path instead; since `model` is still empty
# afterwards, the selection prompt is shown again on the next iteration.
model = ""
while not model:
  model = input("Select compatible Ollama model to use (leave blank to search): ")
  if not model:
    # Blank selection: ask for a search term instead of a model name.
    query = input("Type in query for model search: ")
    # Imported on the search path only; these are the HTTP client and the
    # HTML parser used by the search code that follows.
    import requests
    from bs4 import BeautifulSoup

    def get_ollama_model_metadata(search_query="", category="", order="", limit=None, timeout=10):
        """
        Retrieve metadata for models listed on the Ollama search page.

        The page at https://ollama.com/search is fetched and scraped; the
        result depends on that page's current HTML structure.

        Args:
            search_query (str): The search query to use.
            category (str): Filter by category ("embedding", "vision", "tools").
            order (str): Order by ("newest").
            limit (int, optional): Limit the number of results. ``None`` or 0
                means no limit.
            timeout (float): Seconds to wait for the server before giving up,
                so an unresponsive site cannot hang the notebook forever.
        Returns:
            list: A list of dictionaries, one per model. Each dictionary holds
            whichever of the keys "Link", "Title", "Description",
            "Capabilities", "Sizes", "Pull count" and "Last updated" could be
            found for that model. An empty list is returned when nothing
            matches or when an error occurs (the error is printed).
        """

        url = "https://ollama.com/search"
        params = {"q": search_query}

        # Optional filters are only sent when the caller supplied them.
        if category:
            params["c"] = category
        if order:
            params["o"] = order

        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")
            model_list = soup.find("ul", role="list")

            if not model_list:
                return []

            models = []
            for model_item in model_list.find_all("li", attrs={"x-test-model": ""}):
                model_data = {}

                link = model_item.find("a", class_="group w-full")
                # Skip the link when the anchor has no href rather than
                # raising and losing every result collected so far.
                if link and link.get("href"):
                    model_data["Link"] = "https://ollama.com" + link["href"]

                title_div = model_item.find("div", class_="flex flex-col mb-1")
                if title_div:
                    model_data["Title"] = title_div.get("title")

                description_p = model_item.find("p", class_="max-w-lg break-words text-neutral-800 text-md")
                if description_p and description_p.text.strip():
                    model_data["Description"] = description_p.text.strip()

                metadata_div = model_item.find("div", class_="flex flex-col")
                if metadata_div:
                    capabilities = metadata_div.find_all("span", attrs={"x-test-capability": ""})
                    capability_list = [cap.text.strip() for cap in capabilities]
                    if capability_list:
                        model_data["Capabilities"] = " | ".join(capability_list)

                    sizes = metadata_div.find_all("span", attrs={"x-test-size": ""})
                    size_list = [size.text.strip() for size in sizes]
                    if size_list:
                        model_data["Sizes"] = " | ".join(size_list)

                pull_count_span = model_item.find("span", attrs={"x-test-pull-count": ""})
                if pull_count_span:
                    model_data["Pull count"] = pull_count_span.text.strip()

                updated_span = model_item.find("span", attrs={"x-test-updated": ""})
                if updated_span:
                    model_data["Last updated"] = updated_span.text.strip()

                models.append(model_data)
                # A falsy limit (None or 0) disables the cap.
                if limit and len(models) >= limit:
                    break

            return models

        except requests.exceptions.RequestException as e:
            # Covers connection failures, timeouts and HTTP error statuses.
            print(f"Error fetching data: {e}")
            return []
        except Exception as e:
            # Scraping is best-effort: report a parsing surprise and let the
            # caller fall back to "no results" instead of crashing the cell.
            print(f"An unexpected error occurred: {e}")
            return []

    # Run the search for the entered query (no category filter, default
    # ordering, at most ten hits) and print each hit as a "---"-separated
    # list of "key: value" lines.
    search_hits = get_ollama_model_metadata(query, "", "", 10)

    if not search_hits:
        print("No models found.")
    else:
        for hit in search_hits:
            print("---")
            for field, text in hit.items():
                print(f"{field}: {text}")
# Pull selected model.
# Progress updates are streamed from the Ollama server. Updates that carry a
# digest are rendered as one tqdm bar per digest; updates without a digest
# are printed as plain status lines.
from tqdm import tqdm
from ollama import pull
current_digest, bars = '', {}
for progress in pull(model, stream=True):
  # Normalise a missing or None digest to '' so the checks below behave the
  # same either way.
  digest = progress.get('digest') or ''
  # Moving on to a different digest: finish the previous digest's bar.
  if digest != current_digest and current_digest in bars:
    bars[current_digest].close()

  if not digest:
    print(progress.get('status'))
    continue

  # A bar can only be created once the total size is known.
  if digest not in bars and (total := progress.get('total')):
    bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

  # Guard on the bar's existence: a 'completed' value that arrives before any
  # 'total' for this digest would otherwise raise KeyError.
  if digest in bars and (completed := progress.get('completed')):
    # tqdm.update takes an increment, so advance by the delta from the bar's
    # current position.
    bars[digest].update(completed - bars[digest].n)

  current_digest = digest
# Make sure no bar is left open if the stream ended on a digest update.
for bar in bars.values():
  bar.close()
# Start model.
from ollama import chat


def _stop_ollama_server(server, grace_seconds=5):
  """Stop the Ollama server process, escalating to SIGKILL if necessary.

  Args:
    server: The ``subprocess.Popen`` object of the running server.
    grace_seconds: How long to wait after SIGTERM before forcing termination.
  """
  print("Terminating Ollama server...")
  server.send_signal(signal.SIGTERM)
  try:
    server.wait(timeout=grace_seconds)
    print("Ollama server terminated gracefully.")
  except subprocess.TimeoutExpired:
    print("Terminating Ollama server forcefully...")
    server.send_signal(signal.SIGKILL)  # Force termination
    server.wait()
    print("Ollama server terminated forcefully.")


# Interactive chat session. "/bye" ends the session and "/clear" forgets the
# conversation so far; anything else is sent to the model together with the
# accumulated history.
messages = []
try:
  while True:
    user_input = input('>>> ')
    command = user_input.strip().lower()
    if command == "/bye":
      break
    if command == "/clear":
      messages = []
      print("Cleared session context")
      continue

    # Stream the reply, echoing each chunk as it arrives and collecting the
    # chunks so the full text can be stored in the history.
    reply_chunks = []
    for part in chat(
      model,
      messages=messages + [{'role': 'user', 'content': user_input}],
      stream=True,
    ):
      chunk = part['message']['content']
      reply_chunks.append(chunk)
      print(chunk, end='', flush=True)
    response = ''.join(reply_chunks)

    # Add the response to the messages to maintain the history.
    messages += [
      {'role': 'user', 'content': user_input},
      {'role': 'assistant', 'content': response},
    ]
    # The reply has already been shown while streaming, so only finish the
    # line and leave a blank one; printing `response` again would duplicate it.
    print('\n')
except (KeyboardInterrupt, EOFError):
  # Interrupting the cell (or a closed input stream) ends the session.
  pass
finally:
  # Always shut the server down, including when chat() raises, so no
  # background process is left running after the cell stops.
  _stop_ollama_server(process)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pulling 43070e2d4e53... 100% ▕▏  11 KB                         
pulling e0daf17ff83e... 100% ▕▏   21 B                         [?25h[?25l[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1Gpulling manifest 
pulling 102a747c1376... 100% ▕▏  14 GB                         
pulling 6db27cd4e277... 100% ▕▏  695 B                         
pulling 6d7b25ffd247... 100% ▕▏  644 B                         
pulling 43070e2d4e53... 100% ▕▏  11 KB                         
pulling e0daf17ff83e... 100% ▕▏   21 B                         
pulling 41ffc852c4b6...   0% ▕▏    0 B/ 562 B                  [?25h[?25l[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1G[A[2K[1Gpulling manifest 
pulling 102a747c1376... 100% ▕▏  14 GB                         
pulling 6db27cd4e277... 100% ▕▏  695 B                         
pulling 6d7b25ffd247... 100% ▕▏  644 B                         
pulling 43070e2d4e53... 100% ▕▏  11 