# Running ollama and exposing over NGROK

This notebook runs ollama with the model of your choice on Kaggle and exposes its port over NGROK (account required) so that you can connect to it remotely.
Useful for experiments on systems that do not have enough hardware capacity.

The notebook can run in the background (through "Save Version") and keep the NGROK tunnel open for an hour.

## Set parameters

In [None]:
import json

# NOTE(review): the {{ ... }} expressions look like template placeholders
# (Jinja-style) that must be substituted with concrete values before this
# cell is executed — confirm against whatever renders this notebook.

# Number of minutes the keep-alive loop near the end of the notebook waits
# (it is passed to datetime.timedelta(minutes=...)).
tunnel_run_time_minutes = {{ tunnel_run_time_minutes }}
# Tunnel metadata serialised to a JSON string; passed later to
# ngrok.connect(metadata=...).
tunnel_metadata = json.dumps({{ tunnel_metadata }})

## Install ollama

In [None]:
!curl https://ollama.ai/install.sh | sh

## Start ollama

In [None]:
import subprocess
import time
import os

# Set up a small timeout for models loading (in sec)
os.environ["OLLAMA_LOAD_TIMEOUT"] = "15"
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "2"

# Start ollama as a background process.
# The command is given as an argument list (no shell), so `process` refers to
# the ollama server itself and a missing binary raises FileNotFoundError
# instead of failing silently inside a wrapper shell.
command = ["ollama", "serve"]
ollama_log_path = "ollama_serve.log"

# Send the server output to a log file. Routing it to subprocess.PIPE without
# ever reading from the pipe would block the server as soon as the OS pipe
# buffer fills up.
with open(ollama_log_path, "ab") as ollama_log:
    process = subprocess.Popen(command,
                               stdin=subprocess.DEVNULL,
                               stdout=ollama_log,
                               stderr=subprocess.STDOUT,
                               start_new_session=True)  # detach from the notebook's session, like nohup
print("Process ID:", process.pid)
time.sleep(5)  # Give the server a few seconds to start up

# Fail early, with a pointer to the log, if the server has already exited.
if process.poll() is not None:
    raise RuntimeError(
        f"ollama serve exited with code {process.returncode}; see {ollama_log_path}"
    )

## Expose ollama over ngrok

In [None]:
!pip install pyngrok

## Get NGROK key from secrets

In [None]:
from kaggle_secrets import UserSecretsClient

# Read the secret stored under the label "NGROK" from the Kaggle secrets store.
# It is used as the ngrok auth token in the "Start ngrok" cell, so it is never
# hardcoded in the notebook.
user_secrets = UserSecretsClient()
secret_ngrok = user_secrets.get_secret("NGROK")

## Start ngrok

In [None]:
from pyngrok import ngrok

# Authenticate the ngrok agent with the token read from the Kaggle secrets.
ngrok.set_auth_token(secret_ngrok)

# Extra tunnel options: rewrite the Host header of forwarded requests and
# attach the JSON metadata prepared in the parameters cell.
tunnel_options = {
    "host_header": "rewrite",
    "metadata": tunnel_metadata,
}

# Open an HTTP tunnel to port 11434 (the port ollama is exposed on in this notebook).
ollama_tunnel = ngrok.connect(addr="11434", proto="http", **tunnel_options)
print(ollama_tunnel)

## Install the ollama Python client (used to test the tunnel)

In [None]:
!pip install ollama

## Test calling ollama over remote address

In [None]:
import ollama

# Point the client at the public ngrok URL instead of localhost, so that the
# request below travels through the tunnel.
client = ollama.Client(host=ollama_tunnel.public_url)
# As the last expression of the cell, the response is displayed as the cell output
# (presumably the list of models available on the server — verify against the ollama client docs).
client.list()

## Let the notebook run for a while

In [None]:
import datetime
from dateutil import tz
from time import sleep

# Time zone used for the timestamps printed below.
# The zone key must be written without spaces: tz.gettz() returns None for an
# unknown name such as 'Europe / Berlin', and datetime.now(None) then silently
# falls back to naive local time.
tzber = tz.gettz('Europe/Berlin')
start_time = datetime.datetime.now(tzber)
# The deadline does not change while waiting, so compute it once.
end_time = start_time + datetime.timedelta(minutes=tunnel_run_time_minutes)

# Keep the notebook (and with it the tunnel) alive until the deadline,
# reporting the tunnel URL and the ollama process status once a minute.
while datetime.datetime.now(tzber) < end_time:
    print(f"Wating: {datetime.datetime.now(tzber)} till {end_time} {ollama_tunnel.public_url}")
    print(client.ps())
    sleep(60)

## Close the tunnel

In [None]:
# Close the tunnel opened in the "Start ngrok" cell, identified by its public URL.
ngrok.disconnect(ollama_tunnel.public_url)