In [None]:
#@markdown <h3>Check GPU Availability</h3>

# Show the GPUs visible to the NVIDIA driver (fails if no driver/GPU is attached).
! nvidia-smi
# Print the version of the installed CUDA compiler.
! nvcc -V
# Report system memory and swap usage in human-readable units.
! free -h


In [None]:
#@markdown <h3>Install Ollama and Cloudflare Tunnel</h3>
import os
import urllib.request
from IPython.display import clear_output
import subprocess

# Current user's home directory. NOTE(review): no cell visible here reads HOME.
HOME = os.path.expanduser("~")

# define helper functions

def textAn(TEXT, ty='d'):
    """Show TEXT in the cell output wrapped in animated HTML/CSS.

    ty='d'   -> Raleway-styled text with a looping text-shadow animation.
    ty='twg' -> monospace "typewriter" reveal whose final width is
                len(TEXT) * 0.55 em.
    Any other ty displays nothing and returns None.

    TEXT is inserted into the markup as-is (no HTML escaping). Returns
    whatever display() returns.
    """
    from IPython.display import HTML

    if ty == 'd':
        glow_open = '''<style>@import url(https://fonts.googleapis.com/css?family=Raleway:400,700,900,400italic,700italic,900italic);#wrapper {   font: 17px 'Raleway', sans-serif;animation: text-shadow 1.5s ease-in-out infinite;    margin-left: auto;    margin-right: auto;    }#container {    display: flex;    flex-direction: column;    float: left;     }@keyframes text-shadow { 0% 20% {          transform: translateY(-0.1em);        text-shadow:             0 0.1em 0 #0c2ffb,             0 0.1em 0 #2cfcfd,             0 -0.1em 0 #fb203b,             0 -0.1em 0 #fefc4b;    }    40% {          transform: translateY(0.1em);        text-shadow:             0 -0.1em 0 #0c2ffb,             0 -0.1em 0 #2cfcfd,             0 0.1em 0 #fb203b,             0 0.1em 0 #fefc4b;    }       60% {        transform: translateY(-0.1em);        text-shadow:             0 0.1em 0 #0c2ffb,             0 0.1em 0 #2cfcfd,             0 -0.1em 0 #fb203b,             0 -0.1em 0 #fefc4b;    }   }@media (prefers-reduced-motion: reduce) {    * {      animation: none !important;      transition: none !important;    }}</style><div id="wrapper"><div id="container">'''
        glow_close = '''</div></div>'''
        return display(HTML(glow_open + TEXT + glow_close))

    if ty == 'twg':
        # Width (in em) the typewriter animation expands to.
        reveal_width = str(len(TEXT)*0.55)
        type_head = '''<style>@import url(https://fonts.googleapis.com/css?family=Anonymous+Pro);.line-1{font-family: 'Anonymous Pro', monospace;    position: relative;   border-right: 1px solid;    font-size: 15px;   white-space: nowrap;    overflow: hidden;    }.anim-typewriter{  animation: typewriter 0.4s steps(44) 0.2s 1 normal both,             blinkTextCursor 600ms steps(44) infinite normal;}@keyframes typewriter{  from{width: 0;}  to{width: '''
        type_body = '''em;}}@keyframes blinkTextCursor{  from{border-right:2px;}  to{border-right-color: transparent;}}</style><div class="line-1 anim-typewriter">'''
        type_tail = '''</div>'''
        return display(HTML(type_head + reveal_width + type_body + TEXT + type_tail))

def loadingAn(name="cal"):
    """Show a CSS loading spinner in the cell output.

    name="cal" -> rotating ring spinner.
    name="lds" -> rotating hourglass spinner.
    Any other name displays nothing and returns None.

    Returns whatever display() returns.
    """
    from IPython.display import HTML

    if name == "cal":
        markup = '<style>.lds-ring {   display: inline-block;   position: relative;   width: 34px;   height: 34px; } .lds-ring div {   box-sizing: border-box;   display: block;   position: absolute;   width: 34px;   height: 34px;   margin: 4px;   border: 5px solid #cef;   border-radius: 50%;   animation: lds-ring 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;   border-color: #cef transparent transparent transparent; } .lds-ring div:nth-child(1) {   animation-delay: -0.45s; } .lds-ring div:nth-child(2) {   animation-delay: -0.3s; } .lds-ring div:nth-child(3) {   animation-delay: -0.15s; } @keyframes lds-ring {   0% {     transform: rotate(0deg);   }   100% {     transform: rotate(360deg);   } }</style><div class="lds-ring"><div></div><div></div><div></div><div></div></div>'
    elif name == "lds":
        markup = '''<style>.lds-hourglass {  display: inline-block;  position: relative;  width: 34px;  height: 34px;}.lds-hourglass:after {  content: " ";  display: block;  border-radius: 50%;  width: 34px;  height: 34px;  margin: 0px;  box-sizing: border-box;  border: 20px solid #dfc;  border-color: #dfc transparent #dfc transparent;  animation: lds-hourglass 1.2s infinite;}@keyframes lds-hourglass {  0% {    transform: rotate(0);    animation-timing-function: cubic-bezier(0.55, 0.055, 0.675, 0.19);  }  50% {    transform: rotate(900deg);    animation-timing-function: cubic-bezier(0.215, 0.61, 0.355, 1);  }  100% {    transform: rotate(1800deg);  }}</style><div class="lds-hourglass"></div>'''
    else:
        # Unknown spinner name: nothing to show.
        return None

    return display(HTML(markup))

loadingAn()
textAn("Installing Ollama and Cloudflare Tunnel...", "twg")

# Install Ollama
!curl https://ollama.ai/install.sh | sh

# Install Cloudflared
!curl -L --output cloudflared.deb https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared.deb
!rm cloudflared.deb

clear_output()
print("✅ Installation completed!")


In [None]:
#@markdown <h3>Start Ollama Server</h3>
import os
import signal
import subprocess
from IPython.display import clear_output

def start_ollama_server(log_path="ollama_server.log"):
    """Restart `ollama serve` in the background and report whether it stayed up.

    Any process matched by `pkill ollama` is terminated first. The new server
    inherits the current environment plus OLLAMA_HOST=0.0.0.0:11434,
    CUDA_VISIBLE_DEVICES=0 and OLLAMA_CUDA=1.

    Args:
        log_path: File the server's stdout and stderr are appended to.

    Returns:
        The subprocess.Popen handle if the process is still alive two seconds
        after launch, otherwise None.
    """
    import time

    # Kill any existing ollama process; a missing/unusable pkill is not fatal.
    try:
        subprocess.run(['pkill', 'ollama'], check=False)
    except OSError:
        pass

    # Send server output to a file instead of a pipe. Nothing in the notebook
    # reads the pipe, so a long-running server would eventually block once the
    # pipe buffer filled up.
    log_file = open(log_path, 'ab')
    try:
        process = subprocess.Popen(
            ['ollama', 'serve'],
            env={
                **os.environ,
                'OLLAMA_HOST': '0.0.0.0:11434',
                'CUDA_VISIBLE_DEVICES': '0',  # Use first GPU
                'OLLAMA_CUDA': '1'  # Enable CUDA support
            },
            stdout=log_file,
            stderr=subprocess.STDOUT
        )
    except FileNotFoundError:
        clear_output()
        print("❌ Failed to start Ollama server")
        print("The `ollama` executable was not found - run the install cell first.")
        return None
    finally:
        # The child holds its own copy of the descriptor.
        log_file.close()

    # Wait briefly to ensure server starts
    time.sleep(2)

    # Check if process is running
    if process.poll() is None:
        clear_output()
        print("✅ Ollama server is running with GPU support!")
        return process

    clear_output()
    print("❌ Failed to start Ollama server")
    # Show the end of the log so the reason for the exit is visible.
    try:
        with open(log_path, 'r', errors='replace') as log_reader:
            recent = log_reader.readlines()[-10:]
    except OSError:
        recent = []
    if recent:
        print(f"Last lines of {log_path}:")
        print(''.join(recent).rstrip())
    return None

ollama_process = start_ollama_server()

In [None]:
#@markdown <h3>Pull Ollama Model</h3>
#@markdown Select a model to pull if not already present

# Model tag handed to `ollama pull`; the form offers presets and also accepts free text.
MODEL_NAME = "llama3.2:3b" #@param ["llama3.2:3b", "deepseek-r1:14b","deepseek-r1:8b", "qwen2.5:7b", "qwen2.5:14b", "mistral", "qwen2.5-coder:7b", "phi4:14b", "gemma:7b", "gemma2:9b"] {allow-input: true}
# When True, pull_model() skips the "already pulled" check and pulls regardless.
FORCE_PULL = False #@param {type:"boolean"}

import subprocess
from IPython.display import clear_output

def check_model_exists(model_name, max_retries=3, retry_delay=2):
    """Return True if `ollama list` reports a model named `model_name`.

    The first column of every non-header row is compared with the requested
    name exactly, so "llama3.2:3b" no longer matches "llama3.2:3b-instruct".
    A name given without a tag also matches its ":latest" entry.

    Args:
        model_name: Model tag to look for.
        max_retries: How many times to run `ollama list` before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        True if the model is listed; False if it is not listed or the list
        could not be obtained (non-zero exit, timeout, or missing binary).
    """
    import time  # local import: this cell does not import time at top level

    wanted = {model_name}
    if ':' not in model_name:
        wanted.add(f"{model_name}:latest")

    for attempt in range(max_retries):
        try:
            result = subprocess.run(['ollama', 'list'],
                                    capture_output=True,
                                    text=True,
                                    timeout=10)
        except (subprocess.TimeoutExpired, OSError):
            # Server busy or binary missing: treat like a failed attempt.
            result = None

        if result is not None and result.returncode == 0:
            rows = result.stdout.split('\n')[1:]  # skip the header row
            listed = {row.split()[0] for row in rows if row.strip()}
            return not wanted.isdisjoint(listed)

        if attempt < max_retries - 1:
            time.sleep(retry_delay)
    return False

def pull_model(model_name, force=False):
    """Run `ollama pull` for `model_name`, echoing its output line by line.

    Unless `force` is true, the pull is skipped when check_model_exists()
    already reports the model. stderr is merged into stdout, and lines
    containing "error" (case-insensitive) are remembered for the failure
    report.

    Returns:
        True if the model was already present or the pull exited with code 0,
        otherwise False.
    """
    already_present = (not force) and check_model_exists(model_name)
    if already_present:
        print(f"✅ Model {model_name} is already pulled")
        return True

    print(f"📥 Pulling model {model_name}...")
    puller = subprocess.Popen(
        ['ollama', 'pull', model_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the same stream
    )

    # Stream output until EOF, collecting anything that looks like an error.
    error_lines = []
    for raw in iter(puller.stdout.readline, b''):
        text = raw.decode().strip()
        print(text)
        if "error" in text.lower():
            error_lines.append(text)

    exit_code = puller.wait()
    clear_output()
    if exit_code == 0:
        print(f"✅ Successfully pulled {model_name}")
        return True

    print(f"❌ Failed to pull {model_name}")
    if error_lines:
        print("Error details:")
        # Only the three most recent error lines are shown.
        for detail in error_lines[-3:]:
            print(f"| {detail}")
    return False

# Retry the pull a few times before giving up.
import time  # needed for the back-off sleep; this cell has no other import of time

max_retries = 2
for attempt in range(max_retries + 1):
    if pull_model(MODEL_NAME, FORCE_PULL):
        break
    if attempt < max_retries:
        print(f"Retrying pull ({attempt + 1}/{max_retries})...")
        time.sleep(5)
else:
    # Reached only when no attempt succeeded (the loop never hit `break`).
    print(f"❌ Giving up on {MODEL_NAME} after {max_retries + 1} attempts")

In [None]:
#@markdown <h3>List Pulled Models</h3>

import subprocess
from IPython.display import clear_output, Markdown

def get_pulled_models():
    """Return the model names reported by `ollama list`.

    The first output row is treated as a header and skipped; for every other
    non-blank row the first whitespace-separated field is taken as the name.
    On any failure a message is printed and an empty list is returned.
    """
    try:
        listing = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True,
            check=True,
        )
        data_rows = listing.stdout.split('\n')[1:]
        return [row.split()[0] for row in data_rows if row.strip()]
    except subprocess.CalledProcessError as e:
        # Command ran but exited non-zero.
        print("Error retrieving model list:")
        print(e.stderr)
        return []
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return []

# Clear stale output *before* querying, so any error message printed by
# get_pulled_models() stays visible instead of being wiped right away.
clear_output()
pulled_models = get_pulled_models()

if pulled_models:
    print("✅ Pulled Models:")
    display(Markdown("\n".join([f"- 🦙 {model}" for model in pulled_models])))
else:
    print("ℹ️ No models pulled yet. Use the model pull section above first.")


In [None]:
#@markdown <h3>Expose Ollama API via Cloudflare Tunnel</h3>

import socket
import subprocess
import time
import re
import json
import requests
from IPython.display import clear_output, Markdown

#@markdown ### API Settings
# Sent as the Bearer token when the tunnel URL is saved to WORKER_URL.
API_KEY = "" #@param {type:"string"}
# Base URL of the key-value worker that receives the tunnel URL.
WORKER_URL = "https://nazkvhub.nazdridoy.workers.dev" #@param {type:"string"}
# Key under which the URL is saved (used in the /v1/save/ and /v1/query/ paths).
UNIQUE_KEY = "ollama-tunnel" #@param {type:"string"}

def _drain_output(stream, lines, capturing):
    """Read `stream` to EOF, queueing each line while `capturing` is set.

    Reading continues after capture stops so the child process never blocks
    on a full pipe. A final None marks end-of-stream.
    """
    for line in stream:
        if capturing.is_set():
            lines.put(line)
    lines.put(None)


def _store_tunnel_url(public_url):
    """POST the tunnel URL to the KV worker and print the outcome."""
    try:
        url_data = {
            "url": public_url,
            "immutable": False  # Allow updating when tunnel URL changes
        }
        response = requests.post(
            f'{WORKER_URL}/v1/save/{UNIQUE_KEY}',
            headers={
                'Authorization': f'Bearer {API_KEY}',
                'Content-Type': 'application/json'
            },
            json=url_data,
            timeout=15  # never let an unresponsive worker hang the cell
        )

        if response.status_code == 200:
            print(f"\n✅ URL stored in KV successfully")
            print(f"Access at: {WORKER_URL}/v1/query/{UNIQUE_KEY}")
        else:
            print(f"\n❌ Failed to store URL: {response.status_code}")
            print(f"Response: {response.text}")
    except Exception as e:
        print(f"\n❌ Error storing URL: {str(e)}")


def expose_ollama_api(timeout=30):
    """Open a Cloudflare quick tunnel to the local Ollama API.

    Checks that the server started by the "Start Ollama Server" cell is alive
    and that port 11434 accepts connections, launches
    `cloudflared tunnel --url http://localhost:11434`, and scans its output
    for a *.trycloudflare.com URL. On success the URL is displayed, saved to
    the KV worker, and cloudflared is left running in the background.

    Args:
        timeout: Seconds to wait for cloudflared to print the tunnel URL.

    Returns:
        The public URL string, or None if any step failed.
    """
    import queue
    import threading

    # Verify Ollama is running (tolerate the server cell never having been run)
    server = globals().get('ollama_process')
    if not server or server.poll() is not None:
        print('❌ Ollama server not running! Run the "Start Ollama Server" cell first.')
        return None

    # Check API connectivity
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(2)
            s.connect(("localhost", 11434))
    except OSError:
        print("❌ Ollama API not responding on port 11434")
        return None

    # Start Cloudflare tunnel
    try:
        process = subprocess.Popen(
            ["cloudflared", "tunnel", "--url", "http://localhost:11434"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True
        )
    except FileNotFoundError:
        print("❌ cloudflared is not installed - run the install cell first.")
        return None

    # A background reader lets the wait below honour the timeout even when
    # cloudflared prints nothing, and keeps draining the pipe afterwards.
    lines = queue.Queue()
    capturing = threading.Event()
    capturing.set()
    threading.Thread(
        target=_drain_output,
        args=(process.stdout, lines, capturing),
        daemon=True
    ).start()

    # Wait for tunnel URL
    public_url = None
    seen = []
    deadline = time.monotonic() + timeout
    while public_url is None:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            line = lines.get(timeout=remaining)
        except queue.Empty:
            break
        if line is None:
            # cloudflared closed its output (it exited) before printing a URL.
            break
        seen.append(line)

        # Look for tunnel URL in output
        match = re.search(r'https://[a-zA-Z0-9\-]+\.trycloudflare\.com', line)
        if match:
            public_url = match.group(0)
    capturing.clear()

    if public_url is None:
        # Stop the process first; waiting on a live cloudflared would block forever.
        process.kill()
        process.wait()
        print("❌ Failed to create tunnel. Cloudflared output:")
        print(''.join(seen))
        return None

    clear_output()
    display(Markdown(f"### 🚀 Public Ollama API URL: [{public_url}]({public_url})"))
    # Use the model chosen in the pull cell for the example, if there is one.
    example_model = globals().get('MODEL_NAME', 'llama2')
    payload = json.dumps({"model": example_model, "prompt": "Hi"}, separators=(',', ':'))
    print("Test with:")
    print(f'curl {public_url}/api/tags')
    print(f"curl {public_url}/api/generate -d '{payload}'")

    # Store URL in nazkvhub
    _store_tunnel_url(public_url)

    return public_url

# Expose the API
api_url = expose_ollama_api()

## <img src='http://icons.iconarchive.com/icons/blackvariant/button-ui-system-apps/1024/Terminal-icon.png' height="25" alt="debug" />___Debug___

In [None]:
# Send a test generation request straight to the local Ollama API (bypasses the tunnel).
!curl -X POST http://localhost:11434/api/generate -d '{"model":"gemma2:2b","prompt":"Hi"}'

In [None]:
# Remove the gemma2:2b model from the local Ollama store.
!ollama rm gemma2:2b

In [None]:
# List running processes whose command line mentions ollama.
!ps aux | grep ollama

In [None]:
# Stop every process matching "ollama" (including the background server).
!pkill ollama

In [None]:
# Stop the Cloudflare tunnel process.
!pkill cloudflared

In [None]:
# Show listening TCP/UDP sockets together with the owning process.
!sudo ss -tunlp | grep "LISTEN"

In [None]:
# Install the localtunnel CLI globally via npm.
!npm install -g localtunnel

In [None]:
# Start a localtunnel for port 11434 in the background.
!lt --port 11434 &

In [None]:
# Fetch the localtunnel password endpoint - presumably the value loca.lt asks for on first visit; verify.
!curl https://loca.lt/mytunnelpassword