<a href="https://colab.research.google.com/github/kushalshah0/colab_tools/blob/main/ollama_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1. Install Zstandard (zstd) Dependency

In [None]:
# Notebook cell: install the zstd package via apt using an IPython shell escape.
# NOTE(review): presumably zstd is required by a later step (e.g. the Ollama
# installer) — confirm which step actually needs it.
print('Installing zstd dependency...')
# Refresh the apt package index first; `&&` runs the install only if the update
# succeeded, and `-y` answers the confirmation prompt automatically.
!sudo apt-get update && sudo apt-get install -y zstd
# This message is printed unconditionally; it does not reflect the exit status
# of the shell command above, so check the apt output for errors.
print('zstd installed.')

### 2. Install Ollama

In [None]:
print('Installing Ollama...')
!curl -fsSL https://ollama.com/install.sh | sh
print('Ollama installation script executed. Verifying installation.')

### 3. Install PCI Utilities for GPU Detection

In [None]:
# Notebook cell: install the pciutils package via apt using an IPython shell escape.
# NOTE(review): presumably Ollama relies on pciutils (lspci) to detect the GPU —
# confirm against the Ollama installer output.
print('Installing pciutils for GPU detection...')
# `-y` answers the apt confirmation prompt automatically.
!sudo apt-get install -y pciutils
# Printed unconditionally; it does not reflect the exit status of the command
# above. The server itself is (re)started by the next cell.
print('pciutils installed. Now we need to restart Ollama for it to recognize the GPU.')

### 4. Start Ollama Server

In [None]:
# Notebook cell: launch `ollama serve` in the background and wait until its
# HTTP endpoint answers, so the status message reflects reality.
print('Starting Ollama server with GPU detection enabled...')
import os
import time
import urllib.request

# Exported before launching so the background server process inherits it.
os.environ['OLLAMA_HOST'] = '0.0.0.0'
get_ipython().system_raw('ollama serve &') # Start Ollama server in the background

# `system_raw` returns immediately, so the server may not be listening yet.
# Poll the local endpoint (same port the tunnel cell targets) for a bounded time.
ollama_local_url = 'http://127.0.0.1:11434/'
startup_timeout_seconds = 30
server_ready = False
deadline = time.monotonic() + startup_timeout_seconds
while time.monotonic() < deadline:
    try:
        with urllib.request.urlopen(ollama_local_url, timeout=2) as probe:
            if probe.status == 200:
                server_ready = True
                break
    except OSError:
        # Connection refused / timed out: the server is not accepting
        # connections yet, so retry until the deadline.
        pass
    time.sleep(1)

if server_ready:
    print('Ollama server started. It should now detect GPU if available.')
else:
    print(f'Warning: Ollama server did not respond on {ollama_local_url} '
          f'within {startup_timeout_seconds} seconds. '
          'Run `ollama serve` manually to inspect its error output.')

### 5. Pull Code-Generation Model

In [None]:
print('Attempting to pull qwen2.5-coder:7b-instruct-q4_K_M model...')
import subprocess
import time

# Give Ollama a moment to fully start after setting OLLAMA_HOST
time.sleep(10)

# Attempt to pull the primary model
!ollama pull qwen2.5-coder:7b-instruct-q4_K_M

# Check if the primary model was pulled successfully
try:
    result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, check=True)
    ollama_list_output = result.stdout
    print(ollama_list_output)

    if 'qwen2.5-coder:7b-instruct-q4_K_M' in ollama_list_output:
        print('qwen2.5-coder:7b-instruct-q4_K_M model found. No fallback needed.')
    else:
        print('qwen2.5-coder:7b-instruct-q4_K_M model not found. Proceeding to pull fallback model deepseek-coder:6.7b-instruct-q4_K_M.')
        !ollama pull deepseek-coder:6.7b-instruct-q4_K_M
        print('deepseek-coder:6.7b-instruct-q4_K_M pull attempt completed. Please check output for success or failure.')

except subprocess.CalledProcessError as e:
    print(f"Error executing 'ollama list': {e.stderr}")
    print('Could not verify primary model, proceeding to pull fallback model deepseek-coder:6.7b-instruct-q4_K_M.')
    !ollama pull deepseek-coder:6.7b-instruct-q4_K_M
    print('deepseek-coder:6.7b-instruct-q4_K_M pull attempt completed. Please check output for success or failure.')

### 6. Install Cloudflared

In [None]:
# Notebook cell: download the latest cloudflared .deb for linux-amd64 from
# GitHub releases and install it with dpkg.
print('Installing cloudflared...')
# `-L` follows the redirect from the "latest" release URL; `&&` runs dpkg only
# if curl exited successfully. The package is saved as cloudflared.deb in the
# current working directory.
!curl -L --output cloudflared.deb https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb && sudo dpkg -i cloudflared.deb
# Printed unconditionally; check the output above for download or dpkg errors.
print('cloudflared installation initiated.')

### 7. Create Cloudflare Tunnel

In [None]:
import time
import re
import os

# Notebook cell: start a Cloudflare quick tunnel to the local Ollama port in the
# background, then extract the public URL from cloudflared's log output and
# store it in the OLLAMA_PUBLIC_URL environment variable for later cells.
print('Creating Cloudflare tunnel for Ollama...')
output_file = 'cloudflared_tunnel_output.txt'

# Run cloudflared tunnel in the background, redirecting output to a file
# Using '2>&1' to redirect stderr to stdout, so all messages including the URL are captured
get_ipython().system_raw(f'cloudflared tunnel --url http://localhost:11434 > {output_file} 2>&1 &')

print('Waiting for tunnel to establish and URL to be generated...')

# Regex to find the URL in the cloudflared output
# The URL typically starts with 'https://' and ends with '.trycloudflare.com'
# NOTE(review): cloudflared error messages may mention the service host
# https://api.trycloudflare.com; it is excluded so a failed tunnel request is
# not mistaken for a public URL — confirm against real cloudflared logs.
url_pattern = re.compile(r'https://(?!api\.)[a-zA-Z0-9-]+\.trycloudflare\.com')

initial_wait_seconds = 10   # same grace period the tunnel always received
max_wait_seconds = 60       # overall budget, including the initial wait
poll_interval_seconds = 2

public_url = None
output_content = None
deadline = time.monotonic() + max_wait_seconds

time.sleep(initial_wait_seconds) # Give some time for the tunnel to establish and print the URL

# A single read after a fixed sleep misses the URL whenever the tunnel is slow
# to come up, so keep re-reading the log until the URL appears or time runs out.
while True:
    if os.path.exists(output_file):
        with open(output_file, 'r') as f:
            output_content = f.read()
        match = url_pattern.search(output_content)
        if match:
            public_url = match.group(0)
            break
    if time.monotonic() >= deadline:
        break
    time.sleep(poll_interval_seconds)

# Show the captured log (only if cloudflared created the file at all).
if output_content is not None:
    print(f'Cloudflared output:\n{output_content}')

if public_url:
    print(f'\n!!! Your Ollama Public URL is: {public_url} !!!')
    os.environ['OLLAMA_PUBLIC_URL'] = public_url # Store for later use
else:
    print('\nError: Could not find the public Cloudflare tunnel URL in the output.')
    print('Please check the output above for any errors or try running the command manually.')

### 8. Verify Ollama API via Tunnel

In [None]:
import requests
import json
import os

# Notebook cell: send one generation request through the public tunnel URL and
# print both the raw line-delimited JSON response and the concatenated text.
print('Verifying Ollama API via Cloudflare tunnel...')

public_url = os.environ.get('OLLAMA_PUBLIC_URL')

if not public_url:
    print('Error: OLLAMA_PUBLIC_URL not found in environment variables.')
else:
    print(f'Using public URL: {public_url}')
    api_endpoint = f'{public_url}/api/generate'

    # The pull cell installs the second model only when the first is missing,
    # so ask the server what is installed instead of assuming the primary.
    candidate_models = [
        'qwen2.5-coder:7b-instruct-q4_K_M',
        'deepseek-coder:6.7b-instruct-q4_K_M',
    ]
    model_name = candidate_models[0]
    try:
        tags_response = requests.get(f'{public_url}/api/tags', timeout=30)
        tags_response.raise_for_status()
        installed_models = {
            entry.get('name')
            for entry in (tags_response.json().get('models') or [])
        }
        model_name = next(
            (name for name in candidate_models if name in installed_models),
            model_name,
        )
    except (requests.exceptions.RequestException, ValueError,
            AttributeError, TypeError) as e:
        # Best effort only: an unreachable endpoint or an unexpected payload
        # shape falls back to the primary model name.
        print(f'Could not list installed models ({e}); defaulting to {model_name}.')
    print(f'Using model: {model_name}')

    payload = {
        "model": model_name,
        "prompt": "Write a simple Python function to add two numbers."
    }

    headers = {'Content-Type': 'application/json'}

    try:
        response = requests.post(api_endpoint, headers=headers, data=json.dumps(payload), timeout=120)
        response.raise_for_status() # Raise an exception for HTTP errors

        print(f"Status Code: {response.status_code}")

        # Collect pieces in lists and join once instead of repeated string +=.
        response_lines = []
        text_parts = []
        for line in response.iter_lines():
            if line:
                decoded_line = line.decode('utf-8')
                response_lines.append(decoded_line)
                try:
                    json_response = json.loads(decoded_line)
                    if 'response' in json_response:
                        text_parts.append(json_response['response'])
                    # The object flagged `done` ends the reply.
                    if json_response.get('done'):
                        break
                except json.JSONDecodeError:
                    print(f"Could not decode JSON from line: {decoded_line}")

        full_response_content = ''.join(f'{entry}\n' for entry in response_lines)
        generated_text = ''.join(text_parts)

        print("\n--- Full Ollama API Response (parsed line by line) ---")
        print(full_response_content)
        print("-----------------------------------------------------")

        if generated_text:
            print("\n--- Extracted Generated Text ---")
            print(generated_text)
            print("----------------------------------")
        else:
            print("\nNo generated text found in the response.")

    except requests.exceptions.RequestException as e:
        print(f"Error connecting to Ollama API: {e}")
        if hasattr(e, 'response') and e.response is not None:
            print(f"Response content: {e.response.text}")
    except Exception as e:
        # Notebook-level boundary: report anything unexpected instead of a traceback.
        print(f"An unexpected error occurred: {e}")

### 9. Example Ollama API Call (from an External Client)

In [None]:
import requests
import json
import os

# Notebook cell: example streaming generation request against the public
# tunnel URL, printing the generated text as it arrives.
print('Executing Ollama API call to the tunneled URL:')

public_url = os.environ.get('OLLAMA_PUBLIC_URL')

if not public_url:
    print('Error: OLLAMA_PUBLIC_URL not found in environment variables. Please ensure the tunnel was successfully created.')
else:
    ollama_api_url = f"{public_url}/api/generate"

    # The pull cell installs the second model only when the first is missing,
    # so ask the server what is installed instead of assuming the primary.
    candidate_models = [
        'qwen2.5-coder:7b-instruct-q4_K_M',
        'deepseek-coder:6.7b-instruct-q4_K_M',
    ]
    model_name = candidate_models[0]
    try:
        tags_response = requests.get(f'{public_url}/api/tags', timeout=30)
        tags_response.raise_for_status()
        installed_models = {
            entry.get('name')
            for entry in (tags_response.json().get('models') or [])
        }
        model_name = next(
            (name for name in candidate_models if name in installed_models),
            model_name,
        )
    except (requests.exceptions.RequestException, ValueError,
            AttributeError, TypeError) as e:
        # Best effort only: fall back to the primary model name.
        print(f'Could not list installed models ({e}); defaulting to {model_name}.')

    payload = {
        "model": model_name,
        "prompt": "Write a js code to add two number",
        "stream": True
    }

    headers = {'Content-Type': 'application/json'}

    print(f"Sending request to: {ollama_api_url} (model: {model_name})")

    response = None
    try:
        response = requests.post(ollama_api_url, headers=headers, data=json.dumps(payload), stream=True, timeout=120)
        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)

        print("\nGenerated Code:")
        generated_text = ""
        for line in response.iter_lines():
            if line:
                decoded_line = line.decode('utf-8')
                try:
                    json_response = json.loads(decoded_line)
                    if 'response' in json_response:
                        chunk = json_response['response']
                        generated_text += chunk
                        # Emit each piece immediately: that is the point of
                        # requesting a streamed response.
                        print(chunk, end='', flush=True)
                    if json_response.get('done'):
                        break
                except json.JSONDecodeError:
                    print(f"Could not decode JSON from line: {decoded_line}")
        print()  # terminate the streamed output with a newline

    except requests.exceptions.RequestException as e:
        print(f"Error calling Ollama API: {e}")
        if hasattr(e, 'response') and e.response is not None:
            print(f"Response content: {e.response.text}")
    except Exception as e:
        # Notebook-level boundary: report anything unexpected instead of a traceback.
        print(f"An unexpected error occurred: {e}")
    finally:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed; the loop above can exit
        # early, so close explicitly. Done in `finally` (not a `with` block)
        # so the error handler above can still read e.response.text first.
        if response is not None:
            response.close()