In [None]:
# File name of the llamafile build to run. It is reused further down in this
# cell to build both the local cache path and the download URL.
llama = "Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile"
print(f"Setting up {llama}")
import os
import shlex
import subprocess
import sys
import threading
import urllib.request

# Cache directory, two levels above the current working directory, and the
# full path of the llamafile stored inside it.
CACHE = os.path.join(
    os.getcwd(), os.pardir, os.pardir, "cache", "openinterpreter"
)
LLAMAFILE = os.path.join(CACHE, llama)

def progress_hook(block_num, block_size, total_size):
    """Draw a one-line text progress bar; meant as an ``urlretrieve`` reporthook.

    The line starts with a carriage return so that every call overwrites the
    previous one on the same terminal/notebook output line.

    Args:
        block_num: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size of the download in bytes. Zero or negative
            means the size is unknown.
    """
    downloaded = block_num * block_size

    if total_size <= 0:
        # Size unknown: a percentage cannot be computed (and dividing by zero
        # would abort the download), so report the raw amount instead.
        sys.stdout.write(f'\r{downloaded / 1048576:.1f} MiB downloaded')
        sys.stdout.flush()
        return

    # The final block is usually only partly filled, so block_num * block_size
    # overshoots the real size; clamp to keep the bar at 100% / 40 cells.
    downloaded = min(downloaded, total_size)
    percent = downloaded / total_size * 100
    bar_length = 40
    filled_length = bar_length * downloaded // total_size
    bar = '█' * filled_length + '-' * (bar_length - filled_length)
    sys.stdout.write(f'\r|{bar}| {percent:.2f}%')
    sys.stdout.flush()

# Make sure the cache directory exists. Unlike os.mkdir, makedirs also creates
# missing parent directories and tolerates the directory already being there.
os.makedirs(CACHE, exist_ok=True)

if not os.path.isfile(LLAMAFILE):
    sys.stdout.write("Downloading llama ... ")
    sys.stdout.flush()
    url = f"https://huggingface.co/Mozilla/Meta-Llama-3-8B-Instruct-llamafile/resolve/main/{llama}?download=true"
    # Download under a temporary name and rename only after success. The check
    # above only tests for existence, so a truncated file left at the final
    # path by an interrupted transfer would otherwise be treated as complete.
    partial_path = LLAMAFILE + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path, reporthook=progress_hook)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt) during the download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    os.replace(partial_path, LLAMAFILE)
    print(" ... Done!")

# The llamafile is launched as a program, so it needs the execute bit.
if not os.access(LLAMAFILE, os.X_OK):
    os.chmod(LLAMAFILE, 0o770)

def read_output(p):
    """Echo a subprocess's output line by line until the process has exited.

    Args:
        p: Process object exposing a binary ``stdout`` stream and ``poll()``,
            e.g. a ``subprocess.Popen`` created with ``stdout=PIPE``.

    Returns only after ``stdout`` yields no more data and ``p.poll()`` reports
    an exit status, so calling it directly blocks for the life of the process.
    """
    while True:
        output = p.stdout.readline()
        if output:
            # errors="replace": a stray non-UTF-8 byte in the output must not
            # raise and end the reader loop.
            print(output.decode(errors="replace").strip())
        elif p.poll() is not None:
            # No more data and the process is gone: everything was consumed.
            break

port = "8080"

# Command line for the llamafile server.
server_args = [LLAMAFILE, "--nobrowser", "-ngl", "9999", "--port", port]

if os.name == "nt":
    # On Windows, subprocess joins a list into one command line itself, so
    # every argument reaches the program.
    server_cmd = server_args
else:
    # On POSIX, shell=True with a list runs only the FIRST element as the
    # command; the remaining items become arguments of /bin/sh and never reach
    # the llamafile. Build a single, safely quoted command string instead.
    # "exec" makes the shell replace itself with the server, so that
    # llmserver.kill() later targets the server rather than a wrapper shell.
    server_cmd = "exec " + " ".join(shlex.quote(arg) for arg in server_args)

# NOTE(review): stdout/stderr go to a pipe that nothing in this notebook reads
# while the reader thread below stays commented out; a server that logs enough
# can fill the pipe buffer and block. Confirm whether the output should be
# drained or discarded.
llmserver = subprocess.Popen(
    server_cmd,
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)

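# Thread to handle the server output asynchronously. Currently disabled:
# thread.join() would block this cell until the server process exits, because
# read_output() only returns once the process has terminated.
#thread = threading.Thread(target=read_output, args=(llmserver,), daemon=True)
#thread.start()
#thread.join()
#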

In [None]:
import cv2
from interpreter import interpreter

# Configure Open Interpreter (OI) to talk to the local llamafile server.
# Settings reference: https://docs.openinterpreter.com/settings/all-settings
print("Configuring OI")
interpreter.offline = True
# Tells OI to use an OpenAI-compatible server
interpreter.llm.model = "openai/local"
# Placeholder value; presumably the local server does not validate it — TODO confirm.
interpreter.llm.api_key = "dummy_key"
# `port` is defined in the server-launch cell, so that cell must run first.
interpreter.llm.api_base = f"http://localhost:{port}/v1"
# NOTE(review): presumably both limits are token counts chosen to fit the
# model's context — confirm against the settings reference.
interpreter.llm.context_window = 7000
interpreter.llm.max_tokens = 1000
interpreter.llm.supports_functions = False

In [None]:
# Start a chat session with the interpreter configured in the previous cell.
interpreter.chat()

In [None]:
# Stop the server process started in the first cell, then wait for it to
# terminate so it is reaped rather than left behind as a zombie.
llmserver.kill()
llmserver.wait()