### Book2 - Quick Demos:
 - suppressing stdout output
 - tokenization

In [6]:
import ctypes
from llama_cpp import llama_log_set
from llama_cpp import Llama

In [None]:
# Silence llama.cpp logging by registering a callback that does nothing.
def my_log_callback(level, message, user_data):
    """No-op log handler: every argument is ignored and nothing is emitted."""
    return None


# C signature of the callback: void (*)(int, const char *, void *).
_log_callback_proto = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p
)
# Bound to a module-level name so the ctypes wrapper object stays referenced
# after it has been handed to the library.
log_callback = _log_callback_proto(my_log_callback)
llama_log_set(log_callback, ctypes.c_void_p())

### Tokenize example
Most common colors are represented by a single token; some need two.

In [24]:
# Paths to the two GGUF model files compared in this notebook.
llama_fn = '/mnt/disk2/llamas/TheBloke7B/llama-2-7b.Q4_K_M.gguf'
mistral_fn = '/mnt/disk2/llamas/mistral-7B-v0.1/ggml-model-q4_0.gguf'

# Color words used as tokenizer input.
colors = [
    'red',
    'green',
    'blue',
    'yellow',
    'orange',
    'purple',
    'pink',
    'brown',
    'black',
    'white',
]

In [25]:
def show_tokenization(model_fn, input_text):
    """Show how a model's vocabulary splits each word into tokens.

    Args:
        model_fn: Path to the model file; it is opened with
            ``vocab_only=True`` and ``verbose=False``.
        input_text: Iterable of ``str`` words to tokenize.

    Returns:
        A list with one ``(word, token_ids, pieces)`` tuple per input word,
        where ``token_ids`` is the result of ``llm.tokenize`` (no BOS token)
        and ``pieces`` is a tuple holding each token id decoded on its own.
    """
    llm = Llama(model_fn, vocab_only=True, verbose=False)
    tokenizer = llm.tokenizer()

    rows = []
    # Label each row with the word actually passed in, not the notebook-level
    # `colors` list, so the function is correct for any input.
    for word in input_text:
        token_ids = llm.tokenize(text=word.encode('utf-8'), add_bos=False)
        # Decode one token at a time to expose the individual pieces.
        pieces = tuple(tokenizer.decode(tokens=[tok]) for tok in token_ids)
        rows.append((word, token_ids, pieces))
    return rows

In [26]:
# Tokenize the color words with the vocabulary of the model at llama_fn.
show_tokenization(llama_fn, colors)

[('red', [2654], (' red',)),
 ('green', [7933], (' green',)),
 ('blue', [7254], (' blue',)),
 ('yellow', [13328], (' yellow',)),
 ('orange', [24841], (' orange',)),
 ('purple', [3708, 552], (' pur', 'ple')),
 ('pink', [282, 682], (' p', 'ink')),
 ('brown', [17354], (' brown',)),
 ('black', [4628], (' black',)),
 ('white', [4796], (' white',))]

In [27]:
# Same color words, tokenized with the vocabulary of the model at mistral_fn.
show_tokenization(mistral_fn, colors)

[('red', [2760], (' red',)),
 ('green', [5344], (' green',)),
 ('blue', [5045], (' blue',)),
 ('yellow', [9684], (' yellow',)),
 ('orange', [14545], (' orange',)),
 ('purple', [19435], (' purple',)),
 ('pink', [12937], (' pink',)),
 ('brown', [9060], (' brown',)),
 ('black', [2687], (' black',)),
 ('white', [3075], (' white',))]

### Embeddings and other findings

In [31]:
# Open both model files vocabulary-only; that is enough to query vocab size.
llama_model = Llama(llama_fn, vocab_only=True)
mistral_model = Llama(mistral_fn, vocab_only=True)

# Vocabulary sizes as a (llama, mistral) pair.
tuple(loaded.n_vocab() for loaded in (llama_model, mistral_model))

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


(32000, 32000)

In [48]:
# Embed a short phrase: open the model at llama_fn with embedding=True
# (no vocab_only here) and ask it for the embedding of `msg`.
msg = 'purple and gold'
model = Llama(llama_fn, embedding=True)
msg_embed = model.embed(msg)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [49]:
# Report the embedding's length, then preview its first ten entries.
embed_length = len(msg_embed)
print(f"length: {embed_length}")
msg_embed[0:10]

length: 4096


[0.26421090960502625,
 -0.2617086172103882,
 -1.126269817352295,
 0.6033214926719666,
 0.6219836473464966,
 -1.0355020761489868,
 0.37359991669654846,
 -0.3784862756729126,
 5.308225154876709,
 1.748565912246704]

### Graveyard of suppress_output wrappers

In [None]:
from llama_cpp._utils import suppress_stdout_stderr

def foo(model_fn=None):
    """Build a vocab-only ``Llama`` inside ``suppress_stdout_stderr()``.

    Args:
        model_fn: Path to the model file. When omitted, the notebook-level
            ``llama_fn`` is used. (Previously the body read an undefined
            global ``model_fn`` and raised ``NameError`` when called.)

    Returns:
        The ``Llama`` instance, created with ``vocab_only=True`` and
        ``verbose=False``.
    """
    if model_fn is None:
        # Resolved at call time so the definition does not depend on cell order.
        model_fn = llama_fn
    # Presumably silences stdout/stderr while the model loads, going by the
    # helper's name; its implementation is not visible in this notebook.
    with suppress_stdout_stderr():
        return Llama(model_fn, vocab_only=True, verbose=False)

In [None]:
import sys
import contextlib
import os

@contextlib.contextmanager
def suppress_output():
    """Context manager that points ``sys.stdout`` and ``sys.stderr`` at ``os.devnull``.

    Only the Python-level ``sys`` stream objects are swapped. On exit,
    including exit via an exception, the streams that were active on entry
    are put back and both devnull handles are closed.
    """
    with open(os.devnull, 'w') as sink_out, open(os.devnull, 'w') as sink_err:
        saved_streams = (sys.stdout, sys.stderr)
        try:
            sys.stdout, sys.stderr = sink_out, sink_err
            yield
        finally:
            # Restore first; the sinks are closed when the `with` block exits.
            sys.stdout, sys.stderr = saved_streams


In [None]:
def suppress_stdout(func):
    """Decorator that discards anything ``func`` writes to ``sys.stdout``.

    While ``func`` runs, ``sys.stdout`` is replaced by a handle on
    ``os.devnull``. Afterwards the stream that was active before the call is
    put back (not ``sys.__stdout__``, which in a notebook kernel is a
    different object from the active ``sys.stdout``), the devnull handle is
    closed, and this happens even when ``func`` raises.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns whatever
        ``func`` returns.
    """
    # Local imports keep this cell self-contained.
    import functools
    import os

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        previous_stdout = sys.stdout
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                return func(*args, **kwargs)
            finally:
                sys.stdout = previous_stdout

    return wrapper