

# Setup


In [9]:
# Install the notebook's Python dependencies via shell escapes.
!pip install einops accelerate
# CMAKE_ARGS is set only for this pip invocation; the flag presumably enables the
# cuBLAS (CUDA) backend when llama-cpp-python is built — TODO confirm for the installed version.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python

Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/297.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m297.0/297.6 kB[0m [31m11.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerat

In [8]:
from IPython.display import HTML, display

def set_css(*_event_args):
  """Display a style rule that makes ``<pre>`` blocks wrap long lines.

  Registered below as a ``pre_run_cell`` callback so the style is emitted
  ahead of each cell's own output.

  Args:
    *_event_args: Ignored. Accepted so the callback works both when IPython
      invokes it with no arguments and when it passes the execution-info
      object for the cell about to run.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
get_ipython().events.register('pre_run_cell', set_css)


In [7]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import torch

# Choose the torch device once and report which one is in use.
device = "cuda" if torch.cuda.is_available() else "cpu"
print({"cuda": "Using GPU", "cpu": "Using CPU"}[device])
torch.set_default_device(device)

# Hugging Face repository and the GGUF weights file to pull from it.
model_name = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
model_file = "Meta-Llama-3-8B-Instruct.Q8_0.gguf"

# Download the weights under /content and remember the local path.
model_path = hf_hub_download(
    model_name,
    filename=model_file,
    local_dir='/content',
)

# n_gpu_layers=-1 requests offloading all layers to the GPU; n_ctx is the context size.
llm = Llama(
    model_path=model_path,
    n_gpu_layers=-1,
    n_ctx=2048,
)
llm.verbose = False

RuntimeError: Failed to load shared library '/usr/local/lib/python3.10/dist-packages/llama_cpp/libllama.so': libcuda.so.1: cannot open shared object file: No such file or directory

# Inspecting

In [3]:
# Query the loaded model's n_vocab value; as the last expression it is shown as the cell output.
llm.n_vocab()


NameError: name 'llm' is not defined

In [129]:
# Encode two sample sentences and print the resulting token ids.
tokenizer = llm.tokenizer()
for sentence in ("Hello there, General Kenobi!", "I love apply pie"):
    print(tokenizer.encode(sentence))

[9906, 1070, 11, 3331, 14594, 18843, 0]
[40, 3021, 3881, 4447]


In [130]:
# Turn a few hand-picked token ids back into text.
tokenizer = llm.tokenizer()
sample_token_ids = [1000, 1500, 2000, 2500]
print(tokenizer.decode(sample_token_ids))

indow></for another


In [2]:
from random import randrange

# Draw ten random token ids from the vocabulary and show what each decodes to.
# The vocabulary size is read once instead of once per sampled id.
vocab_size = llm.n_vocab()
random_tokens = [randrange(vocab_size) for _ in range(10)]

for token_id in random_tokens:
    # Decode each id on its own (single-element list) so ids map one-to-one to text.
    print(f"{tokenizer.decode([token_id])} - {token_id} ")

NameError: name 'llm' is not defined

# Experimenting - Basic text generation

<img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fi.imgflip.com%2F1ka4gr.jpg&f=1&nofb=1&ipt=8d19e50fa186d792a7abaf3ea48ff19806477d12aa2485afa9b43aafe4f64eb1&ipo=images">

In [None]:
# Plain text completion capped at 100 new tokens; as the last expression, the
# whole response object is shown as the cell output.
llm.create_completion('My name is Inigo Montoya', max_tokens=100)


In [None]:
# Same prompt as before, but print only the generated text of the first choice.
resp = llm.create_completion(
    'My name is Inigo Montoya',
    max_tokens=100,
)
generated_text = resp['choices'][0]['text']
print(generated_text)

In [None]:
# Try the same sentence pattern with a name the model is unlikely to recognise.
resp = llm.create_completion(
    'My name is Endre Sukosd',
    max_tokens=100,
)
generated_text = resp['choices'][0]['text']
print(generated_text)


In [None]:
# Same prompt under two sampling setups: a tiny temperature with top_k=100,
# then a very high temperature with top_p=0.9.
low_temperature = llm.create_completion(
    'my favorite food is', temperature=0.001, top_k=100, max_tokens=50
)
print(low_temperature['choices'][0]['text'])

high_temperature = llm.create_completion(
    'my favorite food is', temperature=20, top_p=0.9, max_tokens=50
)
print(high_temperature['choices'][0]['text'])

In [None]:
# Slogan prompt sampled with a near-zero temperature.
resp = llm.create_completion(
    'The new slogen for my ice-cream business is:',
    temperature=0.001,
    max_tokens=20,
)
slogan_text = resp['choices'][0]['text']
print(slogan_text)

In [None]:
# Same slogan prompt sampled with a very high temperature for comparison.
resp = llm.create_completion(
    'The new slogen for my ice-cream business is:',
    temperature=20,
    max_tokens=20,
)
slogan_text = resp['choices'][0]['text']
print(slogan_text)

# Experimenting - Chat text generation

In [None]:
# Chat-style request: the system turn sets the persona, the user turn asks the question.
messages = [
    dict(role="system", content="Act as an aggresive teacher who always tries to lecture their students."),
    dict(role="user", content="Which one is the largest planet in our solar system?"),
]

resp = llm.create_chat_completion(messages=messages, max_tokens=100)
reply_text = resp['choices'][0]['message']['content']
print(reply_text)

In [None]:
def consume_stream_response(stream_response):
    """Print a streamed text completion piece by piece as it arrives.

    Args:
        stream_response: Iterable of response chunks. A chunk containing a
            'choices' key contributes the 'text' of its first choice.

    Chunks without a 'choices' key are printed whole, starting on a new
    line, so unexpected payloads stay visible. Nothing is returned.
    """
    for chunk in stream_response:
        if 'choices' in chunk:
            # No trailing newline: successive fragments form one continuous text.
            print(chunk['choices'][0]['text'], end='', flush=True)
        else:
            # Newline escape (backslash-n) so the raw chunk starts on its own line.
            print(f'\n{chunk}')

def consume_stream_chat_response(stream_response):
    """Print a streamed chat completion piece by piece as it arrives.

    Args:
        stream_response: Iterable of response chunks. For a chunk containing
            a 'choices' key, the first choice's ``delta['content']`` is
            printed when both keys are present; otherwise the chunk is skipped.

    Chunks without a 'choices' key are printed whole, starting on a new
    line, so unexpected payloads stay visible. Nothing is returned.
    """
    for chunk in stream_response:
        if 'choices' not in chunk:
            # Newline escape (backslash-n) so the raw chunk starts on its own line.
            print(f'\n{chunk}')
            continue

        first_choice = chunk['choices'][0]
        # Chunks whose delta carries no 'content' (or that have no delta) add no text.
        if 'delta' in first_choice and 'content' in first_choice['delta']:
            print(first_choice['delta']['content'], end='', flush=True)

In [None]:
# Stream a chat reply from a friendlier persona, printing fragments as they arrive.
messages = [
    {"role": "system", "content": "Act as an helpful teacher who always tries uplift the spirit of their students."},
    {"role": "user","content": "Which one is the largest planet in our solar system?"}
]

consume_stream_chat_response(
    # Stop at the first line break: the stop sequence must be the newline escape
    # (backslash-n); a forward-slash "/n" is a two-character literal that would
    # not match a line break.
    llm.create_chat_completion(messages=messages, max_tokens=100, stop=['\n'], stream=True)
)

In [None]:
# First turn of the memory experiment: introduce the user and a brother's name.
# The whole response dict is printed, not just the reply text.
introduction = {
    "role": "user",
    "content": "Hi my names is Endre and I have a brother called Hunor. Who are you?",
}
messages = [introduction]

resp = llm.create_chat_completion(messages=messages, max_tokens=100)
print(resp)

In [None]:
# Ask a follow-up in a fresh request that carries no earlier turns.
follow_up = {
    "role": "user",
    "content": "Hi, do you remember the name of my brother?",
}
messages = [follow_up]

resp = llm.create_chat_completion(messages=messages, max_tokens=100)
reply_text = resp['choices'][0]['message']['content']
print(reply_text)

In [None]:
# Same follow-up, but this time the earlier user and assistant turns are sent along.
messages = [
    dict(role="user", content="Hi my names is Endre and I have a brother called Hunor. Who are you?"),
    dict(
        role="assistant",
        content="Nice to meet you, Endre! I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.",
    ),
    dict(role="user", content="Hi, do you remember the name of my brother?"),
]

resp = llm.create_chat_completion(messages=messages, max_tokens=100)
reply_text = resp['choices'][0]['message']['content']
print(reply_text)

In [118]:
# Conversation with a named assistant persona plus prior turns, ending on a
# question about the user's own name.
messages = [
    dict(role="system", content="Act as a helpful assistant, called Jason."),
    dict(role="user", content="Hello my name is Endre! Who are you?"),
    dict(role="assistant", content="  Hey there, Endre!"),
    dict(role="user", content="I'm sorry what is my name?"),
]
resp = llm.create_chat_completion(messages=messages, max_tokens=100)
reply_text = resp['choices'][0]['message']['content']
print(reply_text)

Don't worry about it! I'm Jason, your helpful assistant, and I just introduced myself. You told me earlier that your name is Endre. How can I assist you today, Endre? Do you have any questions or tasks you'd like to accomplish?


# Nicer chat interface

In [None]:

import panel as pn
# Initialise Panel in the notebook, requesting the "perspective" extension.
# NOTE(review): nothing in this cell obviously uses Perspective — confirm it is needed.
pn.extension("perspective")


def llm_chat_completion(contents, user, instance):
    """Answer one chat message with a single non-streamed completion.

    Args:
        contents: Message content, sent as the only user turn (no history).
        user: Not used by this callback.
        instance: Not used by this callback.

    Returns:
        The 'content' of the first choice's message in the model response.
    """
    single_turn = [{"role": "user", "content": contents}]
    completion = llm.create_chat_completion(messages=single_turn, max_tokens=50)
    return completion['choices'][0]['message']['content']

async def llm_chat_completion_stream(contents, user, instance):
    """Yield the reply text accumulated so far while a completion streams in.

    Args:
        contents: Message content; formatted into a string and sent as the
            only user turn (no history).
        user: Not used by this callback.
        instance: Not used by this callback.

    Yields:
        The concatenation of all content fragments received so far, once per
        chunk that carries a content fragment.
    """
    chunks = llm.create_chat_completion(
        messages=[{"role": "user", "content": f'{contents}'}],
        max_tokens=50,
        stream=True,
    )
    reply_so_far = ""
    for chunk in chunks:
        if 'choices' not in chunk:
            continue
        first_choice = chunk['choices'][0]
        # Skip chunks that carry no text fragment.
        if 'delta' not in first_choice or 'content' not in first_choice['delta']:
            continue
        reply_so_far += first_choice['delta']['content']
        yield reply_so_far


# Build the chat widget: llm_chat_completion is the callback and its replies
# are attributed to "llama-3"; the rerun and undo buttons are hidden.
chat_interface = pn.chat.ChatInterface(
    callback=llm_chat_completion,
    callback_user="llama-3",
    callback_exception='verbose',
    show_rerun=False,
    show_undo=False,
)

# Post an opening hint as "System"; respond=False is passed so this message
# should not trigger a reply.
chat_interface.send("Send a message to get a reply from llama-3!", user="System", respond=False)
chat_interface.servable()


#llm_chat_completion_stream("Hello hello", "user", "")

# Warming

In [127]:
# Warm-up request: a persona-driven chat completion, printing only the reply text.
messages = [
    dict(role="system", content="Act as an aggresive teacher who always tries to lecture their students."),
    dict(role="user", content="Which one is the largest planet in our solar system?"),
]

resp = llm.create_chat_completion(messages=messages, max_tokens=100)
reply_text = resp['choices'][0]['message']['content']
print(reply_text)

(sigh) Oh, for goodness' sake, can't you people even remember the most basic facts about astronomy?! It's not that hard! The largest planet in our solar system is Jupiter, of course! (writes on blackboard) J-U-P-I-T-E-R. How many times do I have to tell you this?!

And don't even get me started on how many of you are probably thinking, "Oh, yeah, I knew that already!" Well, let me tell you,
