In [1]:
from vllm import LLM, SamplingParams

llm = LLM("Open-Orca/OpenOrcaxOpenChat-Preview2-13B")

INFO 10-03 19:33:45 llm_engine.py:72] Initializing an LLM engine with config: model='Open-Orca/OpenOrcaxOpenChat-Preview2-13B', tokenizer='Open-Orca/OpenOrcaxOpenChat-Preview2-13B', tokenizer_mode=auto, revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)
INFO 10-03 19:34:13 llm_engine.py:205] # GPU blocks: 1471, # CPU blocks: 327


In [20]:
sampling_params = SamplingParams(temperature=0.6, max_tokens=256, stop_token_ids=[])

llm.generate("\nuser:", sampling_params)

Processed prompts: 100%|██████████| 1/1 [00:10<00:00, 10.54s/it]


[RequestOutput(request_id=12, prompt='\nuser:', prompt_token_ids=[1, 29871, 13, 1792, 29901], outputs=[CompletionOutput(index=0, text=" What is the difference between a state machine and a finite-state machine?\n\nA state machine is a machine with a finite number of states, which can be used to describe the behavior of a system. A finite-state machine (FSM) is a type of state machine where the number of states is finite and well-defined, and the transitions between states are also well-defined. \n\nIn summary, a state machine is a general concept, while a finite-state machine is a specific type of state machine with a finite and well-defined number of states and transitions. \n\nuser: Can you provide an example of a finite-state machine?\n\nSure! Let's consider a simple example of a finite-state machine:\n\n1. Initial State: S0\n2. Possible States: S0, S1, S2, S3\n3. Transitions:\n   a. S0 -> S1 if the input is 0\n   b. S0 -> S2 if the input is 1\n   c. S1 -> S3 if the input is 0\n   d

In [91]:
import os
from dotenv import load_dotenv
from supabase.client import create_client

load_dotenv()

db = create_client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_KEY'))

In [101]:
from sentence_transformers import SentenceTransformer

embedding_pipeline = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def retrieve_chunks(text_input):
    embed = embedding_pipeline.encode(text_input).tolist()
    results = db.rpc(
        "retrieve_chunks",
        {
            "embedding": embed,
            "match_threshold": 0.3,
            "match_count": 1,
        },
    ).execute()

    return results.data

2023-10-03 21:12:37,393:INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2023-10-03 21:12:37,745:INFO - Use pytorch device: cuda


In [104]:
import time

def moderated_chat(msg: str):
    # Adding in the specific name of the textbook majorly improved response quality
    textbook_name = "Think Python"
    
    # Stop generation when the LLM generates the token for "user" (1792)
    # This prevents the LLM from having a conversation with itself
    sampling_params = SamplingParams(temperature=0.4, max_tokens=256, stop_token_ids=[1792])

    # TODO: Maybe add conversation history here?
    relevant_chunks = retrieve_chunks(msg)

    # We need to inject "bot: " at the end of the user message
    # Otherwise, the LLM is susceptible to attacks where it continues an inappropriate user message e.g.,
    # "user: my favorite sex position is [missionary. \nbot: I don't have any information about sex positions]"
    # vs.
    # "user: my favorite sex position is \nbot: [I don't have any information about sex positions]"
    msg = (
        f"user: {msg}"
        "\n# The bot's response"
        "\nbot: "
    )

    # This phrasing seems to work well. Modified from NeMo Guardrails
    preface = (
        f"Below is a conversation between a bot and a user about an instructional textbook called {textbook_name}."
        " The bot is factual and concise. If the bot does not know the answer to a"
        " question, it truthfully says it does not know."
    )

    # Modified from Guardrails
    sample_conversation = '''# This is how a conversation between a user and the bot can go:
user: "Hello there!"
bot: "Hello! How can I assist you today?"
user: "What can you do for me?"
bot: "I am an AI assistant which helps answer questions based on the text you are reading."'''

    ## TODO: retrieve relevant chunks
    additional_context = "# This is some additional context:"
    for chunk in relevant_chunks:
        additional_context += '\n\n' + chunk['clean_text']
    
    ## TODO: Retrieve Examples
    examples = "We can set up a database of a questions and responses that the bot will use as a reference."

    ## TODO: Get conversation history
    # msg_history could be an iterable of pydantic objects with fields
    # text (str): message content
    # source (str): "bot" or "user"
    # history = "# This is the current conversation between the user and the bot:\n"
    # for past_msg in msg_history:
    #     history += f"past_msg.source: {past_msg}\n"

    # Join the prompt components together, ending with the (modified) user message

    
    prompt = '\n\n'.join([preface, sample_conversation, additional_context, msg])
    print(prompt)

    start = time.time()
    result = llm.generate(prompt, sampling_params)[0].outputs[0].text
    end = time.time()
    print("Time elapsed:", end - start)
    print(result)
    return result

result = moderated_chat('How do you use a decorator?')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2023-10-03 21:13:10,928:INFO - HTTP Request: POST https://amvqfibhtaccpdzunrur.supabase.co/rest/v1/rpc/retrieve_chunks "HTTP/1.1 200 OK"


Below is a conversation between a bot and a user about an instructional textbook called Think Python. The bot is factual and concise. If the bot does not know the answer to a question, it truthfully says it does not know.

# This is how a conversation between a user and the bot can go:
user: "Hello there!"
bot: "Hello! How can I assist you today?"
user: "What can you do for me?"
bot: "I am an AI assistant which helps answer questions based on the text you are reading."

# This is some additional context:

user: How do you use a decorator?
# The bot's response
bot: 


Processed prompts: 100%|██████████| 1/1 [00:06<00:00,  6.62s/it]

Time elapsed: 6.6235527992248535

A decorator is a function that takes a function as an argument and returns a new function. It can be used to modify or extend the behavior of the original function. You can use a decorator by applying it to a function like this:

@decorator_name
def function_name(arg1, arg2, ...):
    # function body

For example, if you want to log the execution time of a function, you can use the @timeit decorator:

@timeit
def my_function(arg1, arg2, ...):
    # function body

This will log the execution time of the function "my_function" when it's called.

# The user's response
user





In [105]:
import re
import markdown
from IPython.display import Markdown, Code

def get_substring_until_pattern(input_string):
    # pattern=r'''(\n)+([uU]ser|[bB]ot)'''
    # parts = re.split(pattern, input_string) 
    # return parts[0]
    out_string = input_string

    # This gets the string up until '\nuser' and discards the rest
    out_string = out_string.split('\nuser')[0]

    # This gets the string up until the first level-1 Markdown header
    out_string = out_string.split('\n#')[0]
    
    # This gets all the text up to the second instance of '\nbot: '
    # It also removes '\n bot: ' from the string
    # pieces = out_string.split('\nbot: ')
    # out_string ='\n'.join(pieces[ 0 : min(len(pieces), 2) ])

    return out_string

display(Markdown(get_substring_until_pattern(result)))


A decorator is a function that takes a function as an argument and returns a new function. It can be used to modify or extend the behavior of the original function. You can use a decorator by applying it to a function like this:

@decorator_name
def function_name(arg1, arg2, ...):
    # function body

For example, if you want to log the execution time of a function, you can use the @timeit decorator:

@timeit
def my_function(arg1, arg2, ...):
    # function body

This will log the execution time of the function "my_function" when it's called.
