In [12]:
from openai import OpenAI
import json
import os
import time

In [13]:
from dotenv import load_dotenv

load_dotenv(".env", override=True)
# Set a .env file with the credentials
# OPENAI_API_KEY=xxx
# OPENAI_BASE_URL=https://openai.vocareum.com/v1
# OPENAI_BASE_URL=https://api.openai.com/v1

True

In [15]:
# Endpoint and key are read from environment variables (populated from the
# .env file loaded earlier), so no credentials live in the notebook.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL"),
)

In [16]:
# Decoding parameters
TOP_P = 1.0
TEMPERATURE = 0.0
# Increased to simulate an LLM with a smaller attention window.
MAX_TOKENS = 3950

In [17]:
# Speaker labels and the separator placed between conversation entries.
NEW_INTERACTION_DELIMITER = "\n\n"
AI_NAME = "AI Assistant"
USER_NAME = "User"

# Instructions for the model; the wording is kept verbatim.
SYSTEM_PROMPT = (
    "You expert at games of chance.\n"
    "End every response with double exclamation points!!"
)

# Creating a chat bot with memory

Caveat: In the notebook, the conversation is flattened to a single string which contains the entire conversation history. That was necessary in the old API, but now it's possible to pass a list of messages, without any flattening:

```python
messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I'm fine, how can I help?"},
    {"role": "user", "content": "Tell me a joke."}
]
```


In [None]:
def query_openai(prompt, model="gpt-3.5-turbo"):
    """Send one prompt to the OpenAI Chat Completions API and return the reply text.

    Args:
        prompt (str): Text sent as the single user message.
        model (str): Name of the chat model to query. Defaults to
            "gpt-3.5-turbo".

    Returns:
        str: The first choice's message content with surrounding whitespace
            stripped, or an empty string when the response carries no text
            content.
    """
    response = client.chat.completions.create(
        model=model,
        # {"role": "system" | "user" | "assistant", "content": "..."}
        # NOTE(review): SYSTEM_PROMPT is always sent in the system role here,
        # independently of whatever text `prompt` contains.
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        top_p=TOP_P,
    )
    time.sleep(5)  # to avoid rate limit errors

    # `content` may be None; guard so .strip() cannot raise AttributeError.
    content = response.choices[0].message.content
    return content.strip() if content else ""


def get_system_prompt(input_str=None):
    """Return the opening conversation entry: the system prompt as a one-item list.

    Args:
        input_str (str | None): Prompt text to use. When None (the default),
            the module-level SYSTEM_PROMPT is read at call time, so re-running
            the configuration cell takes effect without redefining this
            function.

    Returns:
        list of str: A single-element list of the form ``["System:<prompt>"]``.
    """
    if input_str is None:
        input_str = SYSTEM_PROMPT
    return [f"System:{input_str}"]


def get_convo(input_str, convo):
    """Return the conversation history extended with a new user turn.

    Args:
        input_str (str): The user's new message.
        convo (list of str | None): Existing history. When empty or None, the
            history starts from the system prompt entry.

    Returns:
        list of str: A new list holding the prior history followed by the
            labelled user message and an empty assistant entry that acts as
            the response trigger. The list passed in is never modified.
    """
    # Copy so the caller's list is left untouched, matching the empty-history
    # case, which already returns a fresh list.
    history = list(convo) if convo else get_system_prompt()
    history.append(f"{USER_NAME}: {input_str}")
    history.append(f"{AI_NAME}: ")
    return history


def get_response(input_str, convo, use_simple_truncation, verbose):
    """Generate a response from an LLM based on user input_str and conversation history.

    The new user turn is appended to the history, the history is flattened to
    one string and sent to the model. If the call raises, the oldest droppable
    entry is removed and the call is retried until it succeeds or nothing
    droppable is left.

    Args:
        input_str (str): The user's current input_str or query to the language model.
        convo (list of str): A list representing the history of the conversation.
        use_simple_truncation (bool): If True, drop the oldest entry on each
            failure (the first entry, e.g. the system prompt, goes first). If
            False, keep the first entry and drop the oldest entry after it.
        verbose (bool): A flag to determine if entire convo history should be printed.

    Return:
        list of str: The updated conversation, whose last entry is the
            assistant's reply. If the request still fails when only
            non-droppable entries remain, a one-item list containing an error
            message is returned instead.
    """
    convo = get_convo(input_str, convo)

    # Entries that are never dropped: the user turn and response trigger that
    # were just appended, plus the first entry when it is being retained.
    protected_entries = 2 if use_simple_truncation else 3

    while True:
        flattened_convo = NEW_INTERACTION_DELIMITER.join(convo)

        try:
            response = query_openai(flattened_convo)
            break

        except Exception as e:
            # Every failure is treated as "attention window exceeded".
            # NOTE(review): unrelated errors (auth, network, rate limit) take
            # this path too; consider narrowing to the client's specific
            # error type once it is confirmed for the endpoint in use.
            if verbose:
                print("** ATTEN_WINDOW ALL USED UP **")
                print(f"OpenAI Error: {repr(e)}\n")

            # Stop once nothing droppable remains. Without this check the
            # retain-first-entry strategy never shrinks below one entry and
            # would retry forever, and simple truncation could empty the list
            # and break the `convo[-1]` update below.
            if len(convo) <= protected_entries:
                return [
                    "Our Error: System prompt is using up too many tokens of the attention window"
                ]

            if use_simple_truncation:
                # Simple truncation: remove oldest element in conversation
                convo = convo[1:]
            else:
                # Retain the first entry and remove the oldest entry after it
                convo = convo[:1] + convo[2:]

    # Fill the empty response trigger with the model's reply.
    convo[-1] += response
    if verbose:
        print(NEW_INTERACTION_DELIMITER.join(convo))
    else:
        print(f"{USER_NAME}: {input_str}")
        print(f"{AI_NAME}: {response}")

    return convo


def chat(user_query, convo=None, use_simple_truncation=False, verbose=False):
    """Run one chat turn and return the updated conversation history.

    Args:
        user_query (str): The user's message for this turn.
        convo (list of str | None): Conversation history so far. None (the
            default) starts a new conversation.
        use_simple_truncation (bool): Forwarded to get_response to select the
            truncation strategy.
        verbose (bool): Forwarded to get_response to control printing.

    Returns:
        list of str: Whatever get_response returns for this turn.
    """
    # A fresh list per call: a mutable default would be shared between calls.
    if convo is None:
        convo = []
    return get_response(user_query, convo, use_simple_truncation, verbose)

## Testing our Chat bot

In [20]:
# Scripted user turns, asked in order; later questions rely on earlier answers.
user_inputs = [
    "What cards game has the best odds of winning?",  # opening question
    "What are the odds of winning it?",  # "it" refers to the previous answer
    "What is the best hand to be dealt?",
    "What is the next most likely game to win?",
]

### Simple convo truncation method

Model remembers prior convo but **forgets to** always end in exclamation points!! (which were specified in the system prompt).

In [21]:
convo = []
verbose = False
simple_truncation = True
# Loop variable is `user_input`, not `input`, so the built-in input() stays usable.
for i, user_input in enumerate(user_inputs):
    print(f"**** Convo turn {i} ****")
    convo = chat(
        user_input,
        convo=convo,
        use_simple_truncation=simple_truncation,
        verbose=verbose,
    )
    print()

**** Convo turn 0 ****
User: What cards game has the best odds of winning?
AI Assistant: Poker is a card game that offers good odds of winning if you have the right skills and strategy!!

**** Convo turn 1 ****
User: What are the odds of winning it?
AI Assistant: The odds of winning at poker can vary depending on the variant you're playing and your skill level, but with practice and strategy, you can improve your chances of winning!!

**** Convo turn 2 ****
User: What is the best hand to be dealt?
AI Assistant: In most poker variants, a royal flush is the best hand you can be dealt, as it is the highest-ranking hand and very difficult to beat!!

**** Convo turn 3 ****
User: What is the next most likely game to win?
AI Assistant: Blackjack is another popular card game with good odds of winning if you play with the right strategy and know when to hit, stand, or double down!!



#### What is happening?

Peeking under the hood to see the full conversation as it grows

NOTE: In **convo turn 3** we remove the system prompt to avoid exceeding the LLM attention window


In [22]:
convo = []
verbose = True
simple_truncation = True
# Loop variable is `user_input`, not `input`, so the built-in input() stays usable.
for i, user_input in enumerate(user_inputs):
    print(f"**** Convo turn {i} ****")
    convo = chat(
        user_input,
        convo=convo,
        use_simple_truncation=simple_truncation,
        verbose=verbose,
    )
    print()

**** Convo turn 0 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: What cards game has the best odds of winning?

AI Assistant: Poker is a card game with the best odds of winning due to its skill-based nature and strategic gameplay!!

**** Convo turn 1 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: What cards game has the best odds of winning?

AI Assistant: Poker is a card game with the best odds of winning due to its skill-based nature and strategic gameplay!!

User: What are the odds of winning it?

AI Assistant: The odds of winning at poker can vary depending on the variant you're playing and your skill level, but generally, the odds of winning a hand can range from around 1 in 5 to 1 in 2, depending on the specific situation and your opponents' actions!!

**** Convo turn 2 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: W

### Retaining System prompt truncation method

Model remembers most of prior convo and **remembers to** always end in exclamation points!!

In [23]:
convo = []
verbose = False
# Loop variable is `user_input`, not `input`, so the built-in input() stays usable.
for i, user_input in enumerate(user_inputs):
    print(f"**** Convo turn {i} ****")
    convo = chat(user_input, convo=convo, verbose=verbose)
    print()

**** Convo turn 0 ****
User: What cards game has the best odds of winning?
AI Assistant: Poker is a card game that offers good odds of winning if you have the right skills and strategy!!

**** Convo turn 1 ****
User: What are the odds of winning it?
AI Assistant: The odds of winning at poker can vary depending on the variant you're playing and your skill level, but with practice and strategy, you can improve your chances of winning!!

**** Convo turn 2 ****
User: What is the best hand to be dealt?
AI Assistant: In most poker variants, a royal flush is the best hand you can be dealt, as it is the highest-ranking hand in the game!!

**** Convo turn 3 ****
User: What is the next most likely game to win?
AI Assistant: Blackjack is another popular card game with good odds of winning if you play with the right strategy and know when to hit, stand, or double down!!



#### What is happening?

Peeking under the hood to see the full convo as it grows

NOTE: In **convo turn 3** we remove the oldest **convo turns**, but retain the system prompt

In [24]:
convo = []
verbose = True
# Loop variable is `user_input`, not `input`, so the built-in input() stays usable.
for i, user_input in enumerate(user_inputs):
    print(f"**** Convo turn {i} ****")
    convo = chat(user_input, convo=convo, verbose=verbose)
    print()

**** Convo turn 0 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: What cards game has the best odds of winning?

AI Assistant: Poker is a card game with the best odds of winning due to its skill-based nature and strategic elements!!

**** Convo turn 1 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: What cards game has the best odds of winning?

AI Assistant: Poker is a card game with the best odds of winning due to its skill-based nature and strategic elements!!

User: What are the odds of winning it?

AI Assistant: The odds of winning at poker can vary depending on the variant you're playing and your skill level, but generally, they are around 1 in 8 or 12.5% for a standard game!!

**** Convo turn 2 ****
System:You expert at games of chance.
End every response with double exclamation points!!

User: What cards game has the best odds of winning?

AI Assistant: Poker is a card gam