In [None]:
!pip install -q openai

In [None]:
import getpass
import openai

# Prompt for the API key interactively (getpass does not echo the input)
# rather than hard-coding it in the notebook, then set it on the openai module.
openai.api_key = getpass.getpass('OpenAI: API Key')

In [None]:
from typing import Set
from abc import ABC, abstractmethod

class SlackBotMessage(ABC):
    """Abstract root for every message the slack bot places in a conversation.

    This class exists to be subclassed, not instantiated: concrete message
    types supply `to_request`.
    """

    @abstractmethod
    def to_request(self):
        """Render this message as a request entry; subclasses must override."""

class SystemContent(SlackBotMessage):
    """A system-role message: prefixed to a conversation to give the LLM context.
    """
    def __init__(self, content: str):
        self.content = content

    def to_request(self):
        """Return the request entry for this message, tagged with the "system" role."""
        entry = {"role": "system"}
        entry["content"] = self.content
        return entry

class QueryContent(SlackBotMessage):
    """The user's topic, phrased as a question to the chat bot about who to reach out to.
    """
    def __init__(self, content: str):
        self.content = content

    def to_request(self):
        """Return the user-role request entry that wraps the topic in the fixed question."""
        question = f"Who should I reach out to about: {self.content}"
        return {"role": "user", "content": question}

class SlackMessage(SlackBotMessage):
    """A message taken from Slack, supplied as additional context for the query.
    """
    def __init__(self, username: str, content: str):
        self.content = content
        self.username = username

    def to_request(self):
        """Return a system-role request entry formatted as "username: content"."""
        attributed = f"{self.username}: {self.content}"
        return {"role": "system", "content": attributed}

class PostProcessor:
    """Picks the known usernames out of the text returned by OpenAI.
    """
    def __init__(self, users: Set[str]):
        self.users = users

    def get_users_from_message(self, message: str) -> Set[str]:
        """Echo `message` to stdout, then return each known user occurring in it as a substring."""
        print(message)
        return {candidate for candidate in self.users if candidate in message}

class SlackBot:
    """SlackBot is the entry-point to the example of Kaskada + OpenAI + LLMs.

    Slack messages are accumulated with `add_message`. `query` sends them,
    preceded by fixed system instructions, to the OpenAI chat completion API
    and returns the known usernames that appear in the reply.
    """
    def __init__(self, model_name: str = "gpt-3.5-turbo", max_tokens: int = 25):
        """Create an empty bot.

        Args:
            model_name: Model name passed to the chat completion request.
            max_tokens: `max_tokens` value passed to the chat completion request.
        """
        self.model_name = model_name
        self.max_tokens = max_tokens
        self.users = set()
        self.intro_message = SystemContent("You are a helpful assistant designed to suggest the names of people who would best be points of contacts for a specific topic based on messages.")
        self.messages = []
        # The post-processor holds this same set object, so usernames registered
        # later through add_message are visible to it without re-wiring.
        self.post_processor = PostProcessor(self.users)

    def get_subset_users_message(self):
        """Return the system entry listing the usernames the model may answer with.

        The names are sorted and comma-separated so the prompt is identical
        from run to run and carries no set punctuation (braces, quotes, or
        `set()` when no user is known yet).
        """
        usernames = ", ".join(sorted(str(name) for name in self.users))
        return SystemContent(f"Only respond as a JSON object with any subset of these usernames who would be very interested, or return an empty set if no one would be interested: {usernames}").to_request()

    def get_format_message(self):
        """Return the system entry describing how the Slack messages are formatted."""
        return SystemContent("Messages are formatted as username: topic of message").to_request()

    def add_message(self, username: str, content: str):
        """Record a Slack message as context and register its author as a known user."""
        message = SlackMessage(username, content)
        self.users.add(username)
        self.messages.append(message.to_request())

    def __create_conversation(self, query: QueryContent):
        """Assemble the request: system instructions, recorded messages, then the query."""
        return [
            self.intro_message.to_request(),
            self.get_subset_users_message(),
            self.get_format_message(),
            *self.messages,
            query.to_request(),
        ]

    def query(self, query: str) -> Set[str]:
        """Ask the model who to reach out to about `query`.

        Args:
            query: The topic to ask about.

        Returns:
            The known usernames that appear in the model's reply.
        """
        request = dict(
            model=self.model_name,
            messages=self.__create_conversation(QueryContent(query)),
            max_tokens=self.max_tokens,
            temperature=0,
        )
        # openai>=1.0 removed `openai.ChatCompletion` in favour of the
        # module-level `openai.chat.completions`; older releases only provide
        # the former. Support both so an unpinned install keeps working.
        if hasattr(openai, "chat"):
            conversation = openai.chat.completions.create(**request)
        else:
            conversation = openai.ChatCompletion.create(**request)
        # The API allows a null message content; treat that as an empty reply
        # so the substring matching below does not raise.
        response = conversation.choices[0].message.content or ""
        return self.post_processor.get_users_from_message(response)

In [None]:
# Bot for the hand-written sample conversation, using the default model and token limit.
sample_bot = SlackBot()

In [None]:
# Add some sample messages. Each call also registers the author as a known
# user, so these five usernames are the only ones a query can return.
sample_bot.add_message("@kevin.nguyen", "Vector search databases are the future for LLMs. They enable to growth and optimizations of queries")
sample_bot.add_message("@ryan.michael", "Kaskada with DataStax enables faster-streaming LLMs than traditional lang-chain models")
sample_bot.add_message("@eric.pinzur", "Helm charts are how we should deploy the future of architecture of microservices")
sample_bot.add_message("@ben.chambers", "Python FFIs and Rust compilation give us a much better experience than our current implementation")
sample_bot.add_message("@jordan.frazier", "here’s list in type inference and index support")

In [None]:
# "FFIs" appears verbatim in the @ben.chambers sample message.
sample_bot.query("FFIs")

In [None]:
# Longer free-text query; it mentions vectors but also terms (Cassandra, Astra)
# that occur in none of the sample messages.
sample_bot.query("The database I am using is a vector based implementation derived from Cassandra on Astra. There appears to be a problem with the scale.")

In [None]:
# "Vector search databases" appears verbatim in the @kevin.nguyen sample message.
sample_bot.query("Vector search databases")

In [None]:
# Kaskada is mentioned only in the @ryan.michael sample message.
sample_bot.query("I want to know more about Kaskadas ML")

In [None]:
# None of the sample messages mention onboarding.
sample_bot.query("How do I onboard?")

In [None]:
# Unrelated to every sample message; presumably exercises the
# "no one would be interested" path -- TODO confirm intent.
sample_bot.query("Awkward Tacos")

In [None]:
# Example from the last few messages from the Slack Kaskada Eng
# https://datastax.slack.com/archives/C04J75DMUSG/p1690824490676389
# A separate bot instance, so it shares no users or messages with sample_bot.
kaskada_eng_bot = SlackBot()

In [None]:
# Add all the messages from the recent thread
# Only the three authors below become known users; display names that appear
# inside message text (e.g. "@Ryan Michael") are not registered.
kaskada_eng_bot.add_message("@ben.chambers", "If yes: then since count(E) ignores null elements, then collect(E) needs to ignore elements")
kaskada_eng_bot.add_message("@ben.chambers", "count(E) == len(collect(E, max=null)) <- should this be true?")
kaskada_eng_bot.add_message("@jordan.frazier", "(i.e. @Ryan Michael Should collect() collect null values into the list?")
kaskada_eng_bot.add_message("@ryan.michael", "That’s an interesting question")
kaskada_eng_bot.add_message("@ben.chambers", "It’s also interesting, because right now most aggregations produce null if they haven’t seen any non-null values. But count produces 0 and collect will produce the empty list. So it feels like we may want a relationship between them.")
kaskada_eng_bot.add_message("@ben.chambers", '''That’s also nice because it lets us do something like:
E.value | if(E.interesting) | collect(max=10)
To collect “10 interesting thnigs”''')
kaskada_eng_bot.add_message("@ben.chambers", '''And we can always put a null value in a non-null struct:
({ E.maybe_null } | collect(max=10)).maybe_null if we want to collect 10 potentially-null values.''')
kaskada_eng_bot.add_message("@jordan.frazier", '''count produces 0 if it only sees null values (since it doesn’t count null).
len(collect()) produces null if it skips null values (contradictory — count(E) != len(collect(E))''')
kaskada_eng_bot.add_message("@ben.chambers", '''Why? Why not have it produces an empty list if it hasn’t seen any values? It doesn’t take any space, and makes it relate to count?''')
kaskada_eng_bot.add_message("@jordan.frazier", '''That’s right — I was thinking of “skipping” as “ignoring” the input entirely, but that doesn’t make sense. If it sees a null it will either produce the current list or the empty list if none exists''')

In [None]:
# This message is used as the query below instead of being added as context.
# "@Brian Godsey" authored none of the messages added above, so it is not a
# known user and cannot appear in the result.
last_message_on_thread = '''If anybody wants to comment (maybe @Brian Godsey), I added the question to the doc.'''

In [None]:
kaskada_eng_bot.query(last_message_on_thread)

In [None]:
# Query that refers back to the "skipping"/"ignoring" discussion in the thread.
kaskada_eng_bot.query("I think skipping the enitre input is necessary.")

In [None]:
# Nonsense query; presumably checks that unrelated text yields no users -- TODO confirm intent.
kaskada_eng_bot.query("another random octopus tacos vector me search")

1. Providing the list of users in the prompt is not a scalable approach.
2. We are not going to provide the whole chat history in the prompt (this is what fine-tuning is for).
3. Do we want to allow the model to choose whether to return a person or an empty set? Should someone always be notified? For example, we could instruct it: "Only return a name if you're very confident."
4. Validation metric? We need a condition under which we create training examples. E.g. if we know a specific history resulted in a choice, then we can rank it. We should be able to recognize whether there are important people to notify or the model just doesn't know.