# Slackbot Example

SlackBot keeps you in the loop without disturbing your focus. Its personalized, intelligent AI continuously monitors your Slack workspace, alerting you to important conversations and freeing you to concentrate on what’s most important.

SlackBot reads the full history of your (public) Slack workspace and trains a Generative AI model to predict when you need to engage with a conversation. This training process gives the AI a deep understanding of your interests, expertise, and relationships. Using this understanding, SlackBot watches conversations in real time and notifies you when an important conversation is happening without you. With SlackBot you can focus on getting things done without worrying about missing out.

In this notebook, you’ll see how to build and deploy SlackBot in 15 minutes using only OpenAI’s APIs and open-source Python libraries - Data Science PhD not required.



In [None]:
!pip install openai kaskada

In [None]:
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
import openai
import kaskada as k

def prompt(messages):
    """Build the model prompt for each message in a Slack message timestream.

    A new conversation is considered to start whenever more than five minutes
    have passed since the previous message. Both fields of the returned record
    are accumulated from the start of the current conversation.

    Args:
        messages: Timestream of Slack messages. The "user", "type", "text"
            and "reactions" fields are read.

    Returns:
        A record timestream with two fields: ``recent_messages`` (up to 100
        messages of the current conversation) and ``reaction_count`` (number
        of messages in the current conversation that have reactions).
    """
    last_message = messages.lag(1)
    since_last_message = messages.time().seconds_since(last_message.time())
    conversation_start = since_last_message > k.minutes(5)

    # The window resets every time a new conversation starts. The original
    # code referenced a bare `since`, which is not defined anywhere in scope.
    conversation = k.windows.Since(conversation_start)

    # The record must be returned: callers chain `.lag(1)` / `.run()` on it.
    return k.record({
        # A list of the messages in the current conversation (up to 100)
        "recent_messages": messages
            .select("user", "type", "text")
            .collect(window=conversation, max=100),

        # How many messages have been reacted to in the conversation
        "reaction_count": messages
            .filter(messages["reactions"].is_not_null())
            .count(window=conversation),
    })

def examples(messages):
    """Build fine-tuning examples from a Slack message timestream.

    Each example pairs the prompt computed at the *previous* message with the
    user who sent the *current* message, so the model learns to predict who
    engages next.

    Args:
        messages: Timestream of Slack messages; the "user" field is used as
            the completion.

    Returns:
        A record timestream with ``prompt`` and ``completion`` fields. The
        ``prompt`` is produced by a one-step lag, so rows without a previous
        prompt carry a null there.
    """
    # We'll train the model to generate the user ID who will engage next.
    # Uses k.record (as prompt() does) and returns the result; the original
    # built the record with an inconsistent helper and discarded it.
    return k.record({
        # For each example, use the previous prompt
        "prompt": prompt(messages).lag(1),

        # ...and the user who sent the current message. The original read
        # from an undefined `completion` name.
        "completion": messages["user"],
    })

def main():
    """Train the notification model and apply it to live Slack messages.

    Steps:
      1. Compute training examples from ``./messages.parquet``.
      2. Split them 80/20 and write ``train.jsonl`` / ``valid.jsonl``.
      3. Launch an OpenAI fine-tune through the ``openai`` CLI.
      4. For every live prompt, ask the fine-tuned model for candidate users
         and print a notification for each sufficiently likely one.

    Raises:
        subprocess.CalledProcessError: if the ``openai`` CLI exits non-zero.
    """
    # Local imports keep this cell self-contained.
    import math
    import subprocess

    # Initialize Kaskada with a local execution context.
    k.init_session()

    tl = examples(messages=k.source.read_parquet(files=["./messages.parquet"]))
    # Drop rows whose (lagged) prompt is null; same null test prompt() uses.
    tl = tl.filter(tl["prompt"].is_not_null())
    examples_df = tl.run().to_pandas()

    # Split training & validation
    train, valid = train_test_split(examples_df, test_size=0.2, random_state=42)
    train.to_json("train.jsonl", orient="records", lines=True)
    valid.to_json("valid.jsonl", orient="records", lines=True)

    # Fine tune a model. An indented `!openai ...` line is only valid inside
    # IPython; subprocess works everywhere and surfaces CLI failures.
    subprocess.run(
        [
            "openai", "api", "fine_tunes.create",
            "-t", "train.jsonl",
            "-v", "valid.jsonl",
        ],
        check=True,
    )

    # Apply in real-time
    live_messages = k.StreamSource()

    # TODO: Spawn a Slack listener

    # Handle messages in realtime
    for p in prompt(live_messages).run(starting=datetime.now()).to_generator():
        # Models produced by `fine_tunes.create` are served by the Completions
        # endpoint, which is also the endpoint that accepts `logprobs`.
        # NOTE(review): `prompt` is expected to be a string; confirm how the
        # record `p` should be serialized to match the training data.
        completions = openai.Completion.create(
            model="ft-2zaA7qi0rxJduWQpdvOvmGn3",
            prompt=p,
            n=5,
            logprobs=5,
        )
        # Iterate the returned choices, not the response object itself.
        for choice in completions["choices"]:
            token_logprobs = choice["logprobs"]["token_logprobs"]
            # Log-probabilities are <= 0, so their sum can never exceed 0.05;
            # convert back to a probability before applying the threshold.
            if math.exp(sum(token_logprobs)) > 0.05:
                user = choice["text"]
                # Use the Slack API to PM the user?
                print(f"Notify {user} of conversation: \n{p}")

## Legacy Implementation


In [None]:
# Load the fenlmagic IPython extension (presumably what provides the
# `%%fenl` cell magic used by the query cells in this section).
%load_ext fenlmagic

In [None]:
from kaskada.api.session import LocalBuilder
from kaskada import table

# Build a Kaskada session from a LocalBuilder with its default settings.
session = LocalBuilder().build()

In [None]:

# Delete the "Message" table, then create it again with the schema hints below.
table.delete_table("Message")

table.create_table(
    table_name="Message",  # name of the table
    time_column_name="ts",  # column holding the time associated with each row
    entity_key_column_name="channel",  # column holding each row's entity key
    grouping_id="Channel",
)


In [None]:

# Load the contents of messages.parquet into the "Message" table.
table.load(
    table_name="Message",
    file="messages.parquet",
)

In [None]:
%%fenl
# Query the Message table as-is.
Message

In [None]:
%%fenl --var examples
# `--var examples` binds the query result to the Python variable `examples`,
# which the following cell reads via `examples.dataframe`. Without it the
# result is never assigned to that name.

# Keep only rows whose subtype is "message".
# NOTE(review): the Python version of this notebook selects a `type` column;
# confirm that `subtype` is the intended field here.
let message = Message | when(Message.subtype == "message")

# The model input computed at each message.
let prompt = {
    # TODO: List rather than last
    recent_messages: message | select_fields($input, "user", "text") | last(),

    # # TODO: List support (for reactions field)
    # engage_count: message
    # | when(message.reactions | is_valid())
    # | count(window=sliding(10, minutely())),

    # TODO: the time of day
    time: time_of(message), 
}

# The expected model output: the user of the message.
let completion = message.user

# One training example per message.
let examples = {
    prompt: prompt, # TODO: Support lag: | lag(1),
    completion: completion,
}

# Emit only examples that have a valid prompt.
in examples | when($input.prompt | is_valid())

In [None]:
# Split training & validation
from sklearn.model_selection import train_test_split
train, valid = train_test_split(
  examples.dataframe,
  test_size=0.2, random_state=42
)
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

# Fine tune a model
!openai api fine_tunes.create -t "train.jsonl" -v "valid.jsonl"