# Slackbot Example

SlackBot keeps you in the loop without disturbing your focus. Its personalized, intelligent AI continuously monitors your Slack workspace, alerting you to important conversations and freeing you to concentrate on what’s most important.

SlackBot reads the full history of your (public) Slack workspace and trains a Generative AI model to predict when you need to engage with a conversation. This training process gives the AI a deep understanding of your interests, expertise, and relationships. Using this understanding, SlackBot watches conversations in real-time and notifies you when an important conversation is happening without you. With SlackBot, you can focus on getting things done without worrying about missing out.

In this notebook, you’ll see how to build and deploy SlackBot in 15 minutes using only OpenAI’s API and open-source Python libraries - Data Science PhD not required.



In [1]:
!pip install openai kaskada

Collecting openai
  Using cached openai-0.27.8-py3-none-any.whl (73 kB)
Collecting kaskada
  Using cached kaskada-0.5.1-py3-none-any.whl (106 kB)
Collecting tqdm
  Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)
Collecting aiohttp
  Downloading aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl (339 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m339.6/339.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting certifi<2023.0.0,>=2022.12.7
  Using cached certifi-2022.12.7-py3-none-any.whl (155 kB)
Collecting domonic<0.10.0,>=0.9.11
  Using cached domonic-0.9.11-py3-none-any.whl (313 kB)
Collecting googleapis-common-protos<2.0.0,>=1.58.0
  Using cached googleapis_common_protos-1.60.0-py2.py3-none-any.whl (227 kB)
Collecting grpcio<2.0.0,>=1.51.1
  Downloading grpcio-1.56.2-cp311-cp311-macosx_10_10_universal2.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m00:01

In [2]:
import getpass
import io
import json
from datetime import datetime, timedelta

import openai
import pandas
import pyarrow
import pyarrow.json
import sparrow_pi as k
import sparrow_pi.sources as sources
from sklearn.model_selection import train_test_split
from slack_sdk.socket_mode import SocketModeClient
from slack_sdk.socket_mode.response import SocketModeResponse
from slack_sdk.web import WebClient

# Initialize Kaskada with a local execution context.
k.init_session()

# Initialize OpenAI. The key is prompted for so it is never stored in the notebook.
openai.api_key = getpass.getpass('OpenAI: API Key')

# Initialize Slack. The Socket Mode client takes the app-level token itself,
# while `web_client` must be a WebClient built from the bot token: later cells
# call Web API methods (chat_getPermalink, chat_postMessage, ...) on
# `slack.web_client`, which a bare token string cannot provide.
slack = SocketModeClient(
    app_token=getpass.getpass('Slack: App Token'),
    web_client=WebClient(token=getpass.getpass('Slack: Bot Token')),
)

ModuleNotFoundError: No module named 'sklearn'

## Prompt Engineering

In [None]:
# Instruction text placed in front of every prompt (see format_prompt). The
# JSON-encoded conversation is appended directly after the final blank line.
SYSTEM_CONTEXT = """
You are a helpful assistant designed to suggest the Slack usernames of 
people who need to know about a Slack conversation.

Only respond as a JSON list containing the Slack usernames of people to notify 
of the conversation, or return an empty list if no one should be notified.

The Slack conversation is as follows, formatted as a JSON object:

"""

In [None]:
def prompt(messages):
    """Build the record that describes a conversation to the model.

    Args:
        messages: Kaskada timestream of Slack messages. The body reads the
            ``user``, ``subtype``, ``text`` and ``reply_count`` fields.

    Returns:
        The ``k.record`` with two fields, ``recent_messages`` and
        ``reaction_count``. (Previously the record was built but not
        returned, so every caller received ``None``.)
    """
    # Planned conversation windowing, not enabled yet:
    # a conversation starts when a new message is more than 5 minutes after the previous message
    #last_message = messages.lag(1)
    #since_last_message = messages.time().seconds_since(last_message.time())
    #conversation_start = since_last_message > k.minutes(5)

    return k.record({
        # Currently only the most recent message. Planned: a list of all
        # messages over the past 10 minutes (up to 100), i.e.
        #   .select("user", "type", "text")
        #   .collect(window=since(conversation_start), max=100)
        # NOTE(review): the meaning of the leading `False` argument to
        # select() is not visible from this notebook - confirm.
        "recent_messages": messages
            .select(False, "user", "subtype", "text")
            .last(),

        # Running sum of `reply_count`, standing in for a reaction count.
        # Planned:
        #   .filter(messages["reactions"].is_not_null())
        #   .count(window=since(conversation_start))
        "reaction_count": messages["reply_count"].sum(),
    })

In [None]:
def examples(messages):
    """Build (prompt, completion) training examples from a message timestream.

    We'll train the model to generate the user ID who will engage next.

    Args:
        messages: Kaskada timestream of Slack messages; the ``user`` field is
            read here and the rest is consumed by ``prompt``.

    Returns:
        The ``k.record`` with fields ``prompt`` and ``completion``.
        (Previously the record was built but not returned.)
    """
    return k.record({
        # For each example, use the prompt. Planned: the *previous* prompt via
        # .lag(1); .last() is the current stand-in.
        "prompt": prompt(messages).last(),

        # ...and the current user ID
        "completion": messages["user"],
    })

In [None]:
# Format the data for OpenAI
def format_prompt(prompt):
    """Return the text sent to OpenAI for one prompt record.

    The result is the system context, then the JSON-encoded record, then the
    "\\n\\n###\\n\\n" separator that closes the prompt.
    """
    pieces = (SYSTEM_CONTEXT, json.dumps(prompt), "\n\n###\n\n")
    return "".join(pieces)
def format_completion(completion):
    """Return the training completion text for one example.

    The system prompt asks for a JSON list of users and the realtime loop
    parses the model output with ``json.loads``, so the target is encoded as
    a one-element JSON list rather than a bare user ID. The trailing "###"
    marks the end of the completion.

    Args:
        completion: The user ID that engaged with the conversation.

    Returns:
        The JSON list text followed by "###", e.g. '["U123"]###'.
    """
    return json.dumps([completion]) + "###"

## Fine-tune the model

In [None]:
# Compute examples from historical data.
# Planned source API, kept for reference:
#   k.source.read_parquet(entity_column="channel", time_column="ts", files=["./messages.parquet"])
tl = examples(sources.ArrowSource("ts", "channel", pandas.read_parquet("./messages.parquet")))

# NOTE(review): tl.run() is assumed to return a pandas DataFrame with
# `prompt` and `completion` columns - the column-wise .apply and .to_json
# calls below rely on it; confirm against the Kaskada API in use.
raw_examples_df = tl.run()

# Limit to the examples we want to use for training:
#  - keep only the two fields written to the fine-tuning files, so any extra
#    result columns do not leak into the JSONL;
#  - drop rows missing either field (stand-in for the planned
#    tl.filter(tl["prompt"].is_not_null())), since the formatters cannot
#    handle missing values.
# Then format for the OpenAI API without mutating the raw frame.
examples_df = (
    raw_examples_df[["prompt", "completion"]]
    .dropna(subset=["prompt", "completion"])
    .assign(
        prompt=lambda df: df["prompt"].apply(format_prompt),
        completion=lambda df: df["completion"].apply(format_completion),
    )
)

# Split training & validation (fixed seed keeps the split reproducible)
train, valid = train_test_split(examples_df, test_size=0.2, random_state=42)
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

In [None]:
%%bash
# Start a fine-tuning job from the files written by the previous cell.
openai api fine_tunes.create \
    --training_file "train.jsonl" \
    --validation_file "valid.jsonl"

## Notify users of conversations they need to know about

In [None]:
# Stream that live Slack messages are pushed into: `channel` is the entity
# column and `ts` the time column, as for the historical data.
live_messages = k.source.read_stream(
    time_column="ts",
    entity_column="channel",
)

# Receive messages from Slack
def handle_message(client, req):
    """Socket Mode listener: acknowledge the envelope, then forward message events to Kaskada.

    Args:
        client: The Socket Mode client that received the request; used to
            send the acknowledgement.
        req: The incoming Socket Mode request. Reads ``envelope_id``,
            ``type`` and ``payload``.
    """
    # Acknowledge the message back to Slack
    client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id))

    # Only Events API envelopes carry messages, and the message itself
    # (channel, ts, user, text, ...) sits under payload["event"]. Everything
    # else is acknowledged above and otherwise ignored.
    if req.type != "events_api":
        return
    event = req.payload.get("event") or {}
    if event.get("type") != "message":
        return

    # Deliver the message to Kaskada. pyarrow.json.read_json reads from a
    # path or file-like object, not a dict, so the event is serialized to a
    # single JSON line and wrapped in an in-memory buffer first.
    event_json = json.dumps(event).encode("utf-8")
    live_messages.add_event(pyarrow.json.read_json(io.BytesIO(event_json)))
# Register the listener on the Socket Mode client created in the setup cell
# (it is named `slack`; no top-level `client` exists) and open the connection.
slack.socket_mode_request_listeners.append(handle_message)
slack.connect()

# Handle messages in realtime
for p in prompt(live_messages).run(starting=datetime.now()).to_generator():

    # Ask the model who should be notified.
    # NOTE(review): max_tokens=10 may truncate a list with more than one
    # user - confirm it is large enough for the expected output.
    completions = openai.Completion.create(
        model="ft-2zaA7qi0rxJduWQpdvOvmGn3",
        prompt=format_prompt(p),
        max_tokens=10,
        temperature=0,
        # Training completions end with "###"; stop there so the marker is
        # not part of the text parsed below.
        stop=["###"],
    )

    # The model is asked for a JSON list; skip this row if it returned
    # anything else instead of stopping the whole loop.
    try:
        users = json.loads(completions.choices[0].text)
    except json.JSONDecodeError:
        continue
    if not isinstance(users, list) or not users:
        continue

    # The permalink is the same for every recipient, so resolve it once.
    # NOTE(review): assumes each result row `p` is dict-like and carries
    # `_entity` (the channel) and `_time` (the Slack message ts) - confirm.
    permalink = slack.web_client.chat_getPermalink(
        channel=p["_entity"],
        message_ts=p["_time"],
    )["permalink"]

    # Link label: text of the most recent message from the prompt record,
    # with a generic label when it is not available.
    recent_message = p.get("recent_messages") or {}
    message_text = recent_message.get("text") or "view conversation"

    # Send notification to users
    for user in users:
        app_channel = slack.web_client.users_conversations(
            types="im",
            user=user,
        )["channels"][0]["id"]

        slack.web_client.chat_postMessage(
            channel=app_channel,
            text=f'You put eyes on this message: <{permalink}|{message_text}>'
        )

## Legacy Implementation


In [None]:
%load_ext fenlmagic

In [None]:
from kaskada.api.session import LocalBuilder
from kaskada import table

# Build a Kaskada session with the local builder and keep a handle to it.
# NOTE(review): presumably this starts or attaches to a local Kaskada service
# that the `table` calls and %%fenl cells below use - confirm.
session = LocalBuilder().build()

In [5]:

# Uncomment to remove an existing table before re-creating it:
# table.delete_table("Message")

# Register the "Message" table:
#   time_column_name       - column holding the time associated with each row
#   entity_key_column_name - column holding the entity key associated with each row
table.create_table(
    table_name="Message",
    time_column_name="ts",
    entity_key_column_name="channel",
    grouping_id="Channel",
)


0,1
table,table_nameMessageentity_key_column_namechanneltime_column_nametsgrouping_idChannelversion0create_time2023-08-03T09:29:52.105594update_time2023-08-03T09:29:52.105594
request_details,request_id81452f5e9fbcff7f453eb99d2ebb15bc

0,1
table_name,Message
entity_key_column_name,channel
time_column_name,ts
grouping_id,Channel
version,0
create_time,2023-08-03T09:29:52.105594
update_time,2023-08-03T09:29:52.105594

0,1
request_id,81452f5e9fbcff7f453eb99d2ebb15bc


In [6]:

# Load the historical messages file into the "Message" table.
table.load(
    table_name="Message",
    file="messages.parquet",
)

0,1
data_token_id,ebdaa884-33d9-49f1-9e9d-61af6235dddb
request_details,request_id895b6120d06754bf458f27c15be4839b

0,1
request_id,895b6120d06754bf458f27c15be4839b


In [7]:
%%fenl
# Query the Message table as-is, with no transformation applied.
Message

IPython.core.error.UsageError: Missing optional dependency 'fsspec'.  Use pip or conda to install fsspec.


INFO:kaskada.api.local_session.local_session_keep_alive:Attempting to recover from an invalid session.


In [None]:
%%fenl --var examples
# The query result is bound to the Python variable `examples` (via --var) so
# the next cell can read `examples.dataframe`.

# Keep only rows whose subtype is "message".
let message = Message | when(Message.subtype == "message")

# A conversation starts when a message arrives more than 5 minutes (5 * 60
# seconds) after the previous one.
let last_message = message.lag(1)
let since_last_message = seconds_between(message.time_of(), last_message.time_of()) as i64
let conversation_start = since_last_message > 5 * 60

let prompt = {
    # TODO: List rather than last
    recent_messages: message | select_fields($input, "user", "text") | last(),

    # TODO: List support (for reactions field)
    # Count of messages with a valid `reactions` value since the conversation started.
    engage_count: message
    | when(message.reactions | is_valid())
    | count(window=since(conversation_start)),
}

# The training target is the user who posted the message.
let completion = message.user

let examples = {
    prompt: prompt, # TODO: Support lag: | lag(1),
    completion: completion,
}

# Emit only examples that have a prompt.
in examples | when($input.prompt | is_valid())

In [None]:
# Split training & validation
from sklearn.model_selection import train_test_split
train, valid = train_test_split(
  examples.dataframe,
  test_size=0.2, random_state=42
)
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

# Fine tune a model
!openai api fine_tunes.create -t "train.jsonl" -v "valid.jsonl"