# Slackbot Example

SlackBot keeps you in the loop without disturbing your focus. Its personalized, intelligent AI continuously monitors your Slack workspace, alerting you to important conversations and freeing you to concentrate on what’s most important.

SlackBot reads the full history of your (public) Slack workspace and trains a Generative AI model to predict when you need to engage with a conversation. This training process gives the AI a deep understanding of your interests, expertise, and relationships. Using this understanding, SlackBot watches conversations in real time and notifies you when an important conversation is happening without you. With SlackBot, you can focus on getting things done without worrying about missing out.

In this notebook, you’ll see how to build and deploy SlackBot in 15 minutes using only OpenAI’s APIs and open-source Python libraries - Data Science PhD not required.



In [1]:
!pip install openai kaskada

Collecting openai
  Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 KB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting aiohttp
  Downloading aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl (343 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.9/343.9 KB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting multidict<7.0,>=4.5
  Using cached multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl (29 kB)
Collecting aiosignal>=1.1.2
  Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting frozenlist>=1.1.1
  Downloading frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 KB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting async-timeout<5.0,>=4.0.0a3
  Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting yarl<2.0,>=1.0
  Using cached yarl

In [None]:
from datetime import datetime, timedelta
import openai
import kaskada as k

# Download Slack history from their API
# -> https://slack.com/help/articles/201658943-Export-your-workspace-data
# -> https://api.slack.com/events/message 
# Example Event:
# { 	
#   "type": "message", 	
#   "channel": "C123ABC456", 	
#   "user": "U123ABC456", 	
#   "text": "Hello world", 	
#   "ts": "1355517523.000005" 
# }

# Create a new local session
s = k.session()

# Pull live history into Astra Streaming
# -> Airbyte to Pulsar
# -> https://docs.airbyte.com/integrations/sources/slack
# -> https://docs.airbyte.com/integrations/destinations/pulsar

# Load messages into Kaskada
# The `...` below is a placeholder for the source's connection settings,
# which this example does not spell out.
# NOTE(review): the example event above carries a `user` field, while the
# entity here is named `user_id` - confirm which column name the data uses.
# For training: message = k.read_parquet(..., entity="user_id")
message = s.read_pulsar(..., entity="user_id")

# Build prompt
# Re-key the messages by channel so the aggregations below are per channel.
# NOTE(review): `duration` and `time_of` are used unqualified below but are
# not imported in this cell - presumably Kaskada helpers; confirm.
messages_by_channel = message.with_key(message.channel)
prompt = k.make_record( 
  # A list of all messages over the past 10 minutes (up to 100)
  recent_messages=messages_by_channel 
    .select("user", "type", "text")
    .last_duration(timedelta(minutes=10), max=100),
  # How many messages with a non-null `reactions` field were seen in the
  # last 10 minutes
  engage_count=messages_by_channel
    .when(messages_by_channel.get("reactions").is_not_null())
    .count(window=duration(timedelta(minutes=10))),
  # The timestamp of the last message
  time=time_of(messages_by_channel),
)

# We'll train ChatGPT to generate the user ID who will engage next
# Each completion is the "\n\n###\n\n" separator followed by the user ID.
# NOTE(review): the example event above names this field `user`, not
# `user_id` - confirm the column name.
completion = "\n\n###\n\n" + message.get("user_id")

# Each example will predict who will engage next, given
# the last 10 minutes of conversation history
# NOTE(review): `lag(1)` presumably takes the previous prompt value so the
# example does not include the message being predicted - confirm.
examples = k.make_record(
  prompt=prompt.lag(1).lookup(message.get("channel")),
  completion=completion,
)
# Keep only the examples whose prompt is not null
examples = examples.when(examples.get("prompt").is_not_null())

# Split training & validation
from sklearn.model_selection import train_test_split
train, valid = train_test_split(
  examples.to_pandas(),
  test_size=0.2, random_state=42
)
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

# Fine tune a model
!openai api fine_tunes.create -t "train.jsonl" -v "valid.jsonl"

# Apply in real-time
import math

# Notify a user only when the model gives their ID more than this probability.
# Log-probabilities are always <= 0, so the threshold has to be compared in
# probability space (exp of the summed token log-probabilities).
NOTIFY_THRESHOLD = 0.05

for p in prompt.to_generator(starting=datetime.now()):
  # Models created with `openai api fine_tunes.create` are served by the
  # Completions endpoint, which is also the endpoint that accepts `logprobs=5`.
  # NOTE(review): assumes `p` is already a prompt string in the same format
  # as the training examples - confirm what `to_generator` yields.
  response = openai.Completion.create(
    model="ft-2zaA7qi0rxJduWQpdvOvmGn3",
    prompt=p,
    n=5, logprobs=5,
  )
  # The candidates live under `choices`. The loop variable is named `choice`
  # so it does not overwrite the `completion` expression defined earlier.
  for choice in response.choices:
    token_logprobs = [
      lp for lp in choice.logprobs.token_logprobs if lp is not None
    ]
    if math.exp(sum(token_logprobs)) > NOTIFY_THRESHOLD:
      # Training completions are prefixed with the "\n\n###\n\n" separator,
      # so drop it if the model reproduces it before the user ID.
      user = choice.text.split("###")[-1].strip()
      # Use the Slack API to PM the user?
      print(f"Notify {user} of conversation: \n{p}")

## Legacy Implementation


In [1]:
%load_ext fenlmagic

INFO:fenlmagic:extension loaded


In [2]:
from kaskada.api.session import LocalBuilder
from kaskada import table

# Build a local session; the log output below shows it initializing local
# manager and engine processes.
session = LocalBuilder().build()

INFO:kaskada.api.release:Using latest release version: engine@v0.10.0
INFO:kaskada.api.release:Skipping download. Using binary: /Users/ryan.michael/.cache/kaskada/bin/engine@v0.10.0/kaskada-engine
INFO:kaskada.api.release:Skipping download. Using binary: /Users/ryan.michael/.cache/kaskada/bin/engine@v0.10.0/kaskada-manager
INFO:kaskada.api.local_session.local_service:Initializing manager process
INFO:kaskada.api.local_session.local_service:Logging manager STDOUT to /Users/ryan.michael/.cache/kaskada/logs/2023-07-28T11-31-56-manager-stdout.log
INFO:kaskada.api.local_session.local_service:Logging manager STDERR to /Users/ryan.michael/.cache/kaskada/logs/2023-07-28T11-31-56-manager-stdout.log
INFO:kaskada.api.local_session.local_service:Initializing engine process
INFO:kaskada.api.local_session.local_service:Logging engine STDOUT to /Users/ryan.michael/.cache/kaskada/logs/2023-07-28T11-31-56-engine-stdout.log
INFO:kaskada.api.local_session.local_service:Logging engine STDERR to /Users/rya

In [4]:

# Remove the "Message" table before re-creating it, so this cell defines
# the table from scratch.
table.delete_table("Message")

# Define the "Message" table:
#   table_name             - the table's name
#   time_column_name       - the column in the data that contains the time
#                            associated with each row
#   entity_key_column_name - the column in the data that contains the
#                            entity key associated with each row
#   grouping_id            - NOTE(review): presumably names the entity
#                            grouping the keys belong to - confirm
table.create_table(
    table_name="Message",
    time_column_name="ts",
    entity_key_column_name="channel",
    grouping_id="Channel",
)


0,1
table,table_nameMessageentity_key_column_namechanneltime_column_nametsgrouping_idChannelversion0create_time2023-07-28T11:32:06.048919update_time2023-07-28T11:32:06.048919
request_details,request_id3b833c47a4bdefe9db1b4a47b16e72ed

0,1
table_name,Message
entity_key_column_name,channel
time_column_name,ts
grouping_id,Channel
version,0
create_time,2023-07-28T11:32:06.048919
update_time,2023-07-28T11:32:06.048919

0,1
request_id,3b833c47a4bdefe9db1b4a47b16e72ed


In [7]:

# Load the contents of messages.parquet into the "Message" table.
table.load(table_name = "Message", file = "messages.parquet")

0,1
data_token_id,bbbb3c73-ec16-45f5-8b2c-190400b84211
request_details,request_id5fb6ff7e6251b69194f76f824537a43e

0,1
request_id,5fb6ff7e6251b69194f76f824537a43e


In [9]:
%%fenl
# Query the Message table as-is to inspect the loaded rows and their schema.
Message

Unnamed: 0,_time,_subsort,_key_hash,_key,subtype,ts,user,text,team,user_team,source_team,user_profile,inviter,edited,thread_ts,reply_count,reply_users_count,latest_reply,is_locked,subscribed,last_read,parent_user_id,channel
0,2023-07-25 19:42:13,4615122424080796985,10990799577862752293,general,message,2023-07-25 19:42:13,U05JQJJDJ6P,<@U05JQJJDJ6P>has joined the channel,,,,,,,,,,,,,,,general
1,2023-07-25 19:42:14,4615122424080796994,11024877639669158603,random,message,2023-07-25 19:42:14,U05JQJJDJ6P,<@U05JQJJDJ6P>has joined the channel,,,,,,,,,,,,,,,random
2,2023-07-25 19:44:27,4615122424080796980,604782357839949305,demo,message,2023-07-25 19:44:27,U05JQJJDJ6P,<@U05JQJJDJ6P>has joined the channel,,,,,,,,,,,,,,,demo
3,2023-07-26 08:29:35,4615122424080796986,10990799577862752293,general,message,2023-07-26 08:29:35,U05JQJJDJ6P,old message 1,T05JA5XCR9D,T05JA5XCR9D,T05JA5XCR9D,"{'avatar_hash': 'g18ebd8787ba', 'image_72': 'h...",,,,,,,,,,,general
4,2023-07-26 08:29:37,4615122424080796987,10990799577862752293,general,message,2023-07-26 08:29:37,U05JQJJDJ6P,old message 2,T05JA5XCR9D,T05JA5XCR9D,T05JA5XCR9D,"{'avatar_hash': 'g18ebd8787ba', 'image_72': 'h...",,"{'user': 'U05JQJJDJ6P', 'ts': '1690360188.0000...",,,,,,,,,general
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,2023-07-26 20:16:30,4615122424080796990,10990799577862752293,general,message,2023-07-26 20:16:30,U05JV3K9RB7,<@U05JV3K9RB7>has joined the channel,,,,,,,,,,,,,,,general
20,2023-07-26 20:16:30,4615122424080797003,11024877639669158603,random,message,2023-07-26 20:16:30,U05JV3K9RB7,<@U05JV3K9RB7>has joined the channel,,,,,,,,,,,,,,,random
21,2023-07-26 20:18:25,4615122424080796991,10990799577862752293,general,message,2023-07-26 20:18:25,U05JV3K9RB7,new message 1,T05JA5XCR9D,T05JA5XCR9D,T05JA5XCR9D,"{'avatar_hash': '356a52f83d46', 'image_72': 'h...",,,,,,,,,,,general
22,2023-07-26 20:31:04,4615122424080796992,10990799577862752293,general,message,2023-07-26 20:31:04,U05JH8BCZST,"Hello Charna, welcome to Slack!",T05JA5XCR9D,T05JA5XCR9D,T05JA5XCR9D,"{'avatar_hash': 'b2cae8d05d70', 'image_72': 'h...",,,,,,,,,,,general

0,1
state,SUCCESS
query_id,222cbc39-bff4-4f22-8520-12ba35d0ef79
metrics,time_preparing0.007stime_computing0.015soutput_files1
analysis,can_executeTrue
schema,(see Schema tab)
request_details,request_id2698b593673e3d32737cbadeba4c8707
expression,Message

0,1
time_preparing,0.007s
time_computing,0.015s
output_files,1

0,1
can_execute,True

0,1
request_id,2698b593673e3d32737cbadeba4c8707

Unnamed: 0,column_name,column_type
0,subtype,string
1,ts,timestamp_us
2,user,string
3,text,string
4,team,string
5,user_team,string
6,source_team,string
7,user_profile,
8,inviter,string
9,edited,


In [30]:
%%fenl --var examples
# Build one fine-tuning example (prompt + completion) per message.
# `--var examples` stores the query result in the Python variable `examples`,
# which the train/validation split cell reads via `examples.dataframe`.

# Keep only rows whose subtype is "message"
let message = Message | when(Message.subtype == "message")

let prompt = {
    # The most recent message's user and text
    # TODO: List rather than last
    recent_messages: message | select_fields($input, "user", "text") | last(),

    # # TODO: List support (for reactions field)
    # engage_count: message
    # | when(message.reactions | is_valid())
    # | count(window=sliding(10, minutely())),

    # The time of the message
    # TODO: the time of day
    time: time_of(message),
}

# The value to predict: the user who posted the message
let completion = message.user

let examples = {
    prompt: prompt, # TODO: Support lag: | lag(1),
    completion: completion,
}

# Emit only the examples that have a valid prompt
in examples | when($input.prompt | is_valid())

Unnamed: 0,_time,_subsort,_key_hash,_key,prompt,completion
0,2023-07-25 19:42:13,4615122424080796985,10990799577862752293,general,"{'recent_messages': {'user': 'U05JQJJDJ6P', 't...",U05JQJJDJ6P
1,2023-07-25 19:42:14,4615122424080796994,11024877639669158603,random,"{'recent_messages': {'user': 'U05JQJJDJ6P', 't...",U05JQJJDJ6P
2,2023-07-25 19:44:27,4615122424080796980,604782357839949305,demo,"{'recent_messages': {'user': 'U05JQJJDJ6P', 't...",U05JQJJDJ6P
3,2023-07-26 08:29:35,4615122424080796986,10990799577862752293,general,"{'recent_messages': {'user': 'U05JQJJDJ6P', 't...",U05JQJJDJ6P
4,2023-07-26 08:29:37,4615122424080796987,10990799577862752293,general,"{'recent_messages': {'user': 'U05JQJJDJ6P', 't...",U05JQJJDJ6P
...,...,...,...,...,...,...
19,2023-07-26 20:16:30,4615122424080796990,10990799577862752293,general,"{'recent_messages': {'user': 'U05JV3K9RB7', 't...",U05JV3K9RB7
20,2023-07-26 20:16:30,4615122424080797003,11024877639669158603,random,"{'recent_messages': {'user': 'U05JV3K9RB7', 't...",U05JV3K9RB7
21,2023-07-26 20:18:25,4615122424080796991,10990799577862752293,general,"{'recent_messages': {'user': 'U05JV3K9RB7', 't...",U05JV3K9RB7
22,2023-07-26 20:31:04,4615122424080796992,10990799577862752293,general,"{'recent_messages': {'user': 'U05JH8BCZST', 't...",U05JH8BCZST

0,1
state,SUCCESS
query_id,e3ba1857-bed3-4d3a-b376-3365be2bcaec
metrics,time_preparing0.002stime_computing0.013soutput_files1
analysis,can_executeTrue
schema,(see Schema tab)
request_details,request_id6176cce6ca1aaa4bbe816984c68e708a
expression,"let message = Message | when(Message.subtype == ""message"") let prompt = {  # TODO: List rather than last  recent_messages: message | select_fields($input, ""user"", ""text"") | last(),  # # TODO: List support (for reactions field)  # engage_count: message  # | when(message.reactions | is_valid())  # | count(window=sliding(10, minutely())),  # TODO: the time of day  time: time_of(message), } let completion = message.user let examples = {  prompt: prompt, # TODO: Support lag: | lag(1),  completion: completion, } in examples | when($input.prompt | is_valid())"

0,1
time_preparing,0.002s
time_computing,0.013s
output_files,1

0,1
can_execute,True

0,1
request_id,6176cce6ca1aaa4bbe816984c68e708a

Unnamed: 0,column_name,column_type
0,prompt,
1,completion,string


In [None]:
# Split training & validation
from sklearn.model_selection import train_test_split
train, valid = train_test_split(
  examples.dataframe,
  test_size=0.2, random_state=42
)
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

# Fine tune a model
!openai api fine_tunes.create -t "train.jsonl" -v "valid.jsonl"