# Slackbot Example

SlackBot keeps you in the loop without disturbing your focus. Its personalized, intelligent AI continuously monitors your Slack workspace, alerting you to important conversations and freeing you to concentrate on what’s most important.

SlackBot reads the full history of your (public) Slack workspace and trains a Generative AI model to predict when you need to engage with a conversation. This training process gives the AI a deep understanding of your interests, expertise, and relationships. Using this understanding, SlackBot watches conversations in real time and notifies you when an important conversation is happening without you. With SlackBot you can focus on getting things done without worrying about missing out.

In this notebook, you’ll see how to build and deploy SlackBot in 15 minutes using only OpenAI’s APIs and open-source Python libraries - Data Science PhD not required.



In [None]:
%pip install openai kaskada

In [None]:
from datetime import datetime, timedelta
from slack_sdk.socket_mode import SocketModeClient, SocketModeResponse
import sparrow_pi as kt
import openai
import getpass
import pyarrow

# Initialize Kaskada with a local execution context.
kt.init_session()

# Initialize OpenAI
openai.api_key = getpass.getpass('OpenAI: API Key')

# Initialize Slack
# `web_client` has to be a WebClient instance rather than the raw bot token:
# the notification loop later calls methods such as
# `slack.web_client.chat_postMessage(...)` on it.
from slack_sdk import WebClient

slack = SocketModeClient(
    app_token=getpass.getpass('Slack: App Token'),
    web_client=WebClient(token=getpass.getpass('Slack: Bot Token')),
)

## Prompt Engineering

In [None]:
def build_conversation(messages):
    """Collect messages into conversations separated by gaps of over 10 minutes.

    A new conversation begins whenever a message's "ts" is more than
    ``10 * 60`` seconds after the "ts" of the previous message.

    Args:
        messages: message source exposing "user", "ts", "text" and
            "reactions" columns.

    Returns:
        The collected conversation expression (at most 100 items per window,
        with the window restarting at each new conversation).
    """
    ts = messages.col("ts")
    # NOTE(review): marked `!!!` by the original author - presumably an API
    # that still needs to be confirmed.
    previous_ts = ts.lag(1)
    gap_seconds = ts.seconds_since(previous_ts)
    is_new_conversation = gap_seconds > 10 * 60

    with_reactions = messages.select("user", "ts", "text", "reactions")
    collected = with_reactions.collect(
        window=kt.windows.Since(is_new_conversation), max=100
    )

    # NOTE(review): the select/collect pair is applied a second time, this
    # time without "reactions" - confirm this repetition is intentional.
    without_reactions = collected.select("user", "ts", "text")
    return without_reactions.collect(
        window=kt.windows.Since(is_new_conversation), max=100
    )

In [None]:
def build_examples(messages):
    """Build fine-tuning examples from a message source.

    Each example is a record with two fields:

    * ``prompt``: the conversation (see ``build_conversation``) shifted
      forward by five minutes.
    * ``completion``: the union of reaction names and message users, each
      collected over a trailing five-minute window.

    Examples whose shifted conversation is null are filtered out.

    Args:
        messages: message source passed through to ``build_conversation``.

    Returns:
        The filtered record expression described above.
    """
    # NOTE(review): lines marked `!!!` by the original author are flagged
    # below - presumably APIs that still need to be confirmed.
    lookahead = kt.minutes(5)  # !!!

    conversation = build_conversation(messages)
    delayed_conversation = conversation.shift_by(lookahead)  # !!!

    # NOTE(review): this collects reaction *names*; confirm that is the field
    # identifying the users who reacted.
    reaction_names = conversation.col("reactions").col("name")
    reaction_users = reaction_names.collect(kt.windows.Trailing(lookahead)).flatten()  # !!!
    participating_users = conversation.col("user").collect(kt.windows.Trailing(lookahead))  # !!!
    engaged_users = kt.union(reaction_users, participating_users)  # !!!

    example = kt.record({"prompt": delayed_conversation, "completion": engaged_users})
    return example.filter(delayed_conversation.is_not_null())

## Fine-tune the model

In [None]:
import pandas
import sparrow_pi.sources as sources

# Read the exported messages, using "ts" as the time and "channel" as the entity.
raw_messages = kt.sources.Parquet("./messages.parquet", time="ts", entity="channel")

# Re-key the messages by (channel, thread).  # !!!
thread_key = kt.record({
    "channel": raw_messages.col("channel"),
    "thread": raw_messages.col("thread_ts"),
})
messages = raw_messages.with_key(thread_key)

# Build the training examples and materialize them as a pandas DataFrame.
examples = build_examples(messages)
examples_df = examples.run().to_pandas()

In [None]:
from sklearn import preprocessing

# Encode every distinct completion value as an integer label.
le = preprocessing.LabelEncoder()
# explode() yields NaN for rows whose completion list is empty; drop those so
# NaN is not fitted as if it were a label.
le.fit(examples_df.completion.explode().dropna())

# Format for the OpenAI API
def format_prompt(prompt):
    """Render a conversation as a prompt string.

    The result is ``"start -> "``, followed by one `` user --> text `` entry
    per message joined by blank lines, followed by the ``"\\n\\n###\\n\\n"``
    separator.

    Args:
        prompt: iterable of messages. Each message may expose "user" and
            "text" either by key (``msg["user"]``) or by attribute
            (``msg.user``).

    Returns:
        The formatted prompt string.
    """
    def field(msg, name):
        # The notification loop indexes messages by key (msg["text"]), so
        # key access is tried first; attribute access remains as a fallback
        # for objects that are not subscriptable.
        try:
            return msg[name]
        except TypeError:
            return getattr(msg, name)

    entries = [f' {field(msg, "user")} --> {field(msg, "text")} ' for msg in prompt]
    return "start -> " + "\n\n".join(entries) + "\n\n###\n\n"
# Replace each prompt (a list of messages) with its formatted string.
examples_df["prompt"] = examples_df["prompt"].apply(format_prompt)

def format_completion(completion):
    """Render a completion as `` <label> <label> ... end``.

    Each value is replaced by its integer label from the fitted encoder
    ``le``. An empty completion is rendered as `` nil end``.

    Args:
        completion: sized collection of values known to ``le``.

    Returns:
        The formatted completion string.
    """
    # len() rather than truthiness, so array-like inputs are handled too.
    if len(completion) == 0:
        labels = "nil"
    else:
        # transform() needs a 1-D array-like, so encode the whole collection
        # in one call; its integer results must be converted before joining.
        encoded = le.transform(list(completion))
        labels = " ".join(str(label) for label in encoded)
    return f" {labels} end"
examples_df["completion"] = examples_df["completion"].apply(format_completion)

# Persist the examples as JSON Lines (one record per line).
examples_df.to_json("examples.jsonl", orient="records", lines=True)

In [None]:
from types import SimpleNamespace
from openai import cli

# Verify the data format and split it for training & validation.
# `args` stands in for the parsed command-line arguments of the OpenAI CLI.
# NOTE(review): prepare_data presumably writes the
# ./examples_prepared_train.jsonl file that is uploaded below - confirm.
args = SimpleNamespace(file='./examples.jsonl', quiet=True)
cli.FineTune.prepare_data(args)
# NOTE(review): _get_or_upload is a private helper of the OpenAI CLI; the
# meaning of the positional `True` is not visible here - confirm.
training_id = cli.FineTune._get_or_upload('./examples_prepared_train.jsonl', True)

In [None]:
import openai

# Settings for the fine-tuning job, trained on the uploaded file.
fine_tune_options = dict(
    training_file=training_id,
    model="davinci",
    n_epochs=2,
    learning_rate_multiplier=0.02,
    suffix="coversation_users",
)

resp = openai.FineTune.create(**fine_tune_options)
job_id = resp["id"]
print(f'Fine-tuning model with job ID: "{job_id}"')

## Notify users of conversations they need to know about

In [None]:
import json, math

# Minimum probability a candidate token must exceed (see the comparison in the
# notification loop below) before it is considered.
min_prob_for_response = 0.75

# Receive Slack messages in real-time
# The stream uses "channel" as the entity column and "ts" as the time column.
live_messages = kt.sources.read_stream(entity_column="channel", time_column="ts")

# Receive messages from Slack
def handle_message(client, req):
    """Acknowledge a Socket Mode request and forward its payload to Kaskada.

    Args:
        client: client used to send the acknowledgement.
        req: incoming request; its ``envelope_id`` and ``payload`` are read.
    """
    import io
    import json

    # `import pyarrow` alone does not load the `pyarrow.json` submodule.
    from pyarrow import json as pyarrow_json

    # Acknowledge the message back to Slack
    client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id))

    # Deliver the message to Kaskada
    # read_json() reads from a path or file-like object, whereas the payload
    # is an already-parsed object, so serialize it into an in-memory buffer.
    payload_bytes = json.dumps(req.payload).encode("utf-8")
    live_messages.add_event(pyarrow_json.read_json(io.BytesIO(payload_bytes)))
# Register the handler for incoming Socket Mode requests, then connect.
slack.socket_mode_request_listeners.append(handle_message)
slack.connect()

# Handle messages in realtime
# A "conversation" is a list of messages
for conversation in build_conversation(live_messages).start().to_generator():
    if len(conversation) == 0:
        continue

    # Ask the model who should be notified
    res = openai.Completion.create(
        model="ft-2zaA7qi0rxJduWQpdvOvmGn3",
        prompt=format_prompt(conversation),
        max_tokens=1,
        temperature=0,
        logprobs=5,
    )

    # Candidate first tokens mapped to their log-probabilities.
    labels = []
    logprobs = res["choices"][0]["logprobs"]["top_logprobs"][0]
    for token, logprob in logprobs.items():
        if math.exp(logprob) <= min_prob_for_response:
            continue

        # Training completions look like " <label> ... end", so a token may
        # carry surrounding whitespace; strip it before comparing.
        candidate = token.strip()

        # if `nil` user is an option, stop processing
        if candidate == "nil":
            labels = []
            break

        # Only integer tokens can be encoded user labels; skip anything else.
        if candidate.isdigit():
            labels.append(int(candidate))

    if not labels:
        continue

    # alert on most recent message in conversation
    msg = conversation.pop()

    # Send notification to users
    for label in labels:
        try:
            # inverse_transform() takes an array-like of integer labels.
            user_id = le.inverse_transform([label])[0]
        except ValueError:
            # The model produced a label the encoder never saw; skip it.
            continue

        link = slack.web_client.chat_getPermalink(
            channel=msg["channel"],
            message_ts=msg["ts"],
        )["permalink"]

        # NOTE(review): this takes the first "im" conversation listed for the
        # user; confirm that it is always the DM with this app.
        app_channel = slack.web_client.users_conversations(
            types="im",
            user=user_id,
        )["channels"][0]["id"]

        slack.web_client.chat_postMessage(
            channel=app_channel,
            text=f'You may be interested in this conversation: <{link}|{msg["text"]}>'
        )

## Legacy Implementation


In [None]:
%load_ext fenlmagic

In [None]:
from kaskada.api.session import LocalBuilder
from kaskada import table

# Build a Kaskada session with the local builder's default settings.
# NOTE(review): presumably this starts or connects to a local Kaskada
# service - confirm.
session = LocalBuilder().build()

In [None]:

# Uncomment to remove an existing "Message" table first.
#table.delete_table("Message")

# Arguments, in order:
#   table_name             - the table's name
#   time_column_name       - column holding the time associated with each row
#   entity_key_column_name - column holding the entity key associated with each row
#   grouping_id            - grouping identifier for the table
table.create_table(
    table_name="Message",
    time_column_name="ts",
    entity_key_column_name="channel",
    grouping_id="Channel",
)


In [None]:

# Load the contents of messages.parquet into the "Message" table.
table.load(table_name = "Message", file = "messages.parquet")

In [None]:
%%fenl
# Query the Message table as-is (presumably to preview the loaded rows).
Message

In [None]:
%%fenl --var examples
# `--var examples` stores the query result in the Python variable `examples`,
# which the next cell reads as `examples.dataframe`.

# Keep only rows whose subtype is "message".
let message = Message | when(Message.subtype == "message")

# Time between each message and the one before it.
# NOTE(review): confirm the argument order of seconds_between gives a
# positive gap here.
let last_message = message.lag(1)
let since_last_message = seconds_between(message.time_of(), last_message.time_of()) as i64
# A conversation starts when that gap exceeds 5 * 60 seconds.
let conversation_start = since_last_message > 5 * 60

let prompt = {
    # TODO: List rather than last
    recent_messages: message | select_fields($input, "user", "text") | last(),

    # TODO: List support (for reactions field)
    # Number of messages with a valid `reactions` value since the
    # conversation started.
    engage_count: message
    | when(message.reactions | is_valid())
    | count(window=since(conversation_start)),
}

# The completion is the user of the message.
let completion = message.user

let examples = {
    prompt: prompt, # TODO: Support lag: | lag(1),
    completion: completion,
}

# Emit only examples with a valid prompt.
in examples | when($input.prompt | is_valid())

In [None]:
# Split training & validation
# NOTE(review): `examples.dataframe` presumably refers to the result of the
# %%fenl query cell above - confirm that cell stores it under this name.
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split reproducible.
train, valid = train_test_split(
  examples.dataframe,
  test_size=0.2, random_state=42
)
# Write both splits as JSON Lines (one record per line).
train.to_json("train.jsonl", orient='records', lines=True)
valid.to_json("valid.jsonl", orient='records', lines=True)

# Fine tune a model
# Runs the OpenAI command-line tool with the training (-t) and validation (-v) files.
!openai api fine_tunes.create -t "train.jsonl" -v "valid.jsonl"