# Inference

### For each of the three models, run predictions on a sample of messages


In [2]:
!pip3 install --upgrade openai -q

In [1]:
import os
import json, openai, time, pandas, random, getpass
from openai import cli
from types import SimpleNamespace
from sklearn.model_selection import train_test_split
from file_read_backwards import FileReadBackwards

# Directory holding the JSONL files read by the cells below. Override it with
# the SLACKBOT_WORK_DIR environment variable; the default is the original
# hard-coded location, so existing runs behave exactly as before.
work_dir = os.environ.get("SLACKBOT_WORK_DIR", "/Users/eric.pinzur/Documents/slackbot2000")

# Prompt for the key interactively so it is never stored in the notebook.
openai.api_key = getpass.getpass(prompt="Please enter your OpenAI API Key")

### Model IDs

In [3]:
# Model ID passed to openai.Completion.create in the "Next Message User" section.
# NOTE(review): the "ada:ft-" prefix suggests an ada fine-tune — confirm.
next_message_model = 'ada:ft-personal:next-message-user-full-kaskada-2023-08-03-15-42-55'

# Model ID passed to openai.Completion.create in the "Next Reaction" section.
next_reaction_model = 'ada:ft-personal:next-reaction-full-kaskada-2023-08-03-17-03-13'

# Model ID passed to openai.Completion.create in the "Next Users in Window" section.
next_users_model = 'ada:ft-personal:next-users-in-window-full-kaskada-2023-08-03-18-17-36'

## Next Message User

For a set of recent messages, try to predict the next user that will reply.

This is a classification problem: the model emits a single token, so we can also request the log probabilities of the top candidate tokens.

In [2]:
# Completion separator for the next-message model: empty here.
# NOTE(review): this name is not referenced by the cells shown in this notebook.
nm_completion_separator = ""

# Read the train and validation splits (one JSON record per line) into DataFrames.
nm_train_path = f'{work_dir}/next_message_user_cleaned_train.jsonl'
nm_valid_path = f'{work_dir}/next_message_user_cleaned_valid.jsonl'
nm_train = pandas.read_json(nm_train_path, lines=True)
nm_valid = pandas.read_json(nm_valid_path, lines=True)

### With Training Data

In [4]:
# Score the first 10 training rows: request exactly one token plus the top-3
# candidate log probabilities, then print the prediction next to the label.
for row in range(10):
    prompt, completion = nm_train['prompt'][row], nm_train['completion'][row]

    response = openai.Completion.create(
        model=next_message_model,
        prompt=prompt,
        max_tokens=1,
        temperature=0,
        logprobs=3,
    )
    top_choice = response['choices'][0]
    choice = top_choice['text']
    # Log probabilities of the candidates for the first (and only) generated token.
    log_probs = top_choice['logprobs']['top_logprobs'][0]

    print(f'Choice: {choice}, Actual: {completion}, Log Probabilites:')
    print(log_probs)
    print()

Choice:  1, Actual: 1, Log Probabilites:
{
  " 1": -0.9657385,
  " 2": -1.4050677,
  " 4": -1.596037
}

Choice:  1, Actual: 2, Log Probabilites:
{
  " 1": -1.0685356,
  " 2": -1.5028064,
  " 4": -1.3944669
}

Choice:  1, Actual: 2, Log Probabilites:
{
  " 1": -1.050273,
  " 2": -1.5859404,
  " 4": -1.3914024
}

Choice:  4, Actual: 1, Log Probabilites:
{
  " 1": -1.2921542,
  " 2": -1.9428157,
  " 4": -1.2020205
}

Choice:  4, Actual: 1, Log Probabilites:
{
  " 1": -1.3886855,
  " 2": -1.8444825,
  " 4": -1.2994654
}

Choice:  1, Actual: 3, Log Probabilites:
{
  " 1": -0.8068829,
  " 4": -1.8223191,
  " 6": -1.9705716
}

Choice:  1, Actual: 1, Log Probabilites:
{
  " 1": -0.81668377,
  " 2": -1.960412,
  " 4": -1.8723706
}

Choice:  1, Actual: 4, Log Probabilites:
{
  " 1": -0.9468565,
  " 4": -1.9926653,
  " 6": -1.6005487
}

Choice:  1, Actual: 4, Log Probabilites:
{
  " 1": -1.0770806,
  " 4": -1.8575277,
  " 6": -1.674056
}

Choice:  1, Actual: 1, Log Probabilites:
{
  " 1": -1.0844

### With Validation Data

In [15]:
# Score the first 10 validation rows: request exactly one token plus the top-3
# candidate log probabilities, then print the prediction next to the label.
for row in range(10):
    prompt, completion = nm_valid['prompt'][row], nm_valid['completion'][row]

    response = openai.Completion.create(
        model=next_message_model,
        prompt=prompt,
        max_tokens=1,
        temperature=0,
        logprobs=3,
    )
    top_choice = response['choices'][0]
    choice = top_choice['text']
    # Log probabilities of the candidates for the first (and only) generated token.
    log_probs = top_choice['logprobs']['top_logprobs'][0]

    print(f'Choice: {choice}, Actual: {completion}, Log Probabilites:')
    print(log_probs)
    print()

With Validation data:

Choice:  1, Actual: 2, Log Probabilites:
{
  " 1": -1.1703091,
  " 2": -1.7772275,
  " 4": -1.2289906
}

Choice:  1, Actual: 4, Log Probabilites:
{
  " 1": -0.9397264,
  " 4": -1.7467943,
  " 6": -1.9116483
}

Choice:  4, Actual: 1, Log Probabilites:
{
  " 1": -1.6808176,
  " 2": -1.3595152,
  " 4": -1.3301457
}

Choice:  4, Actual: 1, Log Probabilites:
{
  " 1": -1.2772946,
  " 4": -1.2063621,
  " 6": -1.9627421
}

Choice:  4, Actual: 1, Log Probabilites:
{
  " 1": -1.5786409,
  " 4": -1.0844089,
  " 6": -1.70502
}

Choice:  1, Actual: 2, Log Probabilites:
{
  " 1": -1.3150066,
  " 4": -1.3966527,
  " 6": -1.5908817
}

Choice:  1, Actual: 1, Log Probabilites:
{
  " 1": -0.8575319,
  " 2": -2.4999099,
  " 4": -1.3080171
}

Choice:  1, Actual: 6, Log Probabilites:
{
  " 1": -0.82480097,
  " 4": -1.3392587,
  " 6": -2.303694
}

Choice:  1, Actual: 5, Log Probabilites:
{
  " 1": -0.7997786,
  " 4": -1.4031092,
  " 6": -2.1099305
}

Choice:  1, Actual: 3, Log Probabi

## Next Reaction

For a set of recent messages, try to predict the reaction (if any) to the next message.

In [21]:
# Marker that terminates each completion; also passed as the `stop` sequence
# when querying the next-reaction model.
nr_completion_separator = " END"

# Read the train and validation splits (one JSON record per line) into DataFrames.
nr_train_path = f'{work_dir}/next_reaction_train.jsonl'
nr_valid_path = f'{work_dir}/next_reaction_valid.jsonl'
nr_train = pandas.read_json(nr_train_path, lines=True)
nr_valid = pandas.read_json(nr_valid_path, lines=True)

### With Training Data

In [29]:
# Walk forward through the training rows with a random stride and print the
# model's prediction for `examples` positive and `examples` negative rows.
# A row is negative when its completion is only the separator (presumably
# meaning no reaction was recorded — confirm against the data prep).
examples = 5

random.seed()
pos_count = 0
neg_count = 0

# Start one before the first row so the minimum stride of 1 can still reach row 0.
i = -1
while pos_count < examples or neg_count < examples:
    # Stride of at least 1: a stride of 0 would revisit the same row and
    # count/print it twice.
    i += random.randint(1, 10)
    if i >= len(nr_train):
        # Stop cleanly instead of raising KeyError when the data runs out
        # before both quotas are filled. Assumes the default 0..n-1 index,
        # which the label lookups below already rely on.
        print(f'Ran out of rows with {pos_count} positive and {neg_count} negative examples')
        break

    prompt = nr_train['prompt'][i]
    completion = nr_train['completion'][i]

    pos = completion != f' {nr_completion_separator}'
    if pos:
        if pos_count >= examples:
            continue  # positive quota already met; skip without calling the API
        pos_count += 1
    else:
        if neg_count >= examples:
            continue  # negative quota already met; skip without calling the API
        neg_count += 1

    res = openai.Completion.create(model=next_reaction_model, prompt=prompt, max_tokens=20, temperature=0, stop=nr_completion_separator)
    choice = res['choices'][0]['text']

    print(f'Choice: {choice}, Actual: {completion}')
    print()

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:  [{"name": "+1", "users": ["UU5C7MNMA"], "count": 1}] END

Choice:  , Actual:  [{"name": "partygopher", "users": ["UU5C7MNMA"], "count": 1}] END

Choice:  , Actual:  [{"name": "doot", "users": ["U017T5TFW58", "U016TM9NXEY"], "count": 2}] END

Choice:  , Actual:  [{"name": "+1", "users": ["UU5C7MNMA"], "count": 1}] END

Choice:  , Actual:  [{"name": "+1", "users": ["UU5C7MNMA"], "count": 1}] END



### With Validation Data

In [30]:
# Walk forward through the validation rows with a random stride and print the
# model's prediction for `examples` positive and `examples` negative rows.
# A row is negative when its completion is only the separator (presumably
# meaning no reaction was recorded — confirm against the data prep).
examples = 5

random.seed()
pos_count = 0
neg_count = 0

# Start one before the first row so the minimum stride of 1 can still reach row 0.
i = -1
while pos_count < examples or neg_count < examples:
    # Stride of at least 1: a stride of 0 would revisit the same row and
    # count/print it twice.
    i += random.randint(1, 10)
    if i >= len(nr_valid):
        # Stop cleanly instead of raising KeyError when the data runs out
        # before both quotas are filled. Assumes the default 0..n-1 index,
        # which the label lookups below already rely on.
        print(f'Ran out of rows with {pos_count} positive and {neg_count} negative examples')
        break

    prompt = nr_valid['prompt'][i]
    completion = nr_valid['completion'][i]

    pos = completion != f' {nr_completion_separator}'
    if pos:
        if pos_count >= examples:
            continue  # positive quota already met; skip without calling the API
        pos_count += 1
    else:
        if neg_count >= examples:
            continue  # negative quota already met; skip without calling the API
        neg_count += 1

    res = openai.Completion.create(model=next_reaction_model, prompt=prompt, max_tokens=20, temperature=0, stop=nr_completion_separator)
    choice = res['choices'][0]['text']

    print(f'Choice: {choice}, Actual: {completion}')
    print()

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:   END

Choice:  , Actual:  [{"name": "+1", "users": ["UU5C7MNMA"], "count": 1}] END

Choice:  , Actual:  [{"name": "+1", "users": ["U03RNK543HC"], "count": 1}] END

Choice:  , Actual:  [{"name": "+1", "users": ["U012CLUV1KJ"], "count": 1}] END

Choice:  , Actual:  [{"name": "sweat_smile", "users": ["U012CLUV1KJ"], "count": 1}] END

Choice:  , Actual:  [{"name": "+1", "users": ["U012CLUV1KJ"], "count": 1}] END



## Next Users in Window

For a set of recent messages, try to predict the set of users that might interact next.

In [3]:
# Marker that terminates each completion; also passed as the `stop` sequence
# when querying the next-users model.
nu_completion_separator = " END"

# Read the train and validation splits (one JSON record per line) into DataFrames.
nu_train_path = f'{work_dir}/next_users_in_window_train.jsonl'
nu_valid_path = f'{work_dir}/next_users_in_window_valid.jsonl'
nu_train = pandas.read_json(nu_train_path, lines=True)
nu_valid = pandas.read_json(nu_valid_path, lines=True)

### With Training Data

In [12]:
# Walk forward through the training rows with a random stride and print the
# model's prediction for `examples` positive and `examples` negative rows.
# A row is negative when its completion is an empty list followed by the
# separator.
examples = 5

random.seed()
pos_count = 0
neg_count = 0

# Start one before the first row so the minimum stride of 1 can still reach row 0.
i = -1
while pos_count < examples or neg_count < examples:
    # Stride of at least 1: a stride of 0 would revisit the same row and
    # count/print it twice.
    i += random.randint(1, 5)
    if i >= len(nu_train):
        # Stop cleanly instead of raising KeyError when the data runs out
        # before both quotas are filled. Assumes the default 0..n-1 index,
        # which the label lookups below already rely on.
        print(f'Ran out of rows with {pos_count} positive and {neg_count} negative examples')
        break

    prompt = nu_train['prompt'][i]
    completion = nu_train['completion'][i]

    pos = completion != f' []{nu_completion_separator}'
    if pos:
        if pos_count >= examples:
            continue  # positive quota already met; skip without calling the API
        pos_count += 1
    else:
        if neg_count >= examples:
            continue  # negative quota already met; skip without calling the API
        neg_count += 1

    res = openai.Completion.create(model=next_users_model, prompt=prompt, max_tokens=50, temperature=0, stop=nu_completion_separator)
    choice = res['choices'][0]['text']

    print(f'Choice: {choice}, Actual: {completion}')
    print()

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["U017T5TFW58"], Actual:  ["U016TM9NXEY", "UCZ4VJF6J"] END

Choice:  ["U012CLUV1KJ"], Actual:  ["U012CLUV1KJ"] END

Choice:  ["U012CLUV1KJ"], Actual:  ["U012CLUV1KJ"] END

Choice:  ["U012CLUV1KJ"], Actual:  ["U012CLUV1KJ"] END

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["UU5C7MNMA"], Actual:  [] END

Choice:  [], Actual:  ["U012CLUV1KJ", "U016TM9NXEY", "UCZ4VJF6J"] END

Choice:  ["U016TM9NXEY"], Actual:  [] END



### With Validation Data

In [13]:
# Walk forward through the validation rows with a random stride and print the
# model's prediction for `examples` positive and `examples` negative rows.
# A row is negative when its completion is an empty list followed by the
# separator.
examples = 5

random.seed()
pos_count = 0
neg_count = 0

# Start one before the first row so the minimum stride of 1 can still reach row 0.
i = -1
while pos_count < examples or neg_count < examples:
    # Stride of at least 1: a stride of 0 would revisit the same row and
    # count/print it twice.
    i += random.randint(1, 5)
    if i >= len(nu_valid):
        # Stop cleanly instead of raising KeyError when the data runs out
        # before both quotas are filled. Assumes the default 0..n-1 index,
        # which the label lookups below already rely on.
        print(f'Ran out of rows with {pos_count} positive and {neg_count} negative examples')
        break

    prompt = nu_valid['prompt'][i]
    completion = nu_valid['completion'][i]

    pos = completion != f' []{nu_completion_separator}'
    if pos:
        if pos_count >= examples:
            continue  # positive quota already met; skip without calling the API
        pos_count += 1
    else:
        if neg_count >= examples:
            continue  # negative quota already met; skip without calling the API
        neg_count += 1

    res = openai.Completion.create(model=next_users_model, prompt=prompt, max_tokens=50, temperature=0, stop=nu_completion_separator)
    choice = res['choices'][0]['text']

    print(f'Choice: {choice}, Actual: {completion}')
    print()

Choice:  [], Actual:  [] END

Choice:  [], Actual:  [] END

Choice:  ["U016TM9NXEY"], Actual:  ["UCZ4VJF6J"] END

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["U012CLUV1KJ"], Actual:  [] END

Choice:  ["U017T5TFW58"], Actual:  ["UCZ4VJF6J", "UU5C7MNMA"] END

Choice:  ["U017T5TFW58"], Actual:  ["UCZ4VJF6J", "UU5C7MNMA"] END

Choice:  ["U012CLUV1KJ"], Actual:  ["U012CLUV1KJ", "U03RNK543HC"] END

Choice:  ["U012CLUV1KJ"], Actual:  ["U012CLUV1KJ", "U03RNK543HC"] END

