In [32]:
import pandas as pd
import os
import re

# Read the call data from the CSV file
call_data = pd.read_csv('data/call_data.csv')

# Maps each call SID (the transcript filename without its extension) to the
# list of monologue strings parsed from that file.
transcripts = {}

# Iterate over the files in the transcripts directory
for filename in os.listdir('data/transcripts'):
    transcript_path = os.path.join('data/transcripts', filename)
    # Skip hidden entries and sub-directories (e.g. `.DS_Store`, Jupyter's
    # `.ipynb_checkpoints`): they are not transcripts and open() would fail on them.
    if filename.startswith('.') or not os.path.isfile(transcript_path):
        continue

    # Get the SID from the filename
    sid = os.path.splitext(filename)[0]

    # Decode explicitly so parsing does not depend on the platform's default encoding.
    # NOTE(review): assumes the transcript files are UTF-8 - confirm against the export.
    with open(transcript_path, 'r', encoding='utf-8') as f:
        # Strip the leading "<number>. " prefix from every line. The dot is escaped
        # so that only a literal period is accepted after the digits; an unescaped
        # `.` matches any character and could eat part of the text.
        transcripts[sid] = [re.sub(r'^\d+\.\s', '', line.strip()) for line in f]

# Add the transcript data as a column to the call data
# (SIDs without a matching transcript file get NaN from `Series.map`)
call_data['transcript'] = call_data['SID'].map(transcripts)

In [33]:
# Display the call table, which now carries the parsed `transcript` column
call_data

Unnamed: 0,SID,Had Timing Objection,Timing Objection Index,transcript
0,CA77596bc061516d795f6a60fbd274cb0e,False,,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
1,CAd8395ea9fec545909e633bba6a8eb643,False,,"[[Prospect] Hello?, [Sales Rep] Hey, Ivan. S..."
2,CAf15429ca373443cd6a6a88573fe16f98,True,9,"[[Prospect] Hello?, [Sales Rep] Drake, this ..."
3,CA631c8faf6571f057e34bc12073da9f9c,False,,"[[Prospect] Hello?, [Sales Rep] File perform..."
4,CAbb4454527ef392d377ffd37a5bb00669,True,35,"[[Prospect] Hello?, [Sales Rep] Hey, Sean. I..."
...,...,...,...,...
193,CAb5a575591cfbaf24ac936657c5befd47,False,,"[[Sales Rep] Hey, Mark. Skyler with NUCs. I w..."
194,CA84e33cc6a9aac5c8ecf41fe25e8be7ce,True,9,"[[Prospect] Hello?, [Sales Rep] Maddie, hey...."
195,CAbfde17aa013e9896ece8102c8813f700,False,,"[[Sales Rep] Hello?, [Prospect] Hello?]"
196,CA16f2e7ff1215c1b52a509abd7c373732,False,,"[[Prospect] Hi. This is Brandon, [Sales Rep] ..."


As you can see, we have a small dataset of 198 calls labelled with whether there was a timing objection, and if so where in the call the timing objection is present (1-indexed).

We want to use this dataset to train and evaluate a real-time timing-objection detector. To do this, we will formulate the problem as a simple time-series prediction. For every distinct moment of the call (every monologue, for simplicity), we pass as input to the model all the transcript text from before that moment, and try to build a model that can predict whether a timing objection has occurred in the portion of the call it has seen so far.

In [37]:
def _parse_objection_index(raw_index):
    """Return the 1-indexed timing-objection position as an int, or None if absent.

    The label may be missing in several forms depending on how the CSV was
    parsed: NaN/None (pandas NA handling) or the literal string "None".
    Numeric labels may arrive as int, float (e.g. 9.0) or str (e.g. "9").
    """
    if pd.isna(raw_index):
        return None
    if isinstance(raw_index, str) and raw_index.strip() in ('', 'None'):
        return None
    return int(float(raw_index))


# Initialize the training set X and the target set Y
X = []
Y = []

# Iterate over each call
for _, row in call_data.iterrows():
    transcript = row['transcript']
    # A call whose SID had no transcript file carries NaN here instead of a
    # list; it cannot be split, so it contributes no data points.
    if not isinstance(transcript, list):
        continue

    objection_index = _parse_objection_index(row['Timing Objection Index'])

    # Iterate over each split point in the transcript. Split points run from 2
    # to len(transcript) - 1, so neither the one-monologue prefix nor the
    # complete transcript is emitted as a data point.
    for i in range(2, len(transcript)):
        # The input is the first `i` monologues of the call
        X.append({
            'SID': row['SID'],
            'monologues_so_far': transcript[:i],
        })

        # The target is 1 once the prefix includes the (1-indexed) monologue
        # that holds the timing objection, otherwise 0
        Y.append(1 if objection_index is not None and i >= objection_index else 0)

# Convert the training set and the target set to pandas objects for easier manipulation
X = pd.DataFrame(X)
Y = pd.Series(Y)

X

Unnamed: 0,SID,monologues_so_far
0,CA77596bc061516d795f6a60fbd274cb0e,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
1,CA77596bc061516d795f6a60fbd274cb0e,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
2,CA77596bc061516d795f6a60fbd274cb0e,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
3,CA77596bc061516d795f6a60fbd274cb0e,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
4,CA77596bc061516d795f6a60fbd274cb0e,"[[Prospect] Hello?, [Sales Rep] Hey, John. H..."
...,...,...
3158,CAa481788bab5e4fee93838fec4b49bd59,"[[Prospect] Paul speaking., [Sales Rep] Hey,..."
3159,CAa481788bab5e4fee93838fec4b49bd59,"[[Prospect] Paul speaking., [Sales Rep] Hey,..."
3160,CAa481788bab5e4fee93838fec4b49bd59,"[[Prospect] Paul speaking., [Sales Rep] Hey,..."
3161,CAa481788bab5e4fee93838fec4b49bd59,"[[Prospect] Paul speaking., [Sales Rep] Hey,..."


Now that we've constructed our dataset of inputs and model targets, we can build a model to predict the target class (whether or not a timing objection has occurred in the call so far).

Normally we would split the data into a non-overlapping train and test set and train a model on the train set, but for demonstration purposes we can build a data-free model that just relies on human context about what a "timing objection" means in sales. Because of the advanced capabilities of LLMs like GPT-4, we can transform this human context into a fairly reliable predictor of timing objections even without any labelled data, just using prompt engineering.

In [38]:
import openai
import yaml

# Ask for an OpenAI API key to run this. The key is read from the
# OPENAI_API_KEY environment variable when set, otherwise it is prompted for
# without echo, so the secret is never stored in the notebook itself.
import getpass

openai.api_key = os.environ.get('OPENAI_API_KEY') or getpass.getpass('OpenAI API key: ')

def _strip_code_fence(text):
    """Return `text` without a surrounding Markdown code fence, if it has one.

    Chat models sometimes wrap structured output in ``` or ```yaml fences,
    which are not valid YAML. Text that is not fenced is returned unchanged.
    """
    stripped = text.strip()
    if not stripped.startswith('```'):
        return text
    # Drop the opening fence line (it may carry a language tag such as "yaml")
    lines = stripped.splitlines()[1:]
    # Drop the closing fence line when present
    if lines and lines[-1].strip().startswith('```'):
        lines = lines[:-1]
    return '\n'.join(lines)


def predict_timing_objection(call):
    """Ask GPT-4 whether the call so far contains a timing objection.

    Args:
        call: Mapping with a 'monologues_so_far' entry holding the monologue
            strings of the call seen so far, in order.

    Returns:
        The first YAML document parsed from the model's reply. The prompt asks
        for a mapping of the form
        {'result': {'reason': ..., 'monologueIndex': ..., 'answer': ...}},
        but the shape ultimately depends on what the model returns.

    Raises:
        ValueError: If the reply contains no YAML document.
        Errors raised by the OpenAI client or the YAML parser are not caught.
    """
    # Number the monologues from 1 and join them into a single string
    call_so_far = '\n'.join(
        f"[{index}] {monologue}"
        for index, monologue in enumerate(call['monologues_so_far'], start=1)
    )
    system_prompt = """
You are functioning as a virtual sales manager with the task of meticulously analyzing phone call transcripts between our Sales Representatives (Sales Rep) and potential clients (Prospect). Your role involves providing insight into the dynamics of the conversation, with a particular emphasis on identifying specific objections raised by the Prospect.

The transcripts you will be examining are structured as follows.

Each line of dialogue (where one speaker talks) is presented in the format:
[Monologue Index] [Speaker]: [Monologue]

Your task is to predict in real time whether the specific objection (described further below) happened given the text of the call so far.

"""

    call_transcript = f"""
###

Here is the complete transcript of the phone call so far:
{call_so_far}

###
"""

    user_prompt = """
Answer the following yes or no question based ONLY on the contents of the call transcript provided above. 
Does the call contain a timing objection? 
To provide clarity, a 'timing objection' refers to a scenario where the Prospect expresses interest in the product and is open to discussing its utilization within their company at a future date. However, they indicate that the current moment is not opportune for making purchasing decisions. This may be due to a variety of time-related factors such as the end of a financial quarter, the onboarding of new employees, ongoing layoffs, etc.

Examples of phrases that might indicate timing objections:
"Now is not the right time"
"Next quarter might be a better time to circle back"
"Right now our focus is on this, but that might change in the future"
"No way that we're buying anything this year, but next year maybe"

Please note that a 'timing objection' does not refer to instances where the Prospect indicates that it is simply a bad time for them to engage in the phone call itself. 
It specifically pertains to situations where there are external, time-bound factors affecting the Prospect’s ability or willingness to proceed with a purchasing decision.
Furthermore, a 'timing objection' is not applicable if the Prospect categorically refuses to consider the product for reasons that are likely to be permanent or long-term, such as an established strategy, commitment to a competitor, size of the company (too small), or budgetary constraints.
To reiterate, if the prospect says they are currently using a competitor but seem open to talking after the competitor's contract runs out, that does NOT count as a timing objection.

Objections can only be said by prospects, so if the sales rep mentions something time-related, that should not be counted as an objection.

Give your response in YAML format. The response should have the following fields:
reason - A string value that explains your reasoning for the answer.
monologueIndex - An integer value pointing to the first index of the monologue in the call transcript that justifies the answer to the question. Only provide this field if the answer was true.
answer - A boolean value of true or false. True indicates that the answer to the question was "yes". False indicates that the answer to the question was "no".

Here is an example of a well-formatted response using YAML format.
---
result:
    reason: This is the reason why the answer to the question is true.
    monologueIndex: 5
    answer: true

Thank you for your attention to detail and thorough analysis. Your insights are invaluable to our continuous efforts in enhancing our sales strategies and understanding client needs more effectively.

Your response:
"""

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": call_transcript},
            {"role": "user", "content": user_prompt},
        ],
        # temperature=0 keeps the answers as repeatable as the API allows
        temperature=0,
        max_tokens=1024,
    )

    content = response.choices[0]["message"]["content"]

    # Extract the predicted output: the first non-empty YAML document of the
    # reply, after removing any Markdown fence the model wrapped around it.
    documents = [
        document
        for document in yaml.safe_load_all(_strip_code_fence(content))
        if document is not None
    ]
    if not documents:
        raise ValueError(f"Model reply contained no YAML document: {content!r}")

    return documents[0]

In [39]:
import concurrent.futures
import time
from tqdm import tqdm

# Define a function that applies predict_timing_objection to a row and measures the time taken
def apply_predict_timing_objection(row):
    """Run `predict_timing_objection` on one row of `X` and time the call.

    Args:
        row: The positional values of one `X` row (for example a tuple from
            `X.itertuples(index=False)`), in the same order as `X.columns`.

    Returns:
        A `(result, time_taken)` tuple: the value returned by
        `predict_timing_objection` and the elapsed seconds as a float.
    """
    # Rebuild the column-name -> value mapping that predict_timing_objection reads
    row_dict = dict(zip(X.columns, row))

    # perf_counter is monotonic and high-resolution, so the measured duration
    # cannot be skewed by system clock adjustments the way time.time() can
    start_time = time.perf_counter()
    result = predict_timing_objection(row_dict)
    time_taken = time.perf_counter() - start_time

    return result, time_taken

# Run the predictions concurrently on a pool of 10 worker threads
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Apply the function to every row in X. executor.map yields results in
    # input order, so position k of `results` corresponds to row k of X;
    # tqdm wraps the iterator only to show progress.
    results = list(tqdm(executor.map(apply_predict_timing_objection, X.itertuples(index=False)), total=len(X)))

# Separate the (result, time_taken) pairs into two parallel tuples
Y_pred_list, times = zip(*results)

# Pull the predicted answer and the model's stated reason out of each result
# into single-column DataFrames
Y_pred = pd.DataFrame([result['result']['answer'] for result in Y_pred_list])
Y_reason = pd.DataFrame([result['result']['reason'] for result in Y_pred_list])


# Print the average time taken per invocation (in seconds)
print('Average time taken per invocation:', sum(times) / len(times))

  3%|▎         | 95/3163 [02:06<44:07,  1.16it/s]  

In [None]:
from sklearn.metrics import precision_score, recall_score

# Precision: share of predicted timing objections that are labelled as such
precision = precision_score(y_true=Y, y_pred=Y_pred)
print('Precision:', precision)

# Recall: share of labelled timing objections that the model predicted
recall = recall_score(y_true=Y, y_pred=Y_pred)
print('Recall:', recall)

Precision: 0.8493150684931506
Recall: 0.9117647058823529


Precision over 0.8 and Recall over 0.9 is pretty good for this type of problem! (Especially when timing objections can be somewhat ambiguously labelled.)

However, taking 11s on average to call the model to predict a timing objection is MUCH too slow - if this model is intended to surface responses to timing objections in real time to the user, then you need **less than 0.25s latency**. That isn't possible by calling out to an external service like OpenAI's GPT-4, but might be possible through running smaller, lower-latency local models.

Can you build a model that achieves comparable precision and recall to the GPT-4 based model presented here, but with sub-0.25s latency per invocation? 

Anything is fair game while building the low-latency model, including injecting additional human knowledge and sales context, data augmentation or generation, using external services or off the shelf pretrained models, labelling additional data, and more.