In [7]:
# imports
import ast  # for converting embeddings saved as strings back to arrays
import openai  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
from scipy import spatial  # for calculating vector similarities for search
import os

os.environ["OPENAI_API_KEY"] = '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'
openai.api_key = os.environ["OPENAI_API_KEY"]

# models
EMBEDDING_MODEL = "text-embedding-ada-002"
GPT_MODEL = "gpt-3.5-turbo"

In [2]:
# download pre-chunked text and pre-computed embeddings
# this file is ~200 MB, so may take a minute depending on your connection speed
embeddings_path = "csm_nltk.csv"

df = pd.read_csv(embeddings_path)

In [3]:
# convert embeddings from CSV str type back to list type
df['embedding'] = df['embedding'].apply(ast.literal_eval)

In [4]:
# the dataframe has two columns: "text" and "embedding"
df.head()

Unnamed: 0,text,embedding
0,Utah Customer Service \nManagement\nLast updat...,"[-0.01225913967937231, -0.021588172763586044, ..."
1,Some examples and graphics depicted herein are...,"[-0.018668705597519875, -0.010630969889461994,..."
2,Table of Contents\nCustomer Service Management...,"[0.02603786624968052, -0.011726273223757744, 0..."
3,"ServiceNow, the ServiceNow logo, Now, and othe...","[-0.011318285949528217, -0.022291267290711403,..."
4,Customer Service Management\nResolve complex i...,"[-0.017990540713071823, -0.007459492422640324,..."


In [5]:
# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 100
) -> tuple[list[str], list[float]]:
    """Returns a list of strings and relatednesses, sorted from most related to least."""
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"]))
        for i, row in df.iterrows()
    ]
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]


In [6]:
def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))


def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int
) -> str:
    """Return a message for GPT, with relevant source texts pulled from a dataframe."""
    strings, relatednesses = strings_ranked_by_relatedness(query, df)
    introduction = 'Use the below ServiceNow Customer Service Management Documentation to answer the subsequent question. If the answer cannot be found in the article, write "I could not find an answer."'
    question = f"\n\nQuestion: {query}"
    message = introduction
    for string in strings:
        next_article = f'\n\n ServiceNow Doc :\n"""\n{string}\n"""'
        if (
            num_tokens(message + next_article + question, model=model)
            > token_budget
        ):
            break
        else:
            message += next_article
    return message + question


def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
) -> str:
    """Answers a query using GPT and a dataframe of relevant texts and embeddings."""
    message = query_message(query, df, model=model, token_budget=token_budget)
    if print_message:
        print(message)
    messages = [
        {"role": "system", "content": "You answer questions about Customer Service Management, ServiceNow."},
        {"role": "user", "content": message},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0
    )
    response_message = response["choices"][0]["message"]["content"]
    return response_message



In [10]:
ask('what is ominichannel?')

'Omnichannel refers to the ability to provide support to customers through multiple channels such as chat, email, phone, and messaging apps, and to ensure a seamless experience across all channels. ServiceNow offers Omnichannel Callback for Customer Service Management, which provides an option for customer service agents to call customers back when the wait time for agents is long or agents are unavailable.'

In [11]:
ask('what plugin is needed to install ominichannel?')

'The Omnichannel Callback for Customer Service Management application requires the Omnichannel Callback for Customer Service Management application (com.sn_omnichannel_callback) to be installed.'

In [12]:
ask('what are roles required for guidance?')

'The role required for creating, viewing, updating, and deleting guidances is "sn_gd_guidance.guidance_manager". The role required for viewing guidances is "sn_gd_guidance.guidance_user". Both roles are automatically given to customer service agents and managers.'