In [11]:
import ast  # for converting embeddings saved as strings back to arrays
from openai import OpenAI  # for calling the OpenAI API
import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
import os  # for getting API token from env variable OPENAI_API_KEY
from scipy import spatial  # for calculating vector similarities for search

# models
EMBEDDING_MODEL = "text-embedding-3-large"
GPT_MODEL = "gpt-4o"

client = OpenAI()

# download pre-chunked text and pre-computed embeddings
df = pd.read_csv("artisan.csv")

# convert embeddings from CSV str type back to list type
df['embedding'] = df['embedding'].apply(ast.literal_eval)

# Initialize a list to store the last 10 user messages
last_10_messages = []

# Function to add a message to history
def add_message_to_history(message: str, history: list):
    """Adds a message to the history, keeping only the last 10."""
    history.append(message)
    if len(history) > 10:
        history.pop(0)

# Function to construct the message context
def construct_message_context(history: list, current_message: str) -> list:
    """Constructs the message context with the last 10 messages and the current one."""
    messages = [{"role": "system", "content": "You are Ava, Artisan's AI BDR. You answer user questions about the product."}]
    
    for past_message in history:
        messages.append({"role": "user", "content": past_message})
    
    # Add the current message
    messages.append({"role": "user", "content": current_message})
    
    return messages

# Function to manage token count
def manage_token_count(messages: list, model: str = GPT_MODEL, max_tokens: int = 4096 - 500):
    """Truncates the message list to fit within the token limit."""
    while num_tokens(' '.join([m['content'] for m in messages]), model=model) > max_tokens:
        if len(messages) > 2:
            # Remove the oldest user message
            messages.pop(1)
        else:
            break
    return messages

# Function to count tokens
def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

# Search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 100
) -> tuple[list[str], list[float]]:
    """Returns a list of strings and relatednesses, sorted from most related to least."""
    query_embedding_response = client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response.data[0].embedding
    strings_and_relatednesses = [
        (row["text"], relatedness_fn(query_embedding, row["embedding"]))
        for i, row in df.iterrows()
    ]
    strings_and_relatednesses.sort(key=lambda x: x[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]

# Function to construct the query message with system prompt
def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int
) -> str:
    """Return a message for GPT, with relevant source texts pulled from a dataframe."""

    # System prompt setting the context
    system_prompt = (
        "You are a knowledgeable and helpful employee at Artisan AI, a platform that leverages cutting-edge AI technology "
        "to automate and optimize outbound sales tasks. You answer customer questions in a clear, "
        "professional, and direct manner, using relevant information from the provided data."
    )
    
    # Construct the question part
    question = f"Question: {query}"
    message = ""
    
    for string in strings_ranked_by_relatedness(query, df)[0]:
        next_article = f'\n\nRelevant Information:\n"""\n{string}\n"""'
        if (
            num_tokens(system_prompt + message + next_article + question, model=model)
            > token_budget
        ):
            break
        else:
            message += next_article
    
    # Combine system prompt with the constructed message and question
    return system_prompt + "\n\n" + message + "\n\n" + question

# Main function to answer a query
def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
    history: list = last_10_messages
) -> str:
    """Answers a query using GPT, including the last 10 messages in context."""
    # Add the current query to the history
    add_message_to_history(query, history)
    
    # Construct the full context with history
    messages = construct_message_context(history, query)
    
    # Ensure the token count is within limits
    messages = manage_token_count(messages, model=model, max_tokens=token_budget)
    
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0
    )
    
    response_message = response.choices[0].message.content
    return response_message


In [12]:
ask('What can you do?')

'Artisan is a powerful platform designed to help businesses create, manage, and optimize their digital experiences. Here are some key features and capabilities:\n\n1. **Personalization**: Tailor content and experiences to individual users based on their behavior, preferences, and demographics.\n2. **A/B Testing**: Run experiments to determine which variations of your content perform best.\n3. **Analytics**: Gain insights into user behavior and the performance of your digital experiences.\n4. **Content Management**: Easily create, edit, and manage your digital content without needing extensive technical skills.\n5. **Integration**: Seamlessly integrate with other tools and platforms you’re already using.\n6. **Automation**: Automate repetitive tasks to save time and ensure consistency.\n\nIf you have specific needs or questions, feel free to ask!'

In [6]:
ask('How can I make a marketing campaign on Artisan?')

'Creating a marketing campaign on Artisan is a streamlined process designed to help you engage with your audience effectively. Here’s a step-by-step guide to get you started:\n\n1. **Log In to Your Artisan Account:**\n   - Access your Artisan dashboard by logging in with your credentials.\n\n2. **Navigate to Campaigns:**\n   - On the dashboard, find and click on the "Campaigns" tab. This will take you to the campaign management section.\n\n3. **Create a New Campaign:**\n   - Click on the "Create New Campaign" button. This will open a new campaign setup wizard.\n\n4. **Define Your Campaign Goals:**\n   - Specify the objectives of your campaign. Whether it’s to increase brand awareness, drive sales, or promote a new product, clearly defining your goals will help tailor your campaign effectively.\n\n5. **Select Your Audience:**\n   - Choose the target audience for your campaign. Artisan allows you to segment your audience based on various criteria such as demographics, behavior, and past 