In [19]:
import os

# Define the target directory
target_directory = r"C:\Users\pablosal\Desktop\gbb-ai-smart-document-processing"

# Check that the path is an actual directory. os.path.exists() is also true
# for a regular file, in which case os.chdir() would raise NotADirectoryError.
if os.path.isdir(target_directory):
    # Change the current working directory
    os.chdir(target_directory)
    print(f"Directory changed to {os.getcwd()}")
else:
    print(f"Directory {target_directory} does not exist.")

Directory changed to C:\Users\pablosal\Desktop\gbb-ai-smart-document-processing


# Assumptions 📋

### Release Strategy

| Stage                | Target Audience             | Max Users | Dates |
|----------------------|-----------------------------|-----------|-------|
| Alpha                | ESPN Mobile/Internal users  | 10K       | TBD   |
| Beta                 | ESPN Mobile                 | 1M        | TBD   |
| General Availability | ESPN Mobile                 | 30M       | TBD   |
| Production Release   | ESPN Mobile + Web           | 145M      | TBD   |


### Scenarios

- **Average Scenario:**
  - Engagement Percentage: 20%
  - This is the expected average engagement rate where 20% of the users are actively using the chatbot.

- **Worst-Case Scenario:**
  - Engagement Percentage: 60%
  - This is the worst-case engagement rate where 60% of the users are actively using the chatbot, leading to higher resource usage.


### Constants

- `TOKENS_IN_PROMPT_CALL = 220`
  - This represents the number of tokens used in each prompt call to the model. A token can be as short as one character or as long as one word (e.g., "a" or "apple").
  
- `TOKENS_IN_MODEL_RESPONSE = 97`
  - This represents the number of tokens used in each response from the model. Similar to the prompt tokens, these can vary in length.
  
- `TOTAL_TOKENS_PER_QUERY = TOKENS_IN_PROMPT_CALL + TOKENS_IN_MODEL_RESPONSE`
  - This is the total number of tokens used per query, combining both the prompt and the response tokens.
  
- `QUERIES_PER_USER_PER_MINUTE = 4`
  - This represents the number of queries each user makes per minute. This is an average value based on expected user behavior.
  
- `ENGAGEMENT_PERCENTAGE = 0.20`
  - This represents the percentage of users that are actively engaged. In an average scenario, 20% of users are expected to be active. In a worst-case scenario, this could be as high as 60%.
  
- `SESSIONS_PER_USER_PER_DAY = 1`
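  - This represents the number of sessions each user has per day. A session is defined as a period of continuous interaction with the chatbot. Note that the monthly request calculation multiplies sessions per day by queries per minute, so each session is implicitly treated as one minute of activity.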
  
- `NUMBER_OF_DAYS_ENGAGEMENT_PER_MONTH = 24`
  - This represents the number of days per month on which an engaged user interacts with the chatbot. It assumes engagement on 24 days of a typical month rather than on every calendar day.

### Studying inputs 

In [20]:
import pandas as pd

# Location of the token-count export analysed in this section.
COUNTS_CSV_PATH = r"C:\Users\pablosal\Desktop\gbb-ai-smart-document-processing\utils\counts.csv"

# Constant added to every 'Input tokens' value before computing statistics.
# NOTE(review): presumably a fixed per-call prompt overhead — confirm the origin of 200.
INPUT_TOKEN_OFFSET = 200

# Read the CSV file
try:
    df = pd.read_csv(COUNTS_CSV_PATH)
except pd.errors.ParserError as e:
    print(f"ParserError: {e}")
    # Re-raise: without a freshly loaded frame the code below would either
    # fail with a NameError or silently reuse a stale `df` left in the kernel.
    raise


df['Input tokens'] = df['Input tokens'] + INPUT_TOKEN_OFFSET

# One Series per statistic, indexed by column name; assembling them into a
# DataFrame yields one row per token column and one column per statistic.
statistics = {
    'Mean': df.mean(),
    'Median': df.median(),
    'Standard Deviation': df.std(),
    '50th Percentile': df.quantile(0.50),
    '95th Percentile': df.quantile(0.95),
    '99th Percentile': df.quantile(0.99)
}

stats_df = pd.DataFrame(statistics)

stats_df

Unnamed: 0,Mean,Median,Standard Deviation,50th Percentile,95th Percentile,99th Percentile
Input tokens,13924.6,220.0,30498.1,220.0,114847.0,117326.0
Output tokens,175.9,97.0,806.8,97.0,207.0,499.4


### Representativeness of Data Provided by ESPN

The high variability and the presence of outlier utterances suggest that the dataset may not be representative of the general population. It is unclear whether this data includes load-testing scenarios run by the development team; if it does, those runs could further skew the data toward higher token counts. The extreme values heavily influence the mean (13,924.6 input tokens versus a median of 220), making the median a more reliable measure of central tendency for this data.

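- **Tokens in prompt call:** 220 (median input tokens, used for the average-case scenario)
- **Tokens in model response:** 97 (median output tokens, used for the average-case scenario)
- **Total tokens per query:** 317 (sum of the two medians, used for the average-case scenario)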

## Calculating Pricing Numbers in Average Scenario (PAYG) Tokens 📊

In [21]:
import pandas as pd

# --- Average-scenario inputs -------------------------------------------------
# Token sizes of a single query (prompt + response).
TOKENS_IN_PROMPT_CALL, TOKENS_IN_MODEL_RESPONSE = 220, 97
TOTAL_TOKENS_PER_QUERY = sum((TOKENS_IN_PROMPT_CALL, TOKENS_IN_MODEL_RESPONSE))

# Usage assumptions.
QUERIES_PER_USER_PER_MINUTE = 4
ENGAGEMENT_PERCENTAGE = 0.20                # fraction of users engaged (20%)
SESSIONS_PER_USER_PER_DAY = 1               # one session per user per day
NUMBER_OF_DAYS_ENGAGEMENT_PER_MONTH = 24    # engaged days per month

# --- Release stages ----------------------------------------------------------
# One record per rollout stage, ordered from the smallest to the largest audience.
STAGE_DATA = [
    dict(stage='Alpha', target_audience='ESPN Mobile/Internal users', max_users=10_000),
    dict(stage='Beta', target_audience='ESPN Mobile', max_users=1_000_000),
    dict(stage='General Availability', target_audience='ESPN Mobile', max_users=30_000_000),
    dict(stage='Production Release', target_audience='ESPN Mobile + Web', max_users=145_000_000),
]

In [22]:
from typing import Any, Dict, List, Optional

def calculate_metrics(
    max_users: int,
    engagement_percentage: float,
    sessions_per_user_per_day: int,
    number_of_days_engagement_per_month: int,
    queries_per_user_per_minute: Optional[float] = None,
    tokens_in_prompt_call: Optional[int] = None,
    tokens_in_model_response: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Calculate request and token volumes for one audience size.

    Args:
        max_users (int): The maximum number of users.
        engagement_percentage (float): Fraction of users that are engaged;
            it is multiplied directly with ``max_users`` (e.g. 0.20 for 20%).
        sessions_per_user_per_day (int): The number of sessions per user per day.
        number_of_days_engagement_per_month (int): The number of days users are engaged per month.
        queries_per_user_per_minute (Optional[float]): Queries each active user issues
            per minute. Defaults to the notebook-level ``QUERIES_PER_USER_PER_MINUTE``.
        tokens_in_prompt_call (Optional[int]): Input tokens per query. Defaults to the
            notebook-level ``TOKENS_IN_PROMPT_CALL``.
        tokens_in_model_response (Optional[int]): Output tokens per query. Defaults to
            the notebook-level ``TOKENS_IN_MODEL_RESPONSE``.

    Returns:
        Dict[str, Any]: A dictionary with the keys ``active_users``,
        ``worst_cases_total_request_per_minute``, ``total_requests_per_month``,
        ``total_input_tokens_per_month``, ``total_output_tokens_per_month`` and
        ``total_tokens_per_month``.
    """
    # Resolve omitted overrides from the notebook-level constants at call time,
    # which matches the original behavior while letting callers pass explicit
    # values instead of depending on whichever constants cell was run last.
    if queries_per_user_per_minute is None:
        queries_per_user_per_minute = QUERIES_PER_USER_PER_MINUTE
    if tokens_in_prompt_call is None:
        tokens_in_prompt_call = TOKENS_IN_PROMPT_CALL
    if tokens_in_model_response is None:
        tokens_in_model_response = TOKENS_IN_MODEL_RESPONSE

    active_users = max_users * engagement_percentage
    # Peak load: every active user issuing queries during the same minute.
    worst_case_total_request_per_minute = active_users * queries_per_user_per_minute
    # Daily volume multiplies sessions by the per-minute query rate, i.e. each
    # session contributes exactly one minute's worth of queries.
    total_request_per_day = active_users * sessions_per_user_per_day * queries_per_user_per_minute
    total_requests_per_month = total_request_per_day * number_of_days_engagement_per_month

    total_input_tokens_per_month = total_requests_per_month * tokens_in_prompt_call
    total_output_tokens_per_month = total_requests_per_month * tokens_in_model_response
    total_tokens_per_month = total_input_tokens_per_month + total_output_tokens_per_month

    return {
        'active_users': active_users,
        'worst_cases_total_request_per_minute': worst_case_total_request_per_minute,
        'total_requests_per_month': total_requests_per_month,
        'total_input_tokens_per_month': total_input_tokens_per_month,
        'total_output_tokens_per_month': total_output_tokens_per_month,
        'total_tokens_per_month': total_tokens_per_month
    }

def calculate_for_stages(stages: List[Dict[str, Any]], engagement_percentage: float, sessions_per_user_per_day: int, number_of_days_engagement_per_month: int) -> List[Dict[str, Any]]:
    """
    Run ``calculate_metrics`` for every release stage and collect one row per stage.

    Args:
        stages (List[Dict[str, Any]]): Stage records; each must provide the keys
            'stage', 'target_audience' and 'max_users'.
        engagement_percentage (float): The percentage of users that are engaged.
        sessions_per_user_per_day (int): The number of sessions per user per day.
        number_of_days_engagement_per_month (int): The number of days users are engaged per month.

    Returns:
        List[Dict[str, Any]]: One dictionary per stage, in input order, holding the
        stage description followed by its calculated metrics.
    """
    # Metric columns copied from calculate_metrics' result, in output order.
    metric_columns = (
        'active_users',
        'worst_cases_total_request_per_minute',
        'total_requests_per_month',
        'total_input_tokens_per_month',
        'total_output_tokens_per_month',
        'total_tokens_per_month',
    )

    def build_row(stage_info: Dict[str, Any]) -> Dict[str, Any]:
        # Combine the descriptive stage fields with the computed volumes.
        user_cap = stage_info['max_users']
        computed = calculate_metrics(user_cap, engagement_percentage, sessions_per_user_per_day, number_of_days_engagement_per_month)
        row = {
            'stage': stage_info['stage'],
            'target_audience': stage_info['target_audience'],
            'max_users': user_cap,
        }
        for column in metric_columns:
            row[column] = computed[column]
        return row

    return [build_row(stage_info) for stage_info in stages]

In [23]:
import pandas as pd

# Average scenario: compute request/token volumes for every stage using the
# constants currently bound at notebook level.
results = calculate_for_stages(STAGE_DATA, ENGAGEMENT_PERCENTAGE, SESSIONS_PER_USER_PER_DAY, NUMBER_OF_DAYS_ENGAGEMENT_PER_MONTH)

# NOTE: this rebinds `df`, which previously held the token counts read from the CSV.
df = pd.DataFrame(results)
# Global pandas display option: render floats with one decimal place from here on.
pd.set_option('display.float_format', '{:.1f}'.format)

df

Unnamed: 0,stage,target_audience,max_users,active_users,worst_cases_total_request_per_minute,total_requests_per_month,total_input_tokens_per_month,total_output_tokens_per_month,total_tokens_per_month
0,Alpha,ESPN Mobile/Internal users,10000,2000.0,8000.0,192000.0,42240000.0,18624000.0,60864000.0
1,Beta,ESPN Mobile,1000000,200000.0,800000.0,19200000.0,4224000000.0,1862400000.0,6086400000.0
2,General Availability,ESPN Mobile,30000000,6000000.0,24000000.0,576000000.0,126720000000.0,55872000000.0,182592000000.0
3,Production Release,ESPN Mobile + Web,145000000,29000000.0,116000000.0,2784000000.0,612480000000.0,270048000000.0,882528000000.0


#### GPT-4o Model Pricing (PAYG, USD per 1K tokens)

In [24]:
# Model pricing: per-model input/output token prices. The cost calculation in
# this notebook divides token counts by 1000 before applying these rates.
MODELS = {
    'GPT-4o Global Deployment': dict(input_cost=0.005, output_cost=0.015),
    'GPT-4o Regional API': dict(input_cost=0.005, output_cost=0.015),
    'GPT-4o-mini Global Deployment': dict(input_cost=0.00015, output_cost=0.0006),
    'GPT-4o-mini Regional API': dict(input_cost=0.000165, output_cost=0.00066),
}

def calculate_percentage_difference(cost1: float, cost2: float) -> float:
    """
    Express how far ``cost2`` is from ``cost1`` as a percentage of ``cost1``.

    Args:
        cost1 (float): The baseline cost (the denominator).
        cost2 (float): The cost being compared against the baseline.

    Returns:
        float: The relative change in percent; negative when ``cost2`` is cheaper.
    """
    delta = cost2 - cost1
    relative_change = delta / cost1
    return relative_change * 100

def compare_models(models: Dict[str, Dict[str, float]]) -> None:
    """
    Print the input and output price deltas for every unordered pair of models.

    Each pair is reported once, with the model that appears earlier in
    ``models`` acting as the baseline of the percentage difference.

    Args:
        models (Dict[str, Dict[str, float]]): Model names mapped to a dict with
            'input_cost' and 'output_cost' entries.
    """
    names = list(models.keys())
    for position, model1 in enumerate(names):
        # Pair the baseline only with models listed after it to avoid repeats.
        for model2 in names[position + 1:]:
            input_cost_diff = calculate_percentage_difference(
                models[model1]['input_cost'], models[model2]['input_cost']
            )
            output_cost_diff = calculate_percentage_difference(
                models[model1]['output_cost'], models[model2]['output_cost']
            )
            print(f"Comparing {model1} and {model2}:")
            print(f"  Input cost difference: {input_cost_diff:.2f}%")
            print(f"  Output cost difference: {output_cost_diff:.2f}%")
            print()

# Print the pairwise input/output price differences for every model in MODELS.
compare_models(MODELS)

Comparing GPT-4o Global Deployment and GPT-4o Regional API:
  Input cost difference: 0.00%
  Output cost difference: 0.00%

Comparing GPT-4o Global Deployment and GPT-4o-mini Global Deployment:
  Input cost difference: -97.00%
  Output cost difference: -96.00%

Comparing GPT-4o Global Deployment and GPT-4o-mini Regional API:
  Input cost difference: -96.70%
  Output cost difference: -95.60%

Comparing GPT-4o Regional API and GPT-4o-mini Global Deployment:
  Input cost difference: -97.00%
  Output cost difference: -96.00%

Comparing GPT-4o Regional API and GPT-4o-mini Regional API:
  Input cost difference: -96.70%
  Output cost difference: -95.60%

Comparing GPT-4o-mini Global Deployment and GPT-4o-mini Regional API:
  Input cost difference: 10.00%
  Output cost difference: 10.00%



In [25]:
import math

def format_cost(cost: float) -> str:
    """
    Format a dollar amount, abbreviating thousands as K and millions as M.

    The unit is chosen from the magnitude of the amount, so negative values
    (e.g. a saving or a cost delta) are scaled the same way as positive ones
    and rendered with a leading minus sign, such as "-$1.50K".

    Args:
        cost (float): The cost to format.

    Returns:
        str: The formatted cost with two decimal places, e.g. "$490.56",
        "$49.06K" or "$1.47M".
    """
    # Work on the magnitude so the K/M thresholds also apply to negative amounts.
    sign = "-" if cost < 0 else ""
    magnitude = abs(cost)

    if magnitude >= 1_000_000:
        return f"{sign}${magnitude / 1_000_000:.2f}M"
    elif magnitude >= 1_000:
        return f"{sign}${magnitude / 1_000:.2f}K"
    else:
        return f"{sign}${magnitude:.2f}"

def calculate_costs_per_model(results, models):
    """
    Price every stage's monthly token volume under every model.

    Args:
        results (list): Stage metric dictionaries providing 'stage',
            'target_audience', 'max_users', 'total_input_tokens_per_month'
            and 'total_output_tokens_per_month'.
        models (dict): Model names mapped to a dict with 'input_cost' and
            'output_cost' rates, applied per 1000 tokens.

    Returns:
        list: One dictionary per (stage, model) combination, stage-major, with
        integer token counts and costs already formatted as strings.
    """
    def price_row(stage_metrics, model_label, rates):
        # Token totals are divided by 1000 before the per-model rate is applied.
        input_spend = stage_metrics['total_input_tokens_per_month'] / 1000 * rates['input_cost']
        output_spend = stage_metrics['total_output_tokens_per_month'] / 1000 * rates['output_cost']
        combined_spend = input_spend + output_spend
        return {
            'stage': stage_metrics['stage'],
            'target_audience': stage_metrics['target_audience'],
            'max_users': stage_metrics['max_users'],
            'model': model_label,
            'total_input_tokens_per_month': int(stage_metrics['total_input_tokens_per_month']),
            'total_output_tokens_per_month': int(stage_metrics['total_output_tokens_per_month']),
            'input_cost_per_month': format_cost(input_spend),
            'output_cost_per_month': format_cost(output_spend),
            'total_cost_per_month': format_cost(combined_spend)
        }

    return [
        price_row(stage_metrics, model_label, rates)
        for stage_metrics in results
        for model_label, rates in models.items()
    ]

In [26]:
import pandas as pd

# Average scenario: price each stage's monthly token volume under every model
# in MODELS; `results` comes from the average-scenario calculate_for_stages call.
cost = calculate_costs_per_model(results, MODELS)
cost_df = pd.DataFrame(cost)

cost_df

Unnamed: 0,stage,target_audience,max_users,model,total_input_tokens_per_month,total_output_tokens_per_month,input_cost_per_month,output_cost_per_month,total_cost_per_month
0,Alpha,ESPN Mobile/Internal users,10000,GPT-4o Global Deployment,42240000,18624000,$211.20,$279.36,$490.56
1,Alpha,ESPN Mobile/Internal users,10000,GPT-4o Regional API,42240000,18624000,$211.20,$279.36,$490.56
2,Alpha,ESPN Mobile/Internal users,10000,GPT-4o-mini Global Deployment,42240000,18624000,$6.34,$11.17,$17.51
3,Alpha,ESPN Mobile/Internal users,10000,GPT-4o-mini Regional API,42240000,18624000,$6.97,$12.29,$19.26
4,Beta,ESPN Mobile,1000000,GPT-4o Global Deployment,4224000000,1862400000,$21.12K,$27.94K,$49.06K
5,Beta,ESPN Mobile,1000000,GPT-4o Regional API,4224000000,1862400000,$21.12K,$27.94K,$49.06K
6,Beta,ESPN Mobile,1000000,GPT-4o-mini Global Deployment,4224000000,1862400000,$633.60,$1.12K,$1.75K
7,Beta,ESPN Mobile,1000000,GPT-4o-mini Regional API,4224000000,1862400000,$696.96,$1.23K,$1.93K
8,General Availability,ESPN Mobile,30000000,GPT-4o Global Deployment,126720000000,55872000000,$633.60K,$838.08K,$1.47M
9,General Availability,ESPN Mobile,30000000,GPT-4o Regional API,126720000000,55872000000,$633.60K,$838.08K,$1.47M


## Calculating Pricing Numbers in Worst-Case Scenario (60% engagement) (PAYG) Tokens 📊

In [27]:
# Worst-case scenario inputs. Every value repeats the average-scenario cell
# except ENGAGEMENT_PERCENTAGE, which is raised to 60%.
# NOTE: these assignments rebind the notebook-level names, so any cell run
# after this one sees the worst-case engagement value.
TOKENS_IN_PROMPT_CALL, TOKENS_IN_MODEL_RESPONSE = 220, 97
TOTAL_TOKENS_PER_QUERY = sum((TOKENS_IN_PROMPT_CALL, TOKENS_IN_MODEL_RESPONSE))
QUERIES_PER_USER_PER_MINUTE = 4
ENGAGEMENT_PERCENTAGE = 0.60                # fraction of users engaged (60%)
SESSIONS_PER_USER_PER_DAY = 1               # one session per user per day
NUMBER_OF_DAYS_ENGAGEMENT_PER_MONTH = 24    # engaged days per month

In [28]:
# Worst-case scenario: same per-stage calculation, now using the
# ENGAGEMENT_PERCENTAGE value bound by the preceding constants cell.
results_worst_case = calculate_for_stages(STAGE_DATA, ENGAGEMENT_PERCENTAGE, SESSIONS_PER_USER_PER_DAY, NUMBER_OF_DAYS_ENGAGEMENT_PER_MONTH)

# Hold the worst-case metrics in a descriptively named DataFrame
engagement_metrics_df = pd.DataFrame(results_worst_case)

engagement_metrics_df

Unnamed: 0,stage,target_audience,max_users,active_users,worst_cases_total_request_per_minute,total_requests_per_month,total_input_tokens_per_month,total_output_tokens_per_month,total_tokens_per_month
0,Alpha,ESPN Mobile/Internal users,10000,6000.0,24000.0,576000.0,126720000.0,55872000.0,182592000.0
1,Beta,ESPN Mobile,1000000,600000.0,2400000.0,57600000.0,12672000000.0,5587200000.0,18259200000.0
2,General Availability,ESPN Mobile,30000000,18000000.0,72000000.0,1728000000.0,380160000000.0,167616000000.0,547776000000.0
3,Production Release,ESPN Mobile + Web,145000000,87000000.0,348000000.0,8352000000.0,1837440000000.0,810144000000.0,2647584000000.0


In [29]:
# Worst-case scenario: price the worst-case token volumes under every model.
# NOTE: this rebinds `cost` and `cost_df`, replacing the average-scenario
# values assigned by the earlier cost cell.
cost = calculate_costs_per_model(results_worst_case, MODELS)
cost_df = pd.DataFrame(cost)
cost_df

Unnamed: 0,stage,target_audience,max_users,model,total_input_tokens_per_month,total_output_tokens_per_month,input_cost_per_month,output_cost_per_month,total_cost_per_month
0,Alpha,ESPN Mobile/Internal users,10000,GPT-4o Global Deployment,126720000,55872000,$633.60,$838.08,$1.47K
1,Alpha,ESPN Mobile/Internal users,10000,GPT-4o Regional API,126720000,55872000,$633.60,$838.08,$1.47K
2,Alpha,ESPN Mobile/Internal users,10000,GPT-4o-mini Global Deployment,126720000,55872000,$19.01,$33.52,$52.53
3,Alpha,ESPN Mobile/Internal users,10000,GPT-4o-mini Regional API,126720000,55872000,$20.91,$36.88,$57.78
4,Beta,ESPN Mobile,1000000,GPT-4o Global Deployment,12672000000,5587200000,$63.36K,$83.81K,$147.17K
5,Beta,ESPN Mobile,1000000,GPT-4o Regional API,12672000000,5587200000,$63.36K,$83.81K,$147.17K
6,Beta,ESPN Mobile,1000000,GPT-4o-mini Global Deployment,12672000000,5587200000,$1.90K,$3.35K,$5.25K
7,Beta,ESPN Mobile,1000000,GPT-4o-mini Regional API,12672000000,5587200000,$2.09K,$3.69K,$5.78K
8,General Availability,ESPN Mobile,30000000,GPT-4o Global Deployment,380160000000,167616000000,$1.90M,$2.51M,$4.42M
9,General Availability,ESPN Mobile,30000000,GPT-4o Regional API,380160000000,167616000000,$1.90M,$2.51M,$4.42M
