This notebook was original ran in a docker container where the project directory (i.e. same directory as README.md) is located in `/code`, which is set below. If you run locally you'll need to set the path of your project directory accordingly.

In [1]:
%cd /code

/code


---

The `load_dotenv()` function below loads all the variables found in the `.env` file as environment variables. You must have a `.env` file located in the project directory containing your OpenAI API key, in the following format.

```
OPENAI_API_KEY=sk-...
```

In [44]:
from dotenv import load_dotenv
load_dotenv()
from IPython.display import display, Markdown, HTML

def wprint(string: str, max_width: int = 80) -> None:
    import textwrap
    """Print `string` with a maximum widgth."""
    wrapped_string = textwrap.fill(string, max_width)
    print(wrapped_string)

# OpenAI Chat

Here's a simple example using `GPT-3.5` chat:

In [3]:
from llm_workflow.openai import OpenAIChat

# model = OpenAIChat(model_name='gpt-3.5-turbo', temperature=0.2)
# response_a = model("Write a function that returns the sum of two numbers.")
# wprint(response_a)

In [4]:
# model = OpenAIChat(model_name='gpt-3.5-turbo', temperature=0.2)
# response_b = model("Write a function that returns the sum of two numbers.")

# wprint(response_b)

In [5]:
model.cost

NameError: name 'model' is not defined

In [6]:
from typing import Callable
import re

def extract_code_blocks(markdown_text: str) -> list:
    """Extract code blocks from Markdown text"""
    pattern = re.compile(r'```(?:python)?\s*(.*?)```', re.DOTALL)
    matches = pattern.findall(markdown_text)
    return [match.strip() for match in matches]


# def execute_code_blocks(code_blocks: list[str]) -> list[bool]:
#     """Execute code blocks and determine if the code blocks run successfully."""
#     block_results = []
#     local_namespace = {}
#     for code in code_blocks, start=1):
#         try:
#             _ = exec(code, local_namespace)
#             block_results.append(True)
#         except Exception as e:
#             block_results.append(False)
#     return block_results


import io
import contextlib

def execute_code_blocks(code_blocks: list[str], local_namespace: dict) -> list[bool]:
    """Execute code blocks and determine if the code blocks run successfully."""
    block_results = []
    for code in code_blocks:
        try:
            with contextlib.redirect_stdout(io.StringIO()):
                _ = exec(code, {}, local_namespace)
            block_results.append(True)
        except Exception as e:
            block_results.append(False)
    return block_results


class Trial:
    """TODO."""

    def __init__(
        self,
        model: Callable[[str], str],
        description: str | None = None,
        ):  # noqa
        """TODO."""
        self._model = model
        self.description = description
        self.responses = []
        self.prompts = None
        self.duration_seconds = None
        
    
    def __call__(self, prompts: list[str]) -> None:
        """
        Run the trial.
        Args:
            prompts: A list of prompts to send to the chat model. The list represents consecutive
                turns/requests in a conversation (e.g. a follow up question or request).
        """
        if self.duration_seconds is not None:
            raise ValueError("Trial has already been run.")
        
        self.prompts = prompts
        import time
        start = time.time()
        for prompt in self.prompts:
            self.responses.append(self._model(prompt))
        end = time.time()
        self.duration_seconds = end - start
        local_namespace = {}
        self.code_blocks = [extract_code_blocks(response) for response in self.responses]
        self._code_block_results = [
            execute_code_blocks(code_blocks, local_namespace) for code_blocks in self.code_blocks
        ]
    
    def __str__(self) -> str:
        """TODO."""
        results = ""
        if self.description:
            results += f"{self.description}\n"
        results += f"Time: {self.duration_seconds:.2f} seconds\n"
        results += f"Response Characters: {self.num_response_chars:,}\n"
        results += f"Response Characters per second: {self.response_chars_per_second:.1f}\n"
        results += f"Num code blocks: {self.num_code_blocks}\n"
        percent_successful_code_blocks = (
            self.num_successful_code_blocks / self.num_code_blocks
        )
        results += f"Percent Passing Code blocks: {percent_successful_code_blocks:.1%}\n"
        if self.cost:
            results += f"Cost: ${self.cost:.5f}\n"
        return results
    
    @property
    def num_response_chars(self) -> int:
        """TODO."""
        return sum(len(response) for response in self.responses)

    @property
    def response_chars_per_second(self) -> float:
        """TODO."""
        return self.num_response_chars / self.duration_seconds

    @property
    def num_code_blocks(self) -> int:
        """TODO."""
        return sum(len(code_blocks) for code_blocks in self.code_blocks)

    @property
    def num_successful_code_blocks(self) -> int:
        """TODO."""
        return sum(
            sum(code_block_results) for code_block_results in self._code_block_results
        )

    @property
    def cost(self) -> float:
        """TODO."""
        # if model has a cost attribute, use that
        if hasattr(self._model, 'cost'):
            return self._model.cost
        return None
    

In [7]:
trial = Trial(
    model=OpenAIChat(model_name='gpt-3.5-turbo', temperature=0.0),
    description='GPT-3.5 Turbo',
)

In [8]:
trial(['Write a function that returns the sum of two numbers.'],)
print(trial)

GPT-3.5 Turbo
Time: 2.62 seconds
Response Characters: 441
Response Characters per second: 168.1
Num code blocks: 2
Percent Passing Code blocks: 100.0%
Cost: $0.00026



In [9]:
display(Markdown(trial.responses[0]))

Sure! Here's a simple function in Python that takes two numbers as input and returns their sum:

```python
def sum_numbers(num1, num2):
    return num1 + num2
```

You can use this function by calling it and passing in the two numbers you want to add together. For example:

```python
result = sum_numbers(5, 3)
print(result)  # Output: 8
```

Feel free to modify the function to suit your needs. Let me know if you have any other questions!

In [10]:
trial.code_blocks[0]

['def sum_numbers(num1, num2):\n    return num1 + num2',
 'result = sum_numbers(5, 3)\nprint(result)  # Output: 8']

In [105]:
from typing import TypeVar
Model = TypeVar('Model', bound=Callable[[str], str])

from pydantic import BaseModel

class ModelCreation(BaseModel):
    create: Callable[[], Model]
    description: str


class CompareModels:
    """
    One requirements is that the underlying models need to maintain message history. They are
    passed a list of prompts. The second prompt is a follow up question to the first prompt so
    the model needs to be able to maintain the history of the conversation.
    """
    def __init__(
            self,
            use_cases: list[list[str]],
            model_creations: list[ModelCreation],
        ):
        """TODO."""
        # ensure no model descriptions are duplicated
        model_descriptions = [model_creation.description for model_creation in model_creations]
        if len(model_descriptions) != len(set(model_descriptions)):
            raise ValueError("Model descriptions must be unique.")

        self.use_cases = use_cases
        self._model_creations = model_creations
    
    def __call__(self) -> str:
        self.trials = []
        for use_case in self.use_cases:
            comparisons = []
            for create_model in self._model_creations:
                trial = Trial(model=create_model.create(), description=create_model.description)
                trial(prompts=use_case)
                comparisons.append(trial)
            self.trials.append(comparisons)

    @property
    def num_use_cases(self) -> int:
        """TODO."""
        return len(self.use_cases)

    @property
    def num_models(self) -> int:
        """TODO."""
        return len(self._model_creations)
    
    @property
    def model_descriptions(self) -> str:
        """TODO."""
        return [model_creation.description for model_creation in self._model_creations]
    
    def _sum_property(self, model_description: str, property_name: str) -> float:
        """TODO."""
        total = 0
        for use_case_trials in self.trials:
            for trial in use_case_trials:
                if trial.description == model_description:
                    value = getattr(trial, property_name)
                    if value:
                        total += value
        return total

    def duration_seconds(self, model_description: str) -> float:
        """TODO."""
        return self._sum_property(model_description, 'duration_seconds')

    def num_response_chars(self, model_description: str) -> int:
        """TODO."""
        return self._sum_property(model_description, 'num_response_chars')
    
    def response_chars_per_second(self, model_description: str) -> float:
        """TODO."""
        return self.num_response_chars(model_description) / self.duration_seconds(model_description)
    
    def num_code_blocks(self, model_description: str) -> int:
        """TODO."""
        return self._sum_property(model_description, 'num_code_blocks')
    
    def num_successful_code_blocks(self, model_description: str) -> int:
        """TODO."""
        return self._sum_property(model_description, 'num_successful_code_blocks')
    
    def percent_successful_code_blocks(self, model_description: str) -> float:
        """TODO."""
        return self.num_successful_code_blocks(model_description) / self.num_code_blocks(model_description)
    
    def cost(self, model_description: str) -> float | None:
        """TODO."""
        return self._sum_property(model_description, 'cost')


    def __str__(self) -> str:
        results = ""
        for model_description in self.model_descriptions:
            results += f"{model_description}\n"
            results += f"Time: {self.duration_seconds(model_description):.2f} seconds\n"
            results += f"Response Characters: {self.num_response_chars(model_description):,}\n"
            results += f"Response Characters per second: {self.response_chars_per_second(model_description):.1f}\n"
            results += f"Num code blocks: {self.num_code_blocks(model_description)}\n"
            results += f"Percent Passing Code blocks: {self.percent_successful_code_blocks(model_description):.1%}\n"
            if self.cost(model_description):
                results += f"Cost: ${self.cost(model_description):.5f}\n"
            results += "\n"
        return results


In [106]:
class MockChatModel:
    """Mock chat model for testing."""
    def __init__(self, prompts: list[str], responses: list[str], cost: float = None):
        """TODO."""
        assert len(prompts) == len(responses)
        self.cost = cost
        self.responses = dict(zip(prompts, responses))
        assert len(self.responses) == len(responses)
    
    def __call__(self, prompt: str) -> str:
        """TODO."""
        return self.responses[prompt]


In [107]:
import yaml

# Load data from YAML file
with open("/code/tests/test_data/compare/mock_conversation__sum_function.yml", "r", encoding="utf-8") as f:
    mock_conversation_1 = yaml.safe_load(f)

print(mock_conversation_1['model_2']['description'])
print(mock_conversation_1['model_2']['responses'][0])

Mock ChatGTP 4
Sure, here is a simple function in Python that does that:

```python
def sum_two_numbers(num1, num2):
    return num1 + num2
```

You can use this function like this:

```
result = sum_two_numbers(5, 3)
print(result)  # Outputs: 8
```



In [108]:
# Load data from YAML file
with open("/code/tests/test_data/compare/mock_conversation__mask_email_function.yml", "r", encoding="utf-8") as f:
    mock_conversation_2 = yaml.safe_load(f)

print(mock_conversation_2['model_1']['description'])
print(mock_conversation_2['model_1']['responses'][0])

Mock ChatGTP 3.5 Turbo
Sure! Here's a Python function that masks email addresses by replacing characters in the local part of the email with asterisks (*) while keeping the domain part intact:

```python
def mask_email(email):
    local_part, domain = email.split('@')
    masked_local_part = '*' * len(local_part)
    masked_email = masked_local_part + '@' + domain
    return masked_email
```

Example usage:

```
email = 'example@example.com'
masked_email = mask_email(email)
print(masked_email)  # Output: ********@example.com
```

This function splits the email address into the local part and the domain part using the `split()` method. Then, it creates a masked local part by replacing each character with an asterisk using the * operator and the `len()` function. Finally, it concatenates the masked local part with the domain part and returns the masked email address.



In [109]:
mock_conversation_1['prompts'] + mock_conversation_2['prompts']

mock_conversation_1['model_1']['responses'] + mock_conversation_2['model_1']['responses']

["Sure! Here's a simple function in Python that takes two numbers as input and returns their sum:\n\n```python\ndef sum_numbers(num1, num2):\n    return num1 + num2\n```\n\nYou can use this function by calling it and passing in two numbers as arguments. For example:\n\n```\nresult = sum_numbers(5, 3)\nprint(result)  # Output: 8\n```\n\nFeel free to modify the function to suit your needs. Let me know if you have any other questions!\n",
 'Certainly! Here are a couple of assertion statements to validate the `sum_numbers` function:\n\n```\nassert sum_numbers(5, 3) == 8\nassert sum_numbers(-10, 10) == 0\n```\n\nThese assertions will check if the function returns the expected results. If the function is working correctly, these assertions will not raise any exceptions. However, if the function is incorrect and does not return the expected results, an `AssertionError` will be raised.\n\nYou can add these assertions to your code to ensure that the `sum_numbers` function is working as expected

In [110]:
mock_model_1 = MockChatModel(
    prompts = mock_conversation_1['prompts'] + mock_conversation_2['prompts'],
    responses = mock_conversation_1['model_1']['responses'] + mock_conversation_2['model_1']['responses'],
    cost = 0.01,
)
mock_model_2 = MockChatModel(
    prompts = mock_conversation_1['prompts'] + mock_conversation_2['prompts'],
    responses = mock_conversation_1['model_2']['responses'] + mock_conversation_2['model_2']['responses'],
    cost = None,
)

In [111]:
mock_model_1(mock_conversation_2['prompts'][0])

"Sure! Here's a Python function that masks email addresses by replacing characters in the local part of the email with asterisks (*) while keeping the domain part intact:\n\n```python\ndef mask_email(email):\n    local_part, domain = email.split('@')\n    masked_local_part = '*' * len(local_part)\n    masked_email = masked_local_part + '@' + domain\n    return masked_email\n```\n\nExample usage:\n\n```\nemail = 'example@example.com'\nmasked_email = mask_email(email)\nprint(masked_email)  # Output: ********@example.com\n```\n\nThis function splits the email address into the local part and the domain part using the `split()` method. Then, it creates a masked local part by replacing each character with an asterisk using the * operator and the `len()` function. Finally, it concatenates the masked local part with the domain part and returns the masked email address.\n"

In [112]:
use_cases = [
    mock_conversation_1['prompts'],
    mock_conversation_2['prompts'],
]
use_cases

[['Write a function that returns the sum of two numbers.',
  'Write a few assertion statements validating the function.'],
 ['Generate a function that masks email addresses.',
  'Write a couple assertion statements validating the function.']]

In [113]:
model_creations = [
    # ModelCreation(create=lambda: OpenAIChat(model_name='gpt-3.5-turbo'), description='GPT-3.5 Turbo'),
    # ModelCreation(create=lambda: OpenAIChat(model_name='gpt-4'), description='GPT-4'),
    ModelCreation(create=lambda: mock_model_1, description='Mock GPT-3.5 Turbo'),
    ModelCreation(create=lambda: mock_model_2, description='Mock GPT-4'),
]
comparison = CompareModels(use_cases=use_cases, model_creations=model_creations)
comparison()

In [114]:
print(comparison)

Mock GPT-3.5 Turbo
Time: 0.00 seconds
Response Characters: 2,761
Response Characters per second: 413588333.7
Num code blocks: 6
Percent Passing Code blocks: 83.3%
Cost: $0.02000

Mock GPT-4
Time: 0.00 seconds
Response Characters: 1,847
Response Characters per second: 407730499.4
Num code blocks: 5
Percent Passing Code blocks: 100.0%




In [115]:
# def trial_to_html(trial: Trial) -> str:
#     results = "<ul>\n"
#     results += f"<li>Time: <code>{trial.duration_seconds:.2f}</code> seconds</li>\n"
#     results += f"<li>Characters: <code>{trial.num_characters:,}</code></li>\n"
#     results += f"<li>Characters per second: <code>{trial.characters_per_second:.1f}</code></li>\n"
#     results += f"<li>Num code blocks: <code>{trial.num_code_blocks}</code></li>\n"
#     percent_successful_code_blocks = (
#         trial.num_successful_code_blocks / trial.num_code_blocks
#     )
#     results += f"<li>Percent Passing Code blocks: <code>{percent_successful_code_blocks:.1%}</code></li>\n"
#     if trial.cost:
#         results += f"<li>Cost: <code>${trial.cost:.5f}</code></li>\n"
#     results += "</ul>\n"
#     return results

# trial_to_html(trial=comparison.trials[0][0])

def trial_to_html(trial: Trial) -> str:
    results = '<table style="border-collapse: collapse; width: auto;">\n'
    results += f'<tr><td style="border: none;">Time</td><td style="border: none;"><code>{trial.duration_seconds:.2f} seconds</code></td></tr>\n'
    results += f'<tr><td style="border: none;">Characters</td><td style="border: none;"><code>{trial.num_response_chars:,}</code></td></tr>\n'
    results += f'<tr><td style="border: none;">Characters per second</td><td style="border: none;"><code>{trial.response_chars_per_second:.1f}</code></td></tr>\n'
    results += f'<tr><td style="border: none;">Num code blocks</td><td style="border: none;"><code>{trial.num_code_blocks}</code></td></tr>\n'
    percent_successful_code_blocks = (
        trial.num_successful_code_blocks / trial.num_code_blocks
    )
    results += f'<tr><td style="border: none;">Percent Passing Code blocks</td><td style="border: none;"><code>{percent_successful_code_blocks:.1%}</code></td></tr>\n'
    if trial.cost:
        results += f'<tr><td style="border: none;">Cost</td><td style="border: none;"><code>${trial.cost:.5f}</code></td></tr>\n'
    results += '</table>\n'
    return results

In [116]:
import markdown
from pygments.formatters import HtmlFormatter
import textwrap

def markdown_to_html(trials: list[list[Trial]]):
    # Configure Markdown to HTML conversion
    md = markdown.Markdown(
        extensions=['fenced_code', 'codehilite'],
        extension_configs={
            'codehilite': {
                'css_class': 'highlight',
                'linenums': False,
                'use_pygments': True,
            }
        }
    )
    css = HtmlFormatter().get_style_defs('.highlight')

    # line_break = '<hr style="width:25%; text-align:left; margin-left:10px; height:1px; border-width:0>'  # noqa
    horizontal_line = '<div class="centered-line"></div>'
    # Generate rows and columns
    rows_html = ''
    column_names_html = ''
    for comparison in trials[0]:
        column_names_html += f'<th>{comparison.description}</th>'
        # <th style="width:50%;">Column 2</th>
    
    


    for row in trials:
        columns_html = ''
        for trial in row:
            # columns_html += f'<td style="vertical-align: top;">{trial_to_html(trial)}</td>'
            columns_html += f'<td style="vertical-align: top;">{trial_to_html(trial)}<br>'
            columns_html += f'{horizontal_line}<br>'
            for prompt, response in zip(trial.prompts, trial.responses):
                columns_html += '<h3>Prompt</h3><br>'
                columns_html += f'{prompt}<br><br>'
                columns_html += f'{horizontal_line}<br>'
                columns_html += '<h3>Response</h3><br>'
                html = md.convert(response)
                columns_html += f'{html}<br>'
                columns_html += f'{horizontal_line}<br>'


            # columns_html += '<h3>Prompt</h3><br>'
            # columns_html += f'{trial.prompts[0]}<br><br>'
            # columns_html += f'{horizontal_line}<br>'
            # columns_html += '<h3>Response</h3><br>'
            # html = md.convert(trial.responses[0])
            # columns_html += f'{html}<br>'
            columns_html += '</td>'
            
        rows_html += f'<tr>{columns_html}</tr>'
    
    # Wrap the HTML and CSS in a complete HTML document with a table
    complete_html = textwrap.dedent(f'''
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Document</title>
        <style>
        {css}
        .centered-line {{
            width: 50%;
            margin-left: 5px; /* Adjust this value to control the left alignment */
            border-top: 1px solid #000; /* You can adjust the color and style as needed */
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
        }}
        th, td {{
            border: 1px solid #B2BEB5;
            padding: 8px;
            text-align: left;
        }}
        </style>
    </head>
    <body>
    <table border="1" style="width:100%; border-collapse: collapse;">
        <thead>
            <tr>
                {column_names_html}
            </tr>
        </thead>
        {rows_html}
    </table>
    </body>
    </html>
    ''')
    
    return complete_html


In [117]:
comparison_html = markdown_to_html(comparison.trials)
with open("/code/examples/comparison_example.html", "w") as file:
    file.write(comparison_html)
display(HTML(comparison_html))


Mock GPT-3.5 Turbo,Mock GPT-4
"Time0.00 seconds Characters1,019 Characters per second237444209.8 Num code blocks3 Percent Passing Code blocks100.0% Cost$0.01000 Prompt Write a function that returns the sum of two numbers. Response Sure! Here's a simple function in Python that takes two numbers as input and returns their sum: def sum_numbers(num1, num2):  return num1 + num2 You can use this function by calling it and passing in two numbers as arguments. For example: result = sum_numbers(5, 3) print(result) # Output: 8 Feel free to modify the function to suit your needs. Let me know if you have any other questions! Prompt Write a few assertion statements validating the function. Response Certainly! Here are a couple of assertion statements to validate the sum_numbers function: assert sum_numbers(5, 3) == 8 assert sum_numbers(-10, 10) == 0 These assertions will check if the function returns the expected results. If the function is working correctly, these assertions will not raise any exceptions. However, if the function is incorrect and does not return the expected results, an AssertionError will be raised. You can add these assertions to your code to ensure that the sum_numbers function is working as expected. Let me know if you need any further assistance!","Time0.00 seconds Characters625 Characters per second291271111.1 Num code blocks3 Percent Passing Code blocks100.0% Prompt Write a function that returns the sum of two numbers. Response Sure, here is a simple function in Python that does that: def sum_two_numbers(num1, num2):  return num1 + num2 You can use this function like this: result = sum_two_numbers(5, 3) print(result) # Outputs: 8 Prompt Write a few assertion statements validating the function. Response Sure, here are a few assertion statements that can be used to validate the function: assert sum_two_numbers(5, 3) == 8, ""Should be 8"" assert sum_two_numbers(-1, 1) == 0, ""Should be 0"" assert sum_two_numbers(0, 0) == 0, ""Should be 0"" assert sum_two_numbers(100, 200) == 300, ""Should be 300"" These statements will throw an error if the function does not return the expected result."
"Time0.00 seconds Characters1,742 Characters per second730647756.8 Num code blocks3 Percent Passing Code blocks66.7% Cost$0.01000 Prompt Generate a function that masks email addresses. Response Sure! Here's a Python function that masks email addresses by replacing characters in the local part of the email with asterisks (*) while keeping the domain part intact: def mask_email(email):  local_part, domain = email.split('@')  masked_local_part = '*' * len(local_part)  masked_email = masked_local_part + '@' + domain  return masked_email Example usage: email = 'example@example.com' masked_email = mask_email(email) print(masked_email) # Output: ********@example.com This function splits the email address into the local part and the domain part using the split() method. Then, it creates a masked local part by replacing each character with an asterisk using the * operator and the len() function. Finally, it concatenates the masked local part with the domain part and returns the masked email address. Prompt Write a couple assertion statements validating the function. Response Certainly! Here are a couple of assertion statements to validate the mask_email() function: # Test case 1: Masking email with alphanumeric local part email1 = 'example123@example.com' assert mask_email(email1) == '***********@example.com' # Test case 2: Masking email with special characters in local part email2 = 'ex@mple@example.com' assert mask_email(email2) == '******@example.com' In the first test case, the email address 'example123@example.com' has an alphanumeric local part. The expected masked email address is '***********@example.com'. In the second test case, the email address 'ex@mple@example.com' has special characters in the local part. The expected masked email address is '******@example.com'. If both assertions pass without throwing any exceptions, it means that the mask_email() function is working correctly for these test cases.","Time0.00 seconds Characters1,222 Characters per second512543948.8 Num code blocks2 Percent Passing Code blocks100.0% Prompt Generate a function that masks email addresses. Response Sure, here's a simple Python function that masks email addresses: def mask_email(email):  try:  email_parts = email.split('@')  # Mask first part  masked_part = email_parts[0][0] + ""****"" + email_parts[0][-1]  # Combine masked part and domain  masked_email = masked_part + '@' + email_parts[1]  return masked_email  except Exception as e:  print(""An error occurred: "", e)  return None This function masks all characters of the local part of the email (before the '@') except for the first and last characters with asterisks ('*'). Please note that this is a simple masking function and may not be suitable for all use cases. Prompt Write a couple assertion statements validating the function. Response Sure, here are a few assertion statements to validate the function: assert mask_email(""john.doe@example.com"") == ""j****e@example.com"" assert mask_email(""jane_doe@example.com"") == ""j****e@example.com"" assert mask_email(""test@test.com"") == ""t****t@test.com"" These assertions check that the function correctly masks the email addresses. If the function is working correctly, these assertions will pass silently. If the function is not working correctly, one or more of these assertions will raise an AssertionError."

0,1
Time,0.00 seconds
Characters,1019
Characters per second,237444209.8
Num code blocks,3
Percent Passing Code blocks,100.0%
Cost,$0.01000

0,1
Time,0.00 seconds
Characters,625
Characters per second,291271111.1
Num code blocks,3
Percent Passing Code blocks,100.0%

0,1
Time,0.00 seconds
Characters,1742
Characters per second,730647756.8
Num code blocks,3
Percent Passing Code blocks,66.7%
Cost,$0.01000

0,1
Time,0.00 seconds
Characters,1222
Characters per second,512543948.8
Num code blocks,2
Percent Passing Code blocks,100.0%


In [120]:
# Test case 1: Masking email with alphanumeric local part
email1 = 'example123@example.com'
assert mask_email(email1) == '***********@example.com'

# Test case 2: Masking email with special characters in local part
email2 = 'ex@mple@example.com'
assert mask_email(email2) == '******@example.com'

AssertionError: 

In [63]:
print(comparison.trials[0][0])

Mock GPT-3.5 Turbo
Time: 0.00 seconds
Response Characters: 1,301
Response Characters per second: 86615706.4
Num code blocks: 3
Percent Passing Code blocks: 66.7%
Cost: $0.01000



In [104]:
print(comparison.trials[1][0].responses[1])

Certainly! Here are a couple of assertion statements to validate the `mask_email()` function:

```
# Test case 1: Masking email with alphanumeric local part
email1 = 'example123@example.com'
assert mask_email(email1) == '***********@example.com'

# Test case 2: Masking email with special characters in local part
email2 = 'ex@mple@example.com'
assert mask_email(email2) == '******@example.com'
```

In the first test case, the email address `'example123@example.com'` has an alphanumeric local part. The expected masked email address is `'***********@example.com'`.

In the second test case, the email address `'ex@mple@example.com'` has special characters in the local part. The expected masked email address is `'******@example.com'`.

If both assertions pass without throwing any exceptions, it means that the `mask_email()` function is working correctly for these test cases.



In [None]:
trial_1 = comparison.trials[0][0]
trial_1.duration_seconds
trial_1.num_characters
trial_1.characters_per_second
trial_1.num_code_blocks
percent_successful_code_blocks = (
        trial_1.num_successful_code_blocks / trial_1.num_code_blocks
    )
trial_1.cost


trial_2 = comparison.trials[0][1]
trial_2.duration_seconds
trial_2.num_characters
trial_2.characters_per_second
trial_2.num_code_blocks
percent_successful_code_blocks = (
        trial_2.num_successful_code_blocks / trial_2.num_code_blocks
    )
trial_2.cost

In [None]:
import pandas as pd
import plotly.express as px

# Assuming trial_1 and trial_2 are objects with the properties as shown
# Calculate percent change for each property
percent_successful_code_blocks_1 = (
        trial_1.num_successful_code_blocks / trial_1.num_code_blocks
)

percent_successful_code_blocks_2 = (
        trial_2.num_successful_code_blocks / trial_2.num_code_blocks
)

percent_changes = {
    'duration_seconds': ((trial_2.duration_seconds - trial_1.duration_seconds) / trial_1.duration_seconds) * 100,
    'num_characters': ((trial_2.num_characters - trial_1.num_characters) / trial_1.num_characters) * 100,
    'characters_per_second': ((trial_2.characters_per_second - trial_1.characters_per_second) / trial_1.characters_per_second) * 100,
    'num_code_blocks': ((trial_2.num_code_blocks - trial_1.num_code_blocks) / trial_1.num_code_blocks) * 100,
    'percent_successful_code_blocks': ((percent_successful_code_blocks_2 - percent_successful_code_blocks_1) / percent_successful_code_blocks_1) * 100,
}
if trial_1.cost and trial_2.cost:
    percent_changes['cost'] = ((trial_2.cost - trial_1.cost) / trial_1.cost) * 100

# Create a DataFrame
df_percent_changes = pd.DataFrame(list(percent_changes.items()), columns=['Property', 'Percent Change'])

# Create a bar graph using Plotly Express
fig = px.bar(
    df_percent_changes, 
    x='Property', 
    y='Percent Change',
    title='Percent Change from trial_2 to trial_1',
    labels={'Percent Change': 'Percentage (%)', 'Property': ''},
    width=600,
    height=400,
)
fig

In [None]:
# Convert figure to HTML
fig_html = fig.to_html(full_html=False)

html_string = f"""
<html>
<head>
<title>My Dynamic HTML</title>
</head>
<body>
{fig_html}
</body>
</html>
"""

# Write the HTML content to a file or serve it via a web framework
with open('/code/examples/dynamic_page.html', 'w') as html_file:
    html_file.write(html_string)


In [None]:
import difflib


def _create_html_difference_list(value_a: str, value_b: str) -> str:
    """
    Given value_a and value_b return unified_diff, after cleaning up what unified_diff returns.
    Args:
        value_a:
        value_b:
    """
    diff = list(difflib.unified_diff(a=value_a, b=value_b, n=1000))
    if len(diff) == 0:  # equal strings
        diff = [" " + char for char in value_a]
    else:
        diff = diff[3:]
    return diff


def _create_html_change_span(value: str, is_change: bool, change_color: str = '#F1948A') -> str:
    """
    Args:
        value: a single character
        is_change: if True, highlight the character in red
        change_color: color of background to highlight differences.
    Returns:
        e.g. "<span style="background:#ffe6e6";>value</span>"
    """
    background_color = ''
    if is_change:
        background_color = f' style="background:{change_color}";'
    return f'<span{background_color}>{value}</span>'


def _create_html_cell(difference_list: list, is_first_value: bool, change_color: str = '#F1948A'):
    """
    Creates a single cell (e.g. name, domain, etc.) from one company's differences.
    Args:
        difference_list: list returned from create_difference_list
        is_first_value: if True, treats difference_list according to first value.
        change_color: color of background to highlight differences.
    """
    if is_first_value:
        diff = [(x[1:], x[0] != ' ') for x in difference_list if x[0] in [' ', '-']]
    else:
        diff = [(x[1:], x[0] != ' ') for x in difference_list if x[0] in [' ', '+']]

    html = [
        _create_html_change_span(value=x[0], is_change=x[1], change_color=change_color)
        for x in diff
    ]
    return ''.join(html)


def diff_text(
        text_a: str | list[str],
        text_b: str | list[str],
        change_color: str = '#F1948A') -> str:
    """
    Returns string as HTML containing highlighted differences between `text_a` and
    `text_b`.

    The HTML will contain a table with a single column that that contains `text_a` on top and
    `text_b` on the bottom.

    All `new line` characters are removed and replaced with a space.

    Args:
        text_a: this text will be represented on the top of each html cell.
        text_b: this text will be represented on the bottom of each html
        cell. change_color: color of background to highlight differences.
    """
    html = '''
    <html>
    <head>
      <style>
        table, th, td { border: 1px solid black; border-collapse: collapse; white-space: normal;}
        .markdown-container {
            font-family: Arial, sans-serif;
            padding: 10px;
            border: 1px solid #ccc;
        }
      </style>
    </head>
    <body style="font-family: monospace">
    '''
    html += '<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>'
    html += """
    <script>
    // Get references to all elements with the "markdown-container" class
    const markdownContainers = document.querySelectorAll(".markdown-container");

    // Your Markdown content
    const markdownText = `
    # Hello, Markdown!

    This is some **Markdown** text.

    - List item 1
    - List item 2
    `;

    // Convert and render the Markdown for each container
    markdownContainers.forEach((container) => {
        container.innerHTML = marked(markdownText);
    });
    </script>
    """
    html += '<table><tr>'
    html += '<th>index</th>'
    html += '<th>diff</th>'
    html += '</tr>'

    # line_break = '<hr style="width:25%; text-align:left; margin-left:10px; height:1px; ' \
    #     'border-width:0;' \
    #     'color:blue; background-color:blue">'
    line_break = '<hr style="border: none; border-left: 1px solid #000; height: 100px;">'

    def create_inline_change(diff_list):
        diff_a = _create_html_cell(
            difference_list=diff_list, is_first_value=True, change_color=change_color
        )
        diff_b = _create_html_cell(
            difference_list=diff_list, is_first_value=False, change_color=change_color
        )
        return f'<td class="markdown-container">{diff_a}</td>' + f'<td>{diff_b}</td>'

    if isinstance(text_a, str):
        assert isinstance(text_b, str)
        text_a = [text_a]
        text_b = [text_b]
    else:
        assert len(text_a) == len(text_b)

    for index in range(len(text_a)):
        html += '<tr>'
        html += f"<td>{index}</td>"
        difference_list = _create_html_difference_list(
            value_a=text_a[index],
            value_b=text_b[index]
        )
        html += create_inline_change(diff_list=difference_list)
        html += '</tr>'
    html += "</table></body></html>"
    return html


In [None]:
print(response_a)

In [None]:
from IPython.display import display, HTML, Markdown
display(Markdown(response_a))
display(Markdown(response_b))

In [None]:
# display html
display(HTML(diff_text(response_a, response_b)))

In [None]:
import markdown