#### Saturday, December 14, 2024

As expected, different models perform... well... differently! And running the code on the 4090 will ALWAYS be faster than running it on the 2070 Super.

In [1]:
# Define two variants of the same product-naming prompt so that
# zero-shot can be compared against few-shot:
#   prompt_A - zero-shot: only the target description and seed words.
#   prompt_B - few-shot: two worked examples, then the same target.
# NOTE(review): the target description is line-wrapped at a different
# word in prompt_A ("that can" / "fit any foot size.") than in prompt_B
# ("fit any foot" / "size."), so the two variants differ by more than
# the added examples - confirm this is intended.
prompt_A = """Product description: A pair of shoes that can
fit any foot size.
Seed words: adaptable, fit, omni-fit.
Product names:"""

# NOTE(review): "eliptical" / "EliptoTime" are misspelled in the
# few-shot example below; left untouched here because editing the
# prompt text changes what the model is sent.
prompt_B = """Product description: A home milkshake maker.
Seed words: fast, healthy, compact.
Product names: HomeShaker, Fit Shaker, QuickShake, Shake
Maker

Product description: A watch that can tell accurate time in
space.
Seed words: astronaut, space-hardened, eliptical orbit
Product names: AstroTime, SpaceGuard, Orbit-Accurate,
EliptoTime.

Product description: A pair of shoes that can fit any foot
size.
Seed words: adaptable, fit, omni-fit.
Product names:"""

# The loop that queries the model labels each variant by its position
# in this list (index 0 -> "A", index 1 -> "B").
test_prompts = [prompt_A, prompt_B]

In [3]:
import pandas as pd
from openai import OpenAI
import os

# Set your OpenAI key as an environment variable
# https://platform.openai.com/api-keys
# client = OpenAI(
#   api_key=os.environ['OPENAI_API_KEY'],  # Default
# )

# Known LM Studio endpoints (OpenAI-compatible API). Keeping both as
# named constants avoids assigning `lmstudio` twice and relying on the
# last assignment to win.
LMSTUDIO_LOCAL = "http://localhost:1234/v1"
LMSTUDIO_REMOTE = "http://192.168.2.16:1234/v1"

# Point to the LM Studio server. Defaults to the remote machine (the
# value that was previously in effect); set the LMSTUDIO_BASE_URL
# environment variable, e.g. to LMSTUDIO_LOCAL's value, to switch
# servers without editing this cell.
lmstudio = os.environ.get("LMSTUDIO_BASE_URL", LMSTUDIO_REMOTE)

# The api_key is a placeholder string; it is passed only because the
# client constructor is given one here.
client = OpenAI(base_url=lmstudio, api_key="lm-studio")

# Model identifier sent with every chat completion request.
model = "qwen2.5-14b-instruct"

# model = "hermes-3-llama-3.2-3b"


In [4]:
def get_response(prompt):
    """Ask the configured chat model to complete `prompt`.

    The request carries a fixed system message followed by `prompt` as
    the user message, and is sent through the module-level `client`
    using the module-level `model` name.

    Args:
        prompt: Text placed in the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant."
    }
    user_message = {"role": "user", "content": prompt}

    # Previously hard-coded as "gpt-3.5-turbo"; now taken from `model`.
    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [5]:
# Query the model num_tests times for every prompt variant and keep
# each answer as one record (variant letter, prompt text, response).
responses = []
num_tests = 5

for idx, prompt in enumerate(test_prompts):

    # Turn the prompt's position into a letter: 0 -> "A", 1 -> "B", ...
    variant_letter = chr(ord('A') + idx)

    for _ in range(num_tests):
        # One request per iteration; the reply goes straight into the record
        responses.append(
            dict(
                variant=variant_letter,
                prompt=prompt,
                response=get_response(prompt),
            )
        )

# 2m 45.4s "qwen2.5-14b-instruct" 2070 super

# 30.0 s "hermes-3-llama-3.2-3b" 2070 super

# 38.8s "qwen2.5-14b-instruct" Qwen2.5-14B-Instruct-Q8_0.gguf 4090 

# 2m 58.9s "qwen2.5-14b-instruct" Qwen2.5-14B-Instruct-Q4_K_M.gguf => 2070 super
# 35.6s    "qwen2.5-14b-instruct" Qwen2.5-14B-Instruct-Q4_K_M.gguf => 4090


In [6]:
# Build a table with one row per collected response record
df = pd.DataFrame(data=responses)

print(df)

  variant                                             prompt  \
0       A  Product description: A pair of shoes that can\...   
1       A  Product description: A pair of shoes that can\...   
2       A  Product description: A pair of shoes that can\...   
3       A  Product description: A pair of shoes that can\...   
4       A  Product description: A pair of shoes that can\...   
5       B  Product description: A home milkshake maker.\n...   
6       B  Product description: A home milkshake maker.\n...   
7       B  Product description: A home milkshake maker.\n...   
8       B  Product description: A home milkshake maker.\n...   
9       B  Product description: A home milkshake maker.\n...   

                                            response  
0  Adaptable Omni-Fit Shoes\nComfortable Omni-Fit...  
1  Adaptable Omni-Fit Shoes\nComfortable Omni-Fit...  
2  Adaptable Omni-Fit Shoes\nComfortable Omni-Fit...  
3  Adaptable Omni-Fit Shoes\nOmni-Fit Shoe Collec...  
4  Adaptable Omni-Fi

In [9]:
# Persist the collected responses to disk, leaving out the row index
df.to_csv(path_or_buf="responses_.csv", index=False)


In [10]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd

# Load the responses written by the df.to_csv("responses_.csv", ...) cell.
# The file name here must match that call; reading "responses.csv"
# would rate whatever other file of that name happens to be on disk
# (or fail on a fresh checkout).
df = pd.read_csv("responses_.csv")

# Shuffle the rows and renumber them 0..n-1, presumably so the rater
# cannot tell which variant produced the response being shown.
df = df.sample(frac=1).reset_index(drop=True)

# df is your dataframe and 'response' is the column with the
# text you want to test.
# 0-based position of the response currently shown to the rater.
response_index = 0

# Add an empty column to store feedback. The click handler writes the
# integers 1 / 0 and later averages them, so use a nullable integer
# dtype instead of a string dtype (assigning an int into a real string
# column is rejected by pandas versions where 'str' is a string dtype).
df['feedback'] = pd.Series(dtype='Int64')

In [11]:
def on_button_clicked(b):
    """Record a thumbs-up/down vote for the current response and advance.

    Stores 1 (thumbs up) or 0 (anything else) in the 'feedback' column
    of the module-level `df` at row `response_index`, then increments
    `response_index`. While responses remain, the next one is shown via
    update_response(). After the last vote, `df` is written to
    "results.csv" and a per-variant summary (number of votes and mean
    score) is printed.

    Args:
        b: The clicked button; only its `description` is read.
    """
    global response_index

    # Every response already has a vote. Without this guard a further
    # click would write to row label len(df), which makes pandas append
    # a new row, and would then overwrite results.csv and reprint the
    # summary with that bogus row included.
    if response_index >= len(df):
        return

    # convert thumbs up / down to 1 / 0
    user_feedback = 1 if b.description == "\U0001F44D" else 0

    # update the feedback column
    df.at[response_index, 'feedback'] = user_feedback

    response_index += 1
    if response_index < len(df):
        update_response()
        return

    # That was the last response: save the feedback to a CSV file
    df.to_csv("results.csv", index=False)

    print("A/B testing completed. Here's the results:")
    # Calculate score and num rows for each variant
    summary_df = df.groupby('variant').agg(
        count=('feedback', 'count'),
        score=('feedback', 'mean')).reset_index()
    print(summary_df)
        
def update_response():
    """Show the response at `response_index` and refresh the counter.

    Reads the 'response' value of row `response_index` from the
    module-level `df`, renders it into the `response` HTML widget, and
    sets `count_label` to "Response: <position>/<total>".

    The text is model output, so it is HTML-escaped before being placed
    in the widget, and its newlines become <br> so multi-line answers
    keep their line breaks. A missing value is shown as "No response".
    """
    # Local import keeps this cell self-contained; html is stdlib.
    import html

    new_response = df.iloc[response_index]['response']
    if pd.notna(new_response):
        # str() guards against a value that was parsed as a non-string
        # (e.g. a purely numeric reply read back from CSV).
        escaped = html.escape(str(new_response))
        new_response = "<p>" + escaped.replace("\n", "<br>") + "</p>"
    else:
        new_response = "<p>No response</p>"
    response.value = new_response
    count_label.value = f"Response: {response_index + 1}/{len(df)}"

In [12]:
# Output widgets that update_response() writes into
response = widgets.HTML()
count_label = widgets.Label()

# Populate them with the first response before showing anything
update_response()

# Voting buttons; both are routed to the same click handler, which
# tells them apart by their emoji description
thumbs_up_button = widgets.Button(description='\U0001F44D')
thumbs_up_button.on_click(on_button_clicked)

thumbs_down_button = widgets.Button(description='\U0001F44E')
thumbs_down_button.on_click(on_button_clicked)

# Lay the buttons out side by side, thumbs-down on the left
button_box = widgets.HBox(
    children=[thumbs_down_button, thumbs_up_button]
)

display(response, button_box, count_label)

HTML(value='<p>AdaptaFit, OmniFit, FitFlex, UniversalFit</p>')

HBox(children=(Button(description='👎', style=ButtonStyle()), Button(description='👍', style=ButtonStyle())))

Label(value='Response: 1/10')