In [1]:
import os
from openai import OpenAI
import csv

In [2]:
# Module-level API client used by param_testing. No API key is passed here, so
# credentials must come from the client's own configuration (presumably the
# OPENAI_API_KEY environment variable — confirm).
client = OpenAI()

def param_testing(description, context, prompt, temperature=0.2, n=1, max_tokens=50, model="gpt-3.5-turbo"):
    """Send one chat-completion request and return the raw API response.

    Args:
        description: Text to be rewritten; appended to ``prompt`` after a single space.
        context: Content of the system message.
        prompt: Instruction placed in front of ``description`` in the user message.
        temperature: Sampling temperature forwarded to the API.
        n: Number of completions requested.
        max_tokens: Token cap forwarded to the API.
        model: Model name forwarded to the API.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.
    """
    system_message = {"role": "system", "content": context}
    user_message = {"role": "user", "content": f"{prompt} {description}"}

    request_options = {
        "model": model,
        "messages": [system_message, user_message],
        "n": n,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    return client.chat.completions.create(**request_options)

In this case, I will keep context and prompt static.

In initial testing, the following context/prompt combination was the best at keeping the responses within the word limit. As far as I could tell, the context did not have a significant effect on the word count.

In [3]:
# System message: sets the assistant's overall voice.
context = "You are an informative assistant, providing clear and factual information."

# Instruction that is placed in front of the description in the user message.
prompt = "Rewrite this description with 30 words or less:"

# Sample text to condense. Written one sentence per literal for readability;
# adjacent literals are concatenated into a single string.
description = (
    "As a skilled 3rd year computer science student with a strong foundation in full stack web development and machine learning, I bring a unique blend of technical and soft skills to any team. "
    "My expertise spans multiple programming languages including Python, C++, and Java, as well as a range of technical tools such as GitHub, PostgreSQL, and Unity. "
    "I have experience in both academic and personal projects. "
    "My background includes leading STEM education initiatives, where I have honed my ability to communicate complex concepts effectively and foster creativity and problem-solving skills in others."
)

In [6]:
def print_response(response):
    """Print every completion in ``response`` followed by its word count.

    Args:
        response: Chat-completion result exposing ``choices``, where each
            choice has a ``message.content`` attribute.

    Words are counted by splitting the text on whitespace. A choice whose
    content is ``None`` is shown as empty text with a word count of 0 instead
    of raising ``AttributeError``.
    """
    # Walk all choices so that a request made with n > 1 reports every completion,
    # not just the first one.
    for choice in response.choices:
        response_text = choice.message.content or ""
        word_count = len(response_text.split())

        print(f"Response: {response_text}")
        print(f"Word count: {word_count}\n")

# Draw five samples with the default request settings and report each one.
for attempt in range(5):
    response = param_testing(description, context, prompt)
    print_response(response)

Response: Experienced 3rd year computer science student proficient in full stack web development, machine learning, Python, C++, Java, GitHub, PostgreSQL, Unity. Skilled in leadership, communication, and STEM education initiatives.
Word count: 28

Response: Experienced 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity. Strong communicator and leader in STEM education.
Word count: 30

Response: I am a proficient 3rd year computer science student with expertise in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, and Unity.
Word count: 28

Response: Experienced 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity. Strong communicator and leader in STEM education.
Word count: 30

Response: Experienced 3rd year computer science student profic

Actually, several of these responses open with a sentence fragment that has no subject (e.g. "Experienced 3rd year computer science student proficient in…"). I think this can be fixed by having the model write in the first person.

In [7]:
# Revised system message: same wording as before, now also asking for first-person phrasing.
context = "You are an informative assistant, providing clear and factual information that is always written in the first person."

# Repeat the five-sample check against the revised context.
for attempt in range(5):
    response = param_testing(description, context, prompt)
    print_response(response)

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Pyt

In [5]:
# Values swept by the experiments below, one list per request parameter.
temperatures = [
    0.0,
    0.5,
    1.0,
]
max_tokens_list = [
    25,
    50,
    100,
]
models = [
    "gpt-3.5-turbo",
    "gpt-4-turbo",
]

In [11]:
# One average word count per temperature, appended in sweep order.
avg_wc_list = []

# Sweep temperature; every other request parameter keeps its default.
for temp in temperatures:
    word_counts = []

    print(f"---Temperature={temp}---")
    # Five samples per setting. The counter gets its own name so it no longer
    # shadows an outer loop index.
    for trial in range(5):
        response = param_testing(description, context, prompt, temperature=temp)

        response_text = response.choices[0].message.content
        # Word count = number of whitespace-separated tokens.
        word_count = len(response_text.split())
        word_counts.append(word_count)

        print(f"Response: {response_text}")
        print(f"\tWord count: {word_count}\n")
    print("\n")

    avg_wc = sum(word_counts) / len(word_counts)
    avg_wc_list.append(avg_wc)

---Temperature=0.0---
Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machin

In [12]:
# avg_wc_list is rebuilt by each sweep cell, so make sure it currently matches
# the temperature sweep before attaching temperature labels to its values.
assert len(avg_wc_list) == len(temperatures), "avg_wc_list does not match temperatures; re-run the temperature sweep"

# Pair each setting with its average directly instead of indexing by position.
for temp, avg_wc in zip(temperatures, avg_wc_list):
    print(f"Average WC for Temperature={temp}:  {avg_wc}")

Average WC for Temperature=0.0:  29.0
Average WC for Temperature=0.5:  29.8
Average WC for Temperature=1.0:  30.0


My observations of temperature given this prompt/context combo:

- For this word limit of just 30, there was no significant difference in average word count between temperatures 0.0 and 0.5 (29.0 vs. 29.8)
- Higher temperatures were more likely to break the word limit
- Lower temperatures give more consistent responses, so regenerating a response at a low temperature may be redundant.

In [13]:
# One average word count per max_tokens value, appended in sweep order.
avg_wc_list = []

# Sweep max_tokens; every other request parameter keeps its default.
for max_tokens in max_tokens_list:
    word_counts = []

    print(f"---Max_Tokens={max_tokens}---")
    # Five samples per setting. The counter gets its own name so it no longer
    # shadows an outer loop index.
    for trial in range(5):
        response = param_testing(description, context, prompt, max_tokens=max_tokens)

        response_text = response.choices[0].message.content
        # Word count = number of whitespace-separated tokens.
        word_count = len(response_text.split())
        word_counts.append(word_count)

        print(f"Response: {response_text}")
        print(f"\tWord count: {word_count}\n")
    print("\n")

    avg_wc = sum(word_counts) / len(word_counts)
    avg_wc_list.append(avg_wc)

---Max_Tokens=25---
Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python,
	Word count: 20

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python,
	Word count: 20

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python,
	Word count: 20

Response: I am a 3rd-year computer science student proficient in full-stack web development and machine learning. Skilled in Python,
	Word count: 18

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python,
	Word count: 20



---Max_Tokens=50---
Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word 

In [16]:
# avg_wc_list is rebuilt by each sweep cell, so make sure it currently matches
# the max_tokens sweep before attaching max_tokens labels to its values.
assert len(avg_wc_list) == len(max_tokens_list), "avg_wc_list does not match max_tokens_list; re-run the max_tokens sweep"

# Pair each setting with its average directly instead of indexing by position.
for token_cap, avg_wc in zip(max_tokens_list, avg_wc_list):
    print(f"Average WC for max_tokens={token_cap}:  {avg_wc}")

Average WC for max_tokens=25:  19.6
Average WC for max_tokens=50:  29.0
Average WC for max_tokens=100:  28.8


My observations of max_tokens given this prompt/context combo:

- This does limit the response length effectively. However, it simply cuts off the response mid sentence if the max_tokens is exceeded.
- This may be good for rate-limits, but it is not suited for limiting word count if completeness and coherence are valued.
- Apart from the truncation at max_tokens=25, the responses seem roughly identical across settings

In [17]:
# One average word count per model, appended in sweep order.
avg_wc_list = []

# Test each model; every other request parameter keeps its default.
for model in models:
    word_counts = []

    print(f"---Model={model}---")
    # Five samples per model. The counter gets its own name so it no longer
    # shadows an outer loop index.
    for trial in range(5):
        response = param_testing(description, context, prompt, model=model)

        response_text = response.choices[0].message.content
        # Word count = number of whitespace-separated tokens.
        word_count = len(response_text.split())
        word_counts.append(word_count)

        print(f"Response: {response_text}")
        print(f"\tWord count: {word_count}\n")
    print("\n")

    avg_wc = sum(word_counts) / len(word_counts)
    avg_wc_list.append(avg_wc)

---Model=gpt-3.5-turbo---
Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education.
	Word count: 28

Response: I am a 3rd year computer science student proficient in full stack web development and machine learning. Skilled in Python, C++, Java, GitHub, PostgreSQL, Unity, and STEM education leadership.
	Word count: 29

Response: I am a 3rd year computer science student proficient in full stack web development and machine learn

In [18]:
# avg_wc_list is rebuilt by each sweep cell, so make sure it currently matches
# the model sweep before attaching model labels to its values.
assert len(avg_wc_list) == len(models), "avg_wc_list does not match models; re-run the model sweep"

# Pair each model with its average directly instead of indexing by position.
for model_name, avg_wc in zip(models, avg_wc_list):
    print(f"Average WC for model={model_name}:  {avg_wc}")

Average WC for model=gpt-3.5-turbo:  29.0
Average WC for model=gpt-4-turbo:  30.4


My observations of models given this prompt/context combo:

- There is some variation in the writing style of each model
- GPT-4 seems more likely to push up against or exceed the word limit (average of 30.4 words vs. 29.0 for GPT-3.5), but more testing may be needed to verify this