## Install pinned library versions to prevent the proxy error

In [1]:
!pip install openai==1.55.3 httpx==0.27.2 --force-reinstall --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jupyter-server 1.24.0 requires anyio<4,>=3.1.0, but you have anyio 4.7.0 which is incompatible.[0m[31m
[0m

## define polling functions

Define functions that submit a prompt several times, save the responses to a CSV file under `results/`, and return them as a ready-to-use DataFrame.  

In [13]:
import os
import sys
import re
import logging
import pandas as pd
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import openai
from google.colab import userdata

# Read the OpenAI API key through google.colab's userdata rather than hardcoding it
openai.api_key = userdata.get("OPENAI_API_KEY")

# Logging: write records of level DEBUG and above to app.log
logging.basicConfig(
    filename="app.log",
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] - %(message)s",
)

def slugify(text, max_length=100):
    """Turn arbitrary text into a filename-safe slug.

    Each run of characters other than ASCII letters and digits is collapsed
    into a single hyphen, and the result is cut to at most ``max_length``
    characters.

    Args:
        text: The text to convert.
        max_length: Maximum length of the returned slug.

    Returns:
        The slug, with no leading or trailing hyphen (possibly empty).
    """
    slug = re.sub(r'[^a-zA-Z0-9]+', '-', text).strip('-')
    # Strip again after truncating: the cut can land right after a hyphen.
    return slug[:max_length].rstrip('-')

def generate_filename(prompt):
    """Build a CSV filename from the current time and a slug of the prompt.

    The name has the form ``<timestamp>_<slug>.csv``, where the timestamp is
    ``datetime.now()`` formatted as ``%Y%m%d%H%M%S%f`` with its last four
    characters dropped. The whole name is capped at 150 characters.

    Args:
        prompt: Text that is slugified into the filename.

    Returns:
        The filename as a string (no directory component).
    """
    now_digits = datetime.now().strftime("%Y%m%d%H%M%S%f")
    stamp = now_digits[:-4]  # drop the last four characters of the formatted time
    slug = slugify(prompt)
    filename = f"{stamp}_{slug}.csv"
    return filename[:150]

def fetch_response(payload, request_id):
    """Send one chat-completion request and return its text, tagged with an id.

    Args:
        payload: Mapping with a "model" entry and a "messages" entry, both
            passed to ``openai.chat.completions.create``.
        request_id: Identifier returned unchanged alongside the result.

    Returns:
        ``(request_id, content)`` where ``content`` is the message content of
        the first choice. If anything raises, the failure is logged and
        ``(request_id, "Error: <message>")`` is returned instead.
    """
    try:
        completion = openai.chat.completions.create(
            model=payload["model"],
            messages=payload["messages"],
        )
        first_choice = completion.choices[0]
        answer = first_choice.message.content
    except Exception as exc:
        # Deliberately broad: a failure is reported as an error string instead of propagating.
        reason = str(exc)
        logging.error(f"Request {request_id} failed: {reason}")
        return request_id, f"Error: {reason}"
    return request_id, answer

def fetch_responses_to_dataframe(prompt, num_requests, fetch_response, payload):
    """Call ``fetch_response`` ``num_requests`` times concurrently and collect the results.

    The results are written to a CSV file under ``results/`` (the directory is
    created if needed) and also returned as a DataFrame. Progress is written
    to stdout while the requests complete.

    Args:
        prompt: Text used only to build the output filename.
        num_requests: Number of calls to make; calls are numbered from 1.
            Zero yields an empty DataFrame (and an empty CSV with headers).
        fetch_response: Callable invoked as ``fetch_response(payload, i)``.
            Each return value becomes one row, so it is expected to be a
            ``(request number, response)`` pair.
        payload: Passed unchanged to every ``fetch_response`` call.

    Returns:
        DataFrame with columns "Response Number" and "Response". Rows are in
        completion order, not in request order.

    Raises:
        Whatever ``fetch_response`` raises: ``future.result()`` re-raises it.
    """
    os.makedirs("results", exist_ok=True)
    output_file = os.path.join("results", generate_filename(prompt))
    print(f"Responses will be saved to: {output_file}")

    # ThreadPoolExecutor raises ValueError for max_workers <= 0, so keep at
    # least one worker even when no requests are asked for.
    worker_count = max(1, min(num_requests, 10))

    responses = []
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        futures = [executor.submit(fetch_response, payload, i) for i in range(1, num_requests + 1)]
        # as_completed yields futures as they finish, hence the completion-order rows.
        for completed, future in enumerate(as_completed(futures), 1):
            responses.append(future.result())
            # "\r" rewrites the same console line to act as a progress counter.
            sys.stdout.write(f"\rFetched {completed}/{num_requests} responses...")
            sys.stdout.flush()

    df = pd.DataFrame(responses, columns=["Response Number", "Response"])
    df.to_csv(output_file, index=False, encoding="utf-8")
    print(f"\nFinished! Responses saved to {output_file}")
    return df

# Example usage
prompt = "Tell me a fun fact about space."
num_requests = 5
# Single-turn chat payload: the prompt is sent as one user message.
payload = {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": prompt}]}

# Issue the requests, save the CSV, and keep the returned DataFrame for inspection.
responses_df = fetch_responses_to_dataframe(prompt, num_requests, fetch_response, payload)
print(responses_df)

Responses will be saved to: results/2024121412430535_Tell-me-a-fun-fact-about-space.csv
Fetched 5/5 responses...
Finished! Responses saved to results/2024121412430535_Tell-me-a-fun-fact-about-space.csv
   Response Number                                           Response
0                3  One fun fact about space is that the light we ...
1                4  In space, astronauts can't cry the same way th...
2                2  The International Space Station travels at a s...
3                5  In outer space, astronauts cannot cry because ...
4                1  There is a giant cloud of alcohol in outer spa...


An even more compact function that just returns the frequency ratio of each response.

In [28]:
def get_ratios_for_poll(prompt, num_requests, model="gpt-3.5-turbo"):
    """Poll the model ``num_requests`` times with ``prompt`` and return answer frequencies.

    Responses are lower-cased and stripped of periods before counting, so
    "Woman." and "woman" are counted together. No other normalization is
    applied; any other difference in wording counts as a separate answer.

    Args:
        prompt: The user message sent in every request.
        num_requests: How many times to send the prompt.
        model: Chat model name placed in the request payload.

    Returns:
        DataFrame with one row per distinct normalized response and its share
        of all responses, most frequent first.
    """
    payload = {"model": model, "messages": [{"role": "user", "content": prompt}]}
    responses_df = fetch_responses_to_dataframe(prompt, num_requests, fetch_response, payload)
    # regex=False removes literal periods only; read as a regex, "." would match any character.
    normalized = responses_df["Response"].str.lower().str.replace(".", "", regex=False)
    return normalized.value_counts(normalize=True).reset_index()



# Quick check of the helper: a small 10-request poll on a two-option question.
prompt = "say at random: man or woman ?"
num_requests = 10
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121412582278_say-at-random-man-or-woman.csv
Fetched 10/10 responses...
Finished! Responses saved to results/2024121412582278_say-at-random-man-or-woman.csv


Unnamed: 0,Response,proportion
0,woman,0.8
1,man,0.2


## Make some short tests

We poll up to 100 times on simple closed questions (a choice between a few words, yes/no, a short sum) to analyze the response ratios.

In [22]:
# Two-option poll with "woman" listed before "man", 100 requests.
prompt = "say at random: woman or man ?"
num_requests = 100
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121412502408_say-at-random-woman-or-man.csv
Fetched 100/100 responses...
Finished! Responses saved to results/2024121412502408_say-at-random-woman-or-man.csv


Unnamed: 0,Response,proportion
0,woman,0.73
1,man,0.27


In [23]:
# Three-option poll: "unicorn" is offered alongside "man" and "woman".
prompt = "say at random: man or woman or unicorn ?"
num_requests = 100
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121412504117_say-at-random-man-or-woman-or-unicorn.csv
Fetched 100/100 responses...
Finished! Responses saved to results/2024121412504117_say-at-random-man-or-woman-or-unicorn.csv


Unnamed: 0,Response,proportion
0,unicorn,0.94
1,woman,0.03
2,man,0.02
3,unicorn!,0.01


In [32]:
# Yes/no poll: the prompt itself restricts the answer to yes or no.
prompt = "Are the french handsome? Only anser yes or no."
num_requests = 100
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121413043104_Are-the-french-handsome-Only-anser-yes-or-no.csv
Fetched 100/100 responses...
Finished! Responses saved to results/2024121413043104_Are-the-french-handsome-Only-anser-yes-or-no.csv


Unnamed: 0,Response,proportion
0,yes,1.0


In [26]:
# Arithmetic poll: the prompt sums thirteen 1s, 50 requests.
prompt = "1+1+1+1+1+1+1+1+1+1+1+1+1 = __ ?"
num_requests = 50
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121412534276_1-1-1-1-1-1-1-1-1-1-1-1-1.csv
Fetched 50/50 responses...
Finished! Responses saved to results/2024121412534276_1-1-1-1-1-1-1-1-1-1-1-1-1.csv


Unnamed: 0,Response,proportion
0,13,0.96
1,it is equal to 13,0.02
2,this equals 13,0.02


## Make longer tests

Ask for more extensive content, like fun facts, tips, anecdotes.  

In [27]:
# Open-ended poll: answers are counted as whole strings, so differently worded answers count separately.
prompt = "what is the most expensive dish in the world?"
num_requests = 100
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121412553479_what-is-the-most-expensive-dish-in-the-world.csv
Fetched 100/100 responses...
Finished! Responses saved to results/2024121412553479_what-is-the-most-expensive-dish-in-the-world.csv


Unnamed: 0,Response,proportion
0,"the most expensive dish in the world is the ""b...",0.01
1,"the most expensive dish in the world is the ""s...",0.01
2,"the most expensive dish in the world is the ""s...",0.01
3,one of the most expensive dishes in the world ...,0.01
4,"the most expensive dish in the world is the ""f...",0.01
...,...,...
95,one of the most expensive dishes in the world ...,0.01
96,"the most expensive dish in the world is the ""f...",0.01
97,the most expensive dish in the world is believ...,0.01
98,"the most expensive dish in the world is the ""t...",0.01


In [29]:
# Open-ended poll with a small sample of 5 requests.
prompt = "What is the key to a successful life ?"
num_requests = 5
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121413010604_What-is-the-key-to-a-successful-life.csv
Fetched 5/5 responses...
Finished! Responses saved to results/2024121413010604_What-is-the-key-to-a-successful-life.csv


Unnamed: 0,Response,proportion
0,the key to a successful life is subjective and...,0.2
1,the key to a successful life is subjective and...,0.2
2,the key to a successful life is subjective and...,0.2
3,the key to a successful life can vary dependin...,0.2
4,there is no one-size-fits-all answer to what c...,0.2


In [30]:
# French-language prompt (asks which city makes the best andouillette), 5 requests.
prompt = "C'est quelle ville qui fait la meilleure andouillette?"
num_requests = 5
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121413022163_C-est-quelle-ville-qui-fait-la-meilleure-andouillette.csv
Fetched 5/5 responses...
Finished! Responses saved to results/2024121413022163_C-est-quelle-ville-qui-fait-la-meilleure-andouillette.csv


Unnamed: 0,Response,proportion
0,"la ville de troyes, en france, est réputée pou...",0.2
1,la ville d'argentan en normandie est réputée p...,0.2
2,il est difficile de dire quelle ville fait la ...,0.2
3,"la ville de troyes, en france, est réputée pou...",0.2
4,il est difficile de dire quelle ville fait la ...,0.2


In [31]:
# French-language prompt (asks for the secret of a good couscous), 5 requests.
prompt = "Quel est le secret d'un bon couscous?"
num_requests = 5
get_ratios_for_poll(prompt, num_requests)

Responses will be saved to: results/2024121413033169_Quel-est-le-secret-d-un-bon-couscous.csv
Fetched 5/5 responses...
Finished! Responses saved to results/2024121413033169_Quel-est-le-secret-d-un-bon-couscous.csv


Unnamed: 0,Response,proportion
0,le secret d'un bon couscous réside principalem...,0.2
1,le secret d'un bon couscous réside dans la cui...,0.2
2,le secret d'un bon couscous réside dans la qua...,0.2
3,le secret d'un bon couscous réside dans la qua...,0.2
4,le secret d'un bon couscous réside dans la qua...,0.2
