# Getting Vanilla LLM Responses
### We're going to generate responses from an LLM for each question in the questions_and_human_perspectives.csv file.

The input is the questions_and_human_perspectives.csv file, which combines questions from several sources (e.g. habermas, valueprism, perspectrum).
The output is a large CSV of questions, opinions, and LLM responses. LLM responses & questions are 1-to-1 but duplicated across varying opinions. This is not space efficient but makes it easier to work with.

In [31]:
from dotenv import load_dotenv
import pandas as pd, numpy as np, os

# Load environment variables, then read the two path prefixes used by the
# rest of the notebook. File names are appended to them with plain `+`
# below, so each prefix is expected to end with a path separator.
load_dotenv()
DATA_PATH, TEMP_PATH = (os.environ.get(key) for key in ('DATA_PATH', 'TEMP_PATH'))

In [32]:
# Read the questions/perspectives table, report its shape, and show a random
# sample of 10 rows as the cell output.
df_questions = pd.read_csv(
    DATA_PATH+'questions_and_human_perspectives.csv',
)
print(f"df_questions.shape:  {df_questions.shape}")
df_questions.sample(10)

df_questions.shape:  (3345, 4)


Unnamed: 0.1,Unnamed: 0,question,perspectives,source
480,480,Should it be mandatory to recycle household wa...,['I believe it should be mandatory to recycle ...,habermas
2273,13361,Lying to a cop,['Value: Honesty\nExplanation: Telling the tru...,valueprism
226,226,Is it okay for the government to prohibit peop...,"[""I'm not too sure on this one - on one hand, ...",habermas
2488,50,Arming the police is a good idea,['It is hypocritical and will not benefit civi...,perspectrum
2084,28869,run a red light,['Value: Safety\nExplanation: Running a red li...,valueprism
690,690,Should the UK increase the amount of public fu...,"[""Absolutely not. The BBC should make it's own...",habermas
2213,15895,Pulling a lever to send a train down a track w...,['Value: Preservation of life\nExplanation: In...,valueprism
2228,28120,pardoning julian assange,['Value: Justice\nExplanation: Pardoning Julia...,valueprism
867,867,Should the government support businesses by re...,['The government should support businesses by ...,habermas
663,663,Should the UK government reduce immigration in...,['i feel the uk government should reduce immig...,habermas


In [33]:
# Let's reduce the number of questions to a small random subset for testing.
# Raise N_TEST_QUESTIONS for a larger run; the fixed random_state keeps the
# selected subset reproducible between runs.
N_TEST_QUESTIONS = 5
df_questions = df_questions.sample(n=N_TEST_QUESTIONS, random_state=42)

In [34]:
import pandas as pd
from tqdm import tqdm
import json
import os

def generate_responses(questions, generation_function, output_path, start_from_checkpoint=True):
    """
    General helper to generate responses from an LLM and save them to a JSON file.

    It takes in an arbitrary generation function and can resume from a checkpoint.

    Args:
        questions: Iterable of questions (e.g. a list or a pandas Series). Each
            question is used as a key of the saved JSON object.
        generation_function: Callable that takes one question and returns its response.
        output_path: Path of the JSON file ({question: response}) to write.
        start_from_checkpoint: If True and `output_path` already exists, its content
            is loaded and only questions missing from it are generated. If the file
            does not exist yet, generation simply starts from scratch. If False,
            any existing file is ignored and overwritten.

    Returns:
        The dict mapping each answered question to its response, including
        entries loaded from the checkpoint. The dict is also returned when
        there was nothing left to generate.

    Questions for which `generation_function` raises are reported with print()
    and skipped, so they are absent from the returned dict and the saved file.
    """
    # Load existing responses only if we want to resume AND a checkpoint is there;
    # a first run with the default arguments has no file to read yet.
    responses = {}
    if start_from_checkpoint and os.path.exists(output_path):
        with open(output_path, 'r') as f:
            responses = json.load(f)

    # Make sure the directory exists. A bare file name has an empty dirname,
    # and os.makedirs('') would raise, so only create a directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Get questions that haven't been answered yet
    remaining_questions = [q for q in questions if q not in responses]

    if not remaining_questions:
        print("All questions already processed.")
        return responses

    # Process each remaining question with progress bar
    for idx, question in enumerate(tqdm(remaining_questions, desc="Generating responses", smoothing=0, ascii=True)):
        try:
            responses[question] = generation_function(question)
        except Exception as e:
            # Best effort: report the failure and move on to the next question.
            print(f"\nError processing question '{question}' for: {str(e)}")
            continue

        # Periodic checkpoint so an interrupted run loses little work.
        if idx % 10 == 0:
            with open(output_path, 'w') as f:
                json.dump(responses, f, indent=2)

    # Final save with everything that was generated.
    with open(output_path, 'w') as f:
        json.dump(responses, f, indent=2)

    return responses

# Running questions through various LLMs


### OpenAI Models
We're going to start with OpenAI models. You'll need to set your OpenAI API key in the .env file.

In [11]:
# Let's generate an LLM response for each question, for each AI model.

from openai import OpenAI
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def generate_openai_response(question, model):
    """
    Send `question` as a single user message to the chat model `model`
    through the module-level `client` and return the text of the first choice.
    """
    chat_messages = [{"role": "user", "content": question}]
    result = client.chat.completions.create(model=model, messages=chat_messages)
    first_choice = result.choices[0]
    return first_choice.message.content

# OpenAI models to query. Each model gets its own <model>_responses.json
# file under the TEMP_PATH prefix.
oai_models = ['gpt-4o-mini', 'gpt-3.5-turbo']

for model in oai_models:
    output_file = f"{model}_responses.json"

    def generation_function(x):
        return generate_openai_response(x, model)

    # start_from_checkpoint=False: ignore any earlier file and regenerate.
    responses = generate_responses(
        df_questions['question'],
        generation_function,
        TEMP_PATH+output_file,
        start_from_checkpoint=False,
    )

Generating responses: 100%|##########| 5/5 [00:37<00:00,  7.42s/it]
Generating responses: 100%|##########| 5/5 [00:09<00:00,  1.93s/it]


### Hugging Face Models

In [12]:
import requests
from huggingface_hub import InferenceClient
hf_api_key = os.environ.get('HUGGINGFACE_API_KEY')  # API key is read from the environment (.env)
def query_huggingface(hf_client: InferenceClient, inputs: str, chat:bool=False) -> str:
    """
    Helper to query the Huggingface API through an already constructed client.

    Huggingface models are either simple inference endpoints, dedicated endpoints, or chat endpoints. We can use dedicated endpoints by passing in the API URL directly.

    Args:
        hf_client: Client whose `chat_completion` / `text_generation` method is called.
        inputs: The text to send to the model.
        chat: When true, `inputs` is wrapped as a single user message and sent
            with `chat_completion` (max_tokens=1024); the content of the first
            choice is returned. Otherwise `inputs` is sent as a raw prompt with
            `text_generation` (max_new_tokens=500) and that call's result is
            returned unchanged.
    """
    # Non-chat models: plain text generation.
    if not chat:
        return hf_client.text_generation(prompt=inputs, max_new_tokens=500)

    # Chat models: one user turn in, first choice out.
    user_turn = {"role": "user", "content": inputs}
    reply = hf_client.chat_completion(messages=[user_turn], max_tokens=1024)
    return reply.choices[0].message.content

In [97]:
# Because many of the non-instruction tuned based models don't have warm inference endpoints (and aren't available on tools like Together), we're going to create dedicated endpoints on AWS through the Huggingface API.
# The dedicated endpoints take a while to warm up (also you may need to fiddle with URLs)

# models = {
#     'gemma-2-2b-it': "https://z70frgvzih3230r1.us-east-1.aws.endpoints.huggingface.cloud", 
#     'gemma-2-2b': "https://vbkt0rabiunjn175.us-east-1.aws.endpoints.huggingface.cloud",  
#     "llama-3.1-8B": "https://ropkydxq3vq8qff9.us-east-1.aws.endpoints.huggingface.cloud", # These will be slow to run.
#     "llama-3.1-8B-it": "https://dxdj0n50tbi0mgar.us-east-1.aws.endpoints.huggingface.cloud", 
# }

# non_chat_models = ['llama-3.1-8B', 'gemma-2-2b-it', 'gemma-2-2b']

# for model, api_url in models.items():
#     output_file = model+'_responses.json'
#     hf_client = InferenceClient(model=api_url, token=hf_api_key)
#     if model in non_chat_models:
#         generation_function = lambda x: query_huggingface(hf_client, x, chat=False)
#     else:
#         generation_function = lambda x: query_huggingface(hf_client, x, chat=True)

#     responses = generate_responses(
#         questions=df_questions['question'],
#         generation_function=generation_function,
#         output_path=TEMP_PATH+output_file,
#         start_from_checkpoint=False
#     )

Now we're going to run this with Google DeepMind models. You may need to run:

`gcloud components update`

`gcloud auth application-default login`


### Google Cloud Models

In [14]:
import vertexai
from vertexai.generative_models import GenerativeModel

# The Google Cloud project ID is read from the GOOGLE_PROJECT_ID variable
# (.env file) and printed so the run shows which project it is using.
PROJECT_ID = os.environ.get('GOOGLE_PROJECT_ID')
print(f"Running with PROJECT_ID:  {PROJECT_ID}")

vertexai.init(location="us-central1", project=PROJECT_ID)

# Google models to query. Each model gets its own <model>_responses.json
# file under the TEMP_PATH prefix.
gdp_models = ['gemini-1.5-flash-002']
for model in gdp_models:
    output_file = model+'_responses.json'

    # Keep the model *name* in `model` and give the client object its own name,
    # so the loop variable is not silently turned from a string into a
    # GenerativeModel instance that lingers in the kernel afterwards.
    gemini_model = GenerativeModel(model)

    # Bind the current client as a default argument so the callable does not
    # depend on what the enclosing name refers to later on.
    generation_function = lambda x, gemini_model=gemini_model: gemini_model.generate_content(x).text

    responses = generate_responses(
        questions=df_questions['question'],
        generation_function=generation_function,
        output_path=TEMP_PATH+output_file,
        start_from_checkpoint=False
    )

Running with PROJECT_ID:  selfanalysis


Generating responses: 100%|##########| 5/5 [00:12<00:00,  2.51s/it]


### Together AI Models

Together is a great model provider but doesn't have a lot of non-instruction tuned models.

In [15]:
# # This is a prompt suggested by Michiel to few shot non-instruction tuned models.
# michiel_prompt = lambda x: f"""Q: What is an ambigram
# A: An ambigram is a word, phrase, symbol, or design that retains its meaning or readability when viewed from a different perspective, orientation, or transformation. It is a type of visual wordplay that often relies on symmetry, rotation, or reflection.

# Q: What are the zipcodes in Cambridge MA?
# A: Cambridge, MA, has multiple ZIP codes depending on the specific area. Here are the primary ones:
# 02138: Harvard Square and surrounding areas
# 02139: Central Square and parts of MIT
# 02140: Porter Square and North Cambridge
# 02141: East Cambridge
# 02142: Kendall Square and parts of MIT
# If you need a ZIP code for a specific address or neighborhood, let me know!

# Q: What country is Steve Irwin from?
# A: Steve Irwin, famously known as "The Crocodile Hunter," was from Australia. Born in Essendon, Victoria, Australia, he was an internationally renowned wildlife expert, conservationist, and television personality known for his passion for wildlife and environmental preservation.

# Q: {x}"""

In [17]:
from together import Together

# Use a dedicated name for the Together client. Assigning it to `client` would
# replace the OpenAI client that generate_openai_response() looks up at call
# time, so any later call to that function would silently hit the wrong API.
together_client = Together(api_key=os.getenv('TOGETHER_API_KEY'))

def generate_together_response(question, model):
    """
    Send `question` as a single user message to the Together chat model `model`
    (max_tokens=2048) and return the text of the first choice.
    """
    completion = together_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": question}],
        max_tokens=2048
    )
    return completion.choices[0].message.content

# While you should be able to use the mistral models on HF, together is much faster with a dedicated endpoint and more models.
# Maps the short name used for the output file to the model id sent to the API.
together_models = {
    'mistral-7b-instruct': 'mistralai/Mistral-7B-Instruct-v0.1',
    # 'mistral-7b': 'mistralai/Mistral-7B-v0.1',
}

for bettername, model in together_models.items():
    output_file = f"{bettername}_responses.json"

    if bettername != 'mistral-7b':
        # Send the question as-is.
        def generation_function(x):
            return generate_together_response(x, model)
    else:
        # The 'mistral-7b' entry gets the question wrapped in a Q/A template first.
        def nonITprompt(x):
            return f"Question: {x}\n\nAnswer:"
        # Alternative wrapper: nonITprompt = lambda x: michiel_prompt(x)

        def generation_function(x):
            return generate_together_response(nonITprompt(x), model)

    responses = generate_responses(
        df_questions['question'],
        generation_function,
        TEMP_PATH+output_file,
        start_from_checkpoint=False,
    )

Generating responses: 100%|##########| 5/5 [00:14<00:00,  2.96s/it]


## Processing Responses
#### We're now going to load in all the responses and make them into one big dataframe.

In [23]:
# Load in without any sampling or processing
df_questions = pd.read_csv(
    DATA_PATH+'questions_and_human_perspectives.csv',
)

In [24]:
# Column / file-name prefixes of every model queried above; for the Together
# models these are the short names (dict keys), not the API model ids.
all_models = [*oai_models, *gdp_models, *together_models.keys()]

In [25]:
# all_models = oai_models + gdp_models + list(together_models.keys()) + list(hf_models.keys())
# all_models = ['gemma-2-2b-it', 'gemma-2-2b', 'gpt-4o-mini', 'gpt-3.5-turbo', 'gemini-1.5-flash-002', 'mistral-7b-instruct']

# Add one column per model, filled from that model's {question: response} JSON file.
for model in all_models:
    with open(TEMP_PATH+model+'_responses.json') as f:
        model_responses = json.load(f)
    # Mapping through the dict leaves NaN for questions this model has no entry for.
    df_questions[model] = df_questions['question'].map(model_responses)

In [27]:
# Drop, in place, every row that has a missing value in ANY column. This removes
# rows where at least one model has no response, but also rows with a missing
# value in any of the original (non-model) columns.
df_questions.dropna(axis=0, how='any', inplace=True)

In [29]:
# Basic sanity check: no column of df_questions (model response columns
# included) may contain a null value at this point.
assert not df_questions.isnull().any().any(), "df_questions still contains null values"

In [30]:
# Write the questions together with the per-model response columns to a new
# CSV under the DATA_PATH prefix, without the row index.
df_questions.to_csv(
    DATA_PATH+'questions_and_human_perspectives_with_responses.csv',
    index=False,
)