# Generate Questions (using GPT-4o mini) based on Debate Dialogues for Democrats

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import textwrap

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BertModel, BertTokenizer, AdamW
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

from openai import OpenAI
from google.colab import userdata

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


# Generate Questions (using GPT-4o mini) based on Debate Dialogues for Democrats between 2016 and 2024

In [None]:
# Load the Democratic debate dialogue rows for the 2016 dataset from CSV.
dem_2016 = pd.read_csv("2016_democrat_data.csv")

In [None]:
# Display the loaded DataFrame to inspect its columns and rows.
dem_2016

Unnamed: 0,actor,dialogue,date,Party,word_count
0,Kaine,"Elaine, thank you for being here tonight, and,...",2016-10-04,Democrat,415
1,Kaine,"Elaine, let me tell you why I trust Hillary Cl...",2016-10-04,Democrat,248
2,Kaine,You both have said you both have said Vladimi...,2016-10-04,Democrat,17
3,Kaine,These guys have praised Vladimir Putin as a gr...,2016-10-04,Democrat,13
4,Kaine,And paid few taxes and lost a billion a year.,2016-10-04,Democrat,10
...,...,...,...,...,...
627,BIDEN,"Well, guess what? We got it – we got it down t...",2024-06-27,Democrat,39
628,BIDEN,You know what that did? That reduced the feder...,2024-06-27,Democrat,29
629,BIDEN,I'm going to make that available to every seni...,2024-06-27,Democrat,28
630,BIDEN,We have – I'm going to make sure we have child...,2024-06-27,Democrat,51


In [None]:
# Sydney's API key, fetched via google.colab's `userdata` under the name 'MIT_NLP_OPEN_AI'
api_key_me = userdata.get('MIT_NLP_OPEN_AI')
# Module-level OpenAI client; chatWithGPT sends its requests through this object
client = OpenAI(api_key=api_key_me)

def chatWithGPT(prompt, model="gpt-4o-mini", system_prompt="You are a helpful assistant."):
    """Send a single-turn prompt to the OpenAI chat API and return the reply text.

    The request goes through the module-level ``client``.

    Args:
        prompt: Text sent as the user message.
        model: Name of the chat model to query. Defaults to "gpt-4o-mini",
            the value that was previously hard-coded.
        system_prompt: Text sent as the system message. Defaults to the
            previously hard-coded "You are a helpful assistant.".

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )

    # Only the first returned choice is used
    return response.choices[0].message.content

# Generate a moderator-style question for a debate response using GPT-4o-mini
def categorize_response(text):
    """Return a question a moderator might have asked to elicit `text`.

    Despite its name, this function does not categorize anything: it builds
    a question-writing prompt around `text` and returns whatever
    `chatWithGPT` replies.
    """
    instructions = (
        "Craft a question a debate moderator might ask that would naturally lead to the following response in a presidential debate. "
        "The question should be neutral, engaging, and directly related to the content of the provided response. "
        "Only provide the question, no explanation: "
    )
    # The response text is appended directly after the instructions
    return chatWithGPT(f"{instructions}{text}")


In [None]:
# Generate one moderator-style question per dialogue row of dem_2016, writing
# the DataFrame to CSV periodically so partial results exist on disk.
checkpoint_every = 100  # rows between progress messages / CSV checkpoints
total_rows = len(dem_2016)

# Running count of rows processed so far
processed_count = 0

# Iterate over index labels instead of range(len(...)): `.at` is label-based,
# so positional integers would only be correct for a default 0..n-1 index.
for row_label in dem_2016.index:
    dem_2016.at[row_label, 'gpt_4_mini_question'] = categorize_response(
        dem_2016.at[row_label, 'dialogue']
    )

    # Increment the counter
    processed_count += 1

    # Every `checkpoint_every` rows: report progress and save a checkpoint
    if processed_count % checkpoint_every == 0:
        print(f"Processed {processed_count}/{total_rows} rows")
        dem_2016.to_csv('dem_2016_questions.csv', index=False)
        print(f"Checkpoint: Saved progress at {processed_count} rows")

# Save final result after all rows are processed
dem_2016.to_csv('dem_2016_questions.csv', index=False)
print("Processing complete. Final CSV saved.")

# Alias for the processed DataFrame (same object, not a copy)
result = dem_2016

Processed 100/632 rows
Checkpoint: Saved progress at 100 rows
Processed 200/632 rows
Checkpoint: Saved progress at 200 rows
Processed 300/632 rows
Checkpoint: Saved progress at 300 rows
Processed 400/632 rows
Checkpoint: Saved progress at 400 rows
Processed 500/632 rows
Checkpoint: Saved progress at 500 rows
Processed 600/632 rows
Checkpoint: Saved progress at 600 rows
Processing complete. Final CSV saved.


In [None]:
# Preview up to the first 20 rows, including the new 'gpt_4_mini_question' column.
dem_2016.head(20)

Unnamed: 0,actor,dialogue,date,Party,word_count,gpt_4_mini_question
0,Kaine,"Elaine, thank you for being here tonight, and,...",2016-10-04,Democrat,415,What experiences do you believe have best prep...
1,Kaine,"Elaine, let me tell you why I trust Hillary Cl...",2016-10-04,Democrat,248,"""How do you evaluate the impact of a candidate..."
2,Kaine,You both have said you both have said Vladimi...,2016-10-04,Democrat,17,"""How would you assess the leadership styles of..."
3,Kaine,These guys have praised Vladimir Putin as a gr...,2016-10-04,Democrat,13,What is your stance on the recent comments mad...
4,Kaine,And paid few taxes and lost a billion a year.,2016-10-04,Democrat,10,"""In your view, how should we address the issue..."
5,Kaine,You are Donald Trumps apprentice. Let me talk ...,2016-10-04,Democrat,10,"""Can you describe how your experience working ..."
6,Kaine,"Now, I get to weigh in. Now, let me just say this",2016-10-04,Democrat,12,"""Can you share your perspective on the key iss..."
7,Kaine,Governor Pence Governor Pence doesnt think th...,2016-10-04,Democrat,22,"Governor Pence, how do you assess the current ..."
8,Kaine,Let me tell you this. When Hillary Clinton bec...,2016-10-04,Democrat,23,"Governor Pence, can you discuss how the leader..."
9,Kaine,"Do you know that we had 175,000 troops deploye...",2016-10-04,Democrat,101,What specific actions do you believe the U.S. ...


In [None]:
# Remove every double-quote character from the generated questions (the model
# sometimes wraps its answer in quotes), then preview the result.
dem_2016['gpt_4_mini_question'] = (
    dem_2016['gpt_4_mini_question'].str.replace('"', '', regex=False)
)
dem_2016.head(20)

Unnamed: 0,actor,dialogue,date,Party,word_count,gpt_4_mini_question
0,Kaine,"Elaine, thank you for being here tonight, and,...",2016-10-04,Democrat,415,What experiences do you believe have best prep...
1,Kaine,"Elaine, let me tell you why I trust Hillary Cl...",2016-10-04,Democrat,248,How do you evaluate the impact of a candidate'...
2,Kaine,You both have said you both have said Vladimi...,2016-10-04,Democrat,17,How would you assess the leadership styles of ...
3,Kaine,These guys have praised Vladimir Putin as a gr...,2016-10-04,Democrat,13,What is your stance on the recent comments mad...
4,Kaine,And paid few taxes and lost a billion a year.,2016-10-04,Democrat,10,"In your view, how should we address the issue ..."
5,Kaine,You are Donald Trumps apprentice. Let me talk ...,2016-10-04,Democrat,10,Can you describe how your experience working w...
6,Kaine,"Now, I get to weigh in. Now, let me just say this",2016-10-04,Democrat,12,Can you share your perspective on the key issu...
7,Kaine,Governor Pence Governor Pence doesnt think th...,2016-10-04,Democrat,22,"Governor Pence, how do you assess the current ..."
8,Kaine,Let me tell you this. When Hillary Clinton bec...,2016-10-04,Democrat,23,"Governor Pence, can you discuss how the leader..."
9,Kaine,"Do you know that we had 175,000 troops deploye...",2016-10-04,Democrat,101,What specific actions do you believe the U.S. ...


In [None]:
# Write the current dem_2016 DataFrame to a separate "clean" CSV, without the index column.
dem_2016.to_csv('dem_2016_questions_clean.csv', index=False)

# Generate Questions (using GPT-4o mini) based on Debate Dialogues for Democrats from the 1976 dataset (`1976_democrat_data.csv`)

In [27]:
# Load the 1976 dataset, then discard its "Unnamed: 0" column
# (presumably an index column written by an earlier to_csv — confirm).
dem_1976 = pd.read_csv("1976_democrat_data.csv")
dem_1976 = dem_1976.drop(columns=["Unnamed: 0"])
dem_1976.head()

Unnamed: 0,actor,dialogue,date,Party,word_count
0,Ferraro,"Well, let me first say that I wasnt born at th...",1984-10-11,Democrat,383
1,Ferraro,I dont. Let me say that Im not a believer in p...,1984-10-11,Democrat,132
2,Ferraro,"I, I think what Im going to have to do is Im g...",1984-10-11,Democrat,161
3,Ferraro,With reference to the busing vote that I cast ...,1984-10-11,Democrat,395
4,Ferraro,I do not support the use of quotas. Both Mr. M...,1984-10-11,Democrat,130


In [28]:
# Generate one moderator-style question per dialogue row of dem_1976, writing
# the DataFrame to CSV periodically so partial results exist on disk.
checkpoint_every = 100  # rows between progress messages / CSV checkpoints
total_rows = len(dem_1976)

# Running count of rows processed so far
processed_count = 0

# Iterate over index labels instead of range(len(...)): `.at` is label-based,
# so positional integers would only be correct for a default 0..n-1 index.
for row_label in dem_1976.index:
    dem_1976.at[row_label, 'gpt_4_mini_question'] = categorize_response(
        dem_1976.at[row_label, 'dialogue']
    )

    # Increment the counter
    processed_count += 1

    # Every `checkpoint_every` rows: report progress and save a checkpoint
    if processed_count % checkpoint_every == 0:
        print(f"Processed {processed_count}/{total_rows} rows")
        dem_1976.to_csv('dem_1976_questions.csv', index=False)
        print(f"Checkpoint: Saved progress at {processed_count} rows")

# Save final result after all rows are processed
dem_1976.to_csv('dem_1976_questions.csv', index=False)
print("Processing complete. Final CSV saved.")

# Alias for the processed DataFrame (same object, not a copy)
result = dem_1976

Processed 100/158 rows
Checkpoint: Saved progress at 100 rows
Processing complete. Final CSV saved.


In [33]:
# Remove every double-quote character from the generated questions (the model
# sometimes wraps its answer in quotes).
dem_1976['gpt_4_mini_question'] = dem_1976['gpt_4_mini_question'].str.replace('"', '')
# index=False matches the other to_csv calls in this notebook; without it the
# row index is written as an extra unnamed column that shows up as
# "Unnamed: 0" when the file is read back.
dem_1976.to_csv('dem_1976_questions_clean.csv', index=False)