In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import markdown
from markdown.extensions import codehilite


In [2]:
# Load the summaries CSV that the rest of the notebook processes.
raw_summaries_csv = '../data_files/raw/summaries_V0903_for_humanstudy_detail.csv'
df = pd.read_csv(raw_summaries_csv)

In [3]:
# List the distinct values found in the `model` column.
df['model'].unique()

array(['deepseek-chat', 'gemini-2.5-pro', 'gpt-5', 'qwen3-32b',
       'web-rev-claude-opus-4-20250514'], dtype=object)

In [4]:
# Shuffle all rows and renumber the index from 0. The seed is fixed so that a
# fresh Restart & Run All always produces the same order; without it, any
# later step that takes "the first N rows" selects different rows on each run.
SHUFFLE_SEED = 42
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [5]:
# Display the shuffled frame (pandas truncates the repr for long frames).
df

Unnamed: 0,id,topic,question,summary,model,comment_num,comments,num_samples_group,sample_id,dataset_name,source_path
0,5a2dd4a2f9e035f3,Binary-Online-Identity-Policies,Do you support requiring real-name registratio...,## Summary of Comments on Real-Name Registrati...,web-rev-claude-opus-4-20250514,30,0: No I don’t support it. I think it’s fine to...,30,3,Binary-Online-Identity-Policies,results/human_judgement/web-rev-claude-opus-4-...
1,dcf7338516e86c11,Openqa-AI-changes-human-life,How has AI changed your life? Please answer br...,## Summary of AI's Impact on Daily Life\n\nThe...,web-rev-claude-opus-4-20250514,90,0: Al changed my life by making my daily tasks...,90,3,Openqa-AI-changes-human-life,results/human_judgement/web-rev-claude-opus-4-...
2,776101552d32a3a2,Openqa-AI-changes-human-life,How has AI changed your life? Please answer br...,The comments provided reflect a wide range of ...,qwen3-32b,70,0: Al changed my life by making my daily tasks...,70,2,Openqa-AI-changes-human-life,results/human_judgement/qwen3-32b/70/Openqa-AI...
3,e7fa707f98c3891c,Openqa-AI-changes-human-life,How has AI changed your life? Please answer br...,## Overall Summary of Comments on AI's Impact ...,web-rev-claude-opus-4-20250514,90,0: Al changed my life by making my daily tasks...,90,2,Openqa-AI-changes-human-life,results/human_judgement/web-rev-claude-opus-4-...
4,1a009e572f9df1ac,Openqa-Influencers-as-a-job,What is your opinion on internet influencers (...,## Summary of Perspectives on Internet Influen...,web-rev-claude-opus-4-20250514,30,"0: It's not for me, but I appreciate the mediu...",30,1,Openqa-Influencers-as-a-job,results/human_judgement/web-rev-claude-opus-4-...
...,...,...,...,...,...,...,...,...,...,...,...
745,f898bce674a1ce0e,Openqa-AI-changes-human-life,How has AI changed your life? Please answer br...,"Based on the comments provided, here is an ove...",gemini-2.5-pro,70,0: Al changed my life by making my daily tasks...,70,2,Openqa-AI-changes-human-life,results/human_judgement/gemini-2.5-pro/70/Open...
746,c83c1f250a50b137,Binary-Health-Care-Policy,Do you support the government provide basic he...,The comments reflect a strong overall support ...,qwen3-32b,50,"0: Yes, I think government based healthcare is...",50,2,Binary-Health-Care-Policy,results/human_judgement/qwen3-32b/50/Binary-He...
747,a0c2bea7a8720c6c,Openqa-Trump-cutting-funding,What are your thoughts on Trump’s decision to ...,"Based on the comments provided, here is a summ...",gemini-2.5-pro,90,0: I think it's ridiculous. Education is liter...,90,3,Openqa-Trump-cutting-funding,results/human_judgement/gemini-2.5-pro/90/Open...
748,04af91757c35fc56,Binary-Health-Care-Policy,Do you support the government provide basic he...,## Summary of Comments on Government-Provided ...,web-rev-claude-opus-4-20250514,90,"0: Yes, I think government based healthcare is...",90,2,Binary-Health-Care-Policy,results/human_judgement/web-rev-claude-opus-4-...


In [6]:


# Build the human-study items: every row of `df` becomes two output rows, a
# "<id>_question" row followed by a "<id>_summary" row.

# Answer-length instructions that are removed from the displayed question.
_LENGTH_INSTRUCTIONS = (
    "Please answer briefly in 2–3 sentences.",
    "Please answer briefly in 1–2 sentences.",
)


def _question_html(question):
    """Return the display text for a question.

    Newlines become ``<br>``, the answer-length instructions are removed, the
    remainder is stripped of surrounding whitespace and prefixed with
    ``[Question]``.
    """
    text = question.replace("\n", "<br>")
    for instruction in _LENGTH_INSTRUCTIONS:
        # Drop the instruction together with its separating space when there
        # is one, and on its own otherwise, so both variants are handled the
        # same way and no stray space is left behind.
        text = text.replace(" " + instruction, "").replace(instruction, "")
    return '[Question]' + text.strip()


def _summary_html(summary):
    """Render a markdown summary to HTML, preceded by a fixed intro line."""
    rendered = markdown.markdown(summary, extensions=['extra', 'codehilite'])
    return "Below is a summary of people's opinions on the issue.<br><hr><br>" + rendered


def _make_entry(row, suffix, text):
    """Build one output record for `row`; `suffix` is appended to the row id."""
    return {
        "id": f"{row['id']}_{suffix}",
        "raw_id": row['id'],
        "question": row['question'],
        "text": text,
        "model": row['model'],
        # None when the row has no `summary_length` field.
        "summary_length": row.get('summary_length', None),
    }


processed_data = []

for _, row in df.iterrows():
    processed_data.append(_make_entry(row, "question", _question_html(row['question'])))
    processed_data.append(_make_entry(row, "summary", _summary_html(row['summary'])))

# Convert the collected records into a DataFrame in one step.
processed_df = pd.DataFrame(processed_data)

# Sanity check: exactly one question row and one summary row per input row.
assert len(processed_df) == 2 * len(df)


In [8]:
# Display the processed question/summary rows.
processed_df

Unnamed: 0,id,raw_id,question,text,model,summary_length
0,5a2dd4a2f9e035f3_question,5a2dd4a2f9e035f3,Do you support requiring real-name registratio...,[Question]Do you support requiring real-name r...,web-rev-claude-opus-4-20250514,
1,5a2dd4a2f9e035f3_summary,5a2dd4a2f9e035f3,Do you support requiring real-name registratio...,Below is a summary of people's opinions on the...,web-rev-claude-opus-4-20250514,
2,dcf7338516e86c11_question,dcf7338516e86c11,How has AI changed your life? Please answer br...,[Question]How has AI changed your life?,web-rev-claude-opus-4-20250514,
3,dcf7338516e86c11_summary,dcf7338516e86c11,How has AI changed your life? Please answer br...,Below is a summary of people's opinions on the...,web-rev-claude-opus-4-20250514,
4,776101552d32a3a2_question,776101552d32a3a2,How has AI changed your life? Please answer br...,[Question]How has AI changed your life?,qwen3-32b,
...,...,...,...,...,...,...
1495,a0c2bea7a8720c6c_summary,a0c2bea7a8720c6c,What are your thoughts on Trump’s decision to ...,Below is a summary of people's opinions on the...,gemini-2.5-pro,
1496,04af91757c35fc56_question,04af91757c35fc56,Do you support the government provide basic he...,[Question]Do you support the government provid...,web-rev-claude-opus-4-20250514,
1497,04af91757c35fc56_summary,04af91757c35fc56,Do you support the government provide basic he...,Below is a summary of people's opinions on the...,web-rev-claude-opus-4-20250514,
1498,ce164d741106ab5e_question,ce164d741106ab5e,Do you support requiring real-name registratio...,[Question]Do you support requiring real-name r...,qwen3-32b,


In [7]:
# Show the ten most frequent `text` values together with their counts.
text_counts = processed_df['text'].value_counts()
text_counts.head(10)

text
[Question]What is your opinion on internet influencers (e.g., streamers, bloggers, short video creators) increasingly becoming a recognized profession?             75
[Question]Do you think the current tariff policy under the Trump administration will have a positive or negative impact on the overall U.S. economy and society?    75
[Question]What are your thoughts on Trump’s decision to cut academic funding?                                                                                       75
[Question]Do you support requiring real-name registration on social media platforms, where users must register and post under their real identity?                  75
[Question]Do you support the government provide basic health insurance for everyone?                                                                                75
[Question]How has AI changed your life?                                                                                                                         

In [9]:
# Write the pilot file: the first 20 items, each of which occupies two
# consecutive rows of `processed_df` (a question row and a summary row).
PILOT_ITEMS = 20
ROWS_PER_ITEM = 2
pilot_df = processed_df.head(PILOT_ITEMS * ROWS_PER_ITEM)
pilot_df.to_csv('../data_files/processed/sum_humanstudy_v0903_pilot_20.csv', index=False)