In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import markdown
from markdown.extensions import codehilite


In [2]:
# Load the raw summaries CSV for the human study into `df`.
df = pd.read_csv('../data_files/raw/summaries_V0903_for_humanstudy_detail.csv')

In [3]:
# Show the distinct values of the `model` column.
df.model.unique()

array(['deepseek-chat', 'gemini-2.5-pro', 'gpt-5', 'qwen3-32b',
       'web-rev-claude-opus-4-20250514'], dtype=object)

In [4]:
# Shuffle all rows and renumber the index from 0.
# A fixed random_state makes the shuffle reproducible, so the row order of
# every file derived from `df` is the same after Restart Kernel -> Run All.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [5]:
# Display the shuffled frame (pandas truncates the middle rows in the output).
df

Unnamed: 0,id,topic,question,summary,model,comment_num,comments,num_samples_group,sample_id,dataset_name,source_path
0,c763306fb89c8d1a,Binary-Online-Identity-Policies,Do you support requiring real-name registratio...,Overall summary:\n- Sentiment: Opposition domi...,gpt-5,10,0: No I don’t support it. I think it’s fine to...,10,3,Binary-Online-Identity-Policies,results/human_judgement/gpt-5/10/Binary-Online...
1,3db5a5a65e677bcd,Binary-Health-Care-Policy,Do you support the government provide basic he...,Overall summary of the comments:\n\n- Overall ...,gpt-5,90,"0: Yes, I think government based healthcare is...",90,2,Binary-Health-Care-Policy,results/human_judgement/gpt-5/90/Binary-Health...
2,db729d705199164d,Openqa-AI-changes-human-life,How has AI changed your life? Please answer br...,"Overall, the comments reflect a positive recep...",deepseek-chat,10,0: Al changed my life by making my daily tasks...,10,1,Openqa-AI-changes-human-life,results/human_judgement/deepseek-chat/10/Openq...
3,ad3135722c23a68f,Binary-Online-Identity-Policies,Do you support requiring real-name registratio...,Here is a summary of the comments provided:\n\...,gemini-2.5-pro,10,0: No I don’t support it. I think it’s fine to...,10,3,Binary-Online-Identity-Policies,results/human_judgement/gemini-2.5-pro/10/Bina...
4,9e4326748941154d,Binary-Health-Care-Policy,Do you support the government provide basic he...,"Based on the comments provided, here is an ove...",gemini-2.5-pro,90,"0: Yes, I think government based healthcare is...",90,1,Binary-Health-Care-Policy,results/human_judgement/gemini-2.5-pro/90/Bina...
...,...,...,...,...,...,...,...,...,...,...,...
745,586806a26aa4cbb2,Binary-Tariff-Policy,Do you think the current tariff policy under t...,Overall summary of the comments:\n\n- Sentimen...,gpt-5,70,0: i think that it will have a negative impact...,70,1,Binary-Tariff-Policy,results/human_judgement/gpt-5/70/Binary-Tariff...
746,4d430c1bb5afeec0,Binary-Health-Care-Policy,Do you support the government provide basic he...,## Summary of Comments on Government-Provided ...,web-rev-claude-opus-4-20250514,30,"0: Yes, I think government based healthcare is...",30,3,Binary-Health-Care-Policy,results/human_judgement/web-rev-claude-opus-4-...
747,620f1aea54fb96a7,Binary-Tariff-Policy,Do you think the current tariff policy under t...,## Summary of Comments on Current Tariff Polic...,web-rev-claude-opus-4-20250514,70,0: i think that it will have a negative impact...,70,2,Binary-Tariff-Policy,results/human_judgement/web-rev-claude-opus-4-...
748,f0835ba6b89f9dc2,Binary-Online-Identity-Policies,Do you support requiring real-name registratio...,## Summary of Comments on Real-Name Registrati...,web-rev-claude-opus-4-20250514,50,0: No I don’t support it. I think it’s fine to...,50,1,Binary-Online-Identity-Policies,results/human_judgement/web-rev-claude-opus-4-...


In [6]:


# Reshape every row of `df` into two annotation items that share one raw_id:
# a "<id>_question" item followed by a "<id>_summary" item.

def _question_item_text(question_text):
    """Return the display text of a question item.

    Newlines become ``<br>``, the two "Please answer briefly ..." instructions
    are removed, and the result is prefixed with ``[Question]``.
    """
    shown = question_text.replace("\n", "<br>")
    for boilerplate in (" Please answer briefly in 2–3 sentences.",
                        "Please answer briefly in 1–2 sentences."):
        shown = shown.replace(boilerplate, "")
    return '[Question]' + shown


processed_data = []

for _, record in df.iterrows():
    source_id = record['id']
    prompt = record['question']
    # `summary_length` is read defensively; it is None when the column is absent.
    length = record.get('summary_length', None)
    # The summary is markdown; render it to HTML with the markdown package.
    rendered_summary = markdown.markdown(record['summary'], extensions=['extra', 'codehilite'])

    processed_data.extend([
        {
            "id": f"{source_id}_question",
            "raw_id": source_id,
            "question": prompt,
            "text": _question_item_text(prompt),
            "model": record['model'],
            "summary_length": length,
        },
        {
            "id": f"{source_id}_summary",
            "raw_id": source_id,
            "question": prompt,
            "text": "<h4>Below is a summary of people's opinions on the issue.</h4><hr>" + rendered_summary,
            "model": record['model'],
            "summary_length": length,
        },
    ])

# One DataFrame row per annotation item, in question/summary order.
processed_df = pd.DataFrame(processed_data)


In [7]:
# Display the question/summary item table built in the previous cell.
processed_df

Unnamed: 0,id,raw_id,question,text,model,summary_length
0,c763306fb89c8d1a_question,c763306fb89c8d1a,Do you support requiring real-name registratio...,[Question]Do you support requiring real-name r...,gpt-5,
1,c763306fb89c8d1a_summary,c763306fb89c8d1a,Do you support requiring real-name registratio...,<h4>Below is a summary of people's opinions on...,gpt-5,
2,3db5a5a65e677bcd_question,3db5a5a65e677bcd,Do you support the government provide basic he...,[Question]Do you support the government provid...,gpt-5,
3,3db5a5a65e677bcd_summary,3db5a5a65e677bcd,Do you support the government provide basic he...,<h4>Below is a summary of people's opinions on...,gpt-5,
4,db729d705199164d_question,db729d705199164d,How has AI changed your life? Please answer br...,[Question]How has AI changed your life?,deepseek-chat,
...,...,...,...,...,...,...
1495,620f1aea54fb96a7_summary,620f1aea54fb96a7,Do you think the current tariff policy under t...,<h4>Below is a summary of people's opinions on...,web-rev-claude-opus-4-20250514,
1496,f0835ba6b89f9dc2_question,f0835ba6b89f9dc2,Do you support requiring real-name registratio...,[Question]Do you support requiring real-name r...,web-rev-claude-opus-4-20250514,
1497,f0835ba6b89f9dc2_summary,f0835ba6b89f9dc2,Do you support requiring real-name registratio...,<h4>Below is a summary of people's opinions on...,web-rev-claude-opus-4-20250514,
1498,fdb822cbba17c0f2_question,fdb822cbba17c0f2,Do you support the government provide basic he...,[Question]Do you support the government provid...,web-rev-claude-opus-4-20250514,


In [8]:
# Show the ten most frequent `text` values and how often each occurs.
processed_df['text'].value_counts()[:10]

text
[Question]What is your opinion on internet influencers (e.g., streamers, bloggers, short video creators) increasingly becoming a recognized profession?             75
[Question]Do you support the government accepting more refugees fleeing war or persecution?                                                                         75
[Question]Do you support the government having the authority to enforce vaccination and quarantine measures during severe epidemics?                                75
[Question]What is your opinion on the rapid update cycle of electronic products, especially smartphones?                                                            75
[Question]What is your opinion on tipping, and if given the chance, how would you improve or change the current tipping system?                                     75
[Question]Do you support the government provide basic health insurance for everyone?                                                                            

In [9]:
# Sanity-check the table's size and cardinalities before building pairs
print("Total rows in df:", len(df))
for label, column in (("questions", 'question'),
                      ("models", 'model'),
                      ("num_samples_group", 'num_samples_group')):
    print(f"Unique {label}:", df[column].nunique())

# Number of rows in each (question, num_samples_group) group
group_sizes = df.groupby(['question', 'num_samples_group']).size()
for heading, view in (("\nGroup sizes (question, num_samples_group):", group_sizes.describe()),
                      ("\nFirst few groups:", group_sizes.head(10))):
    print(heading)
    print(view)


Total rows in df: 750
Unique questions: 10
Unique models: 5
Unique num_samples_group: 5

Group sizes (question, num_samples_group):
count    50.0
mean     15.0
std       0.0
min      15.0
25%      15.0
50%      15.0
75%      15.0
max      15.0
dtype: float64

First few groups:
question                                                                                                                                                                          num_samples_group
Do you support requiring real-name registration on social media platforms, where users must register and post under their real identity? Please answer briefly in 2–3 sentences.  10                   15
                                                                                                                                                                                  30                   15
                                                                                                                          

In [10]:
# Write the question/summary items to the processed folder (index column omitted).
processed_df.to_csv('../data_files/processed/sum_humanstudy_rating_full.csv', index=False)

In [11]:
# Build 45 directed ring pairs per (question, num_samples_group), identical schema/HTML to original
import pandas as pd
from pathlib import Path
import markdown

# Resolve the inputs relative to the notebook, the same way the other cells
# read/write ../data_files/processed, instead of a machine-specific absolute
# home-directory path that only exists on one host.
PROCESSED_DIR = Path('../data_files/processed')
pair_src = PROCESSED_DIR / 'sum_humanstudy_pair_full.csv'
rating_src = PROCESSED_DIR / 'sum_humanstudy_rating_full.csv'

pair_df = pd.read_csv(pair_src)
rating_df = pd.read_csv(rating_src)
print('Loaded:', len(pair_df), 'pairs,', len(rating_df), 'ratings')

# Build id -> (text, model, length) from rating data.
# Keys are the rating file's `id` values converted to str (computed once).
# NOTE(review): the rating ids shown in this notebook end in `_question` /
# `_summary`; verify that the ids used for lookups have the same form.
rating_ids = rating_df['id'].astype(str)
id_to_text = dict(zip(rating_ids, rating_df['text']))
id_to_model = dict(zip(rating_ids, rating_df['model']))
# If the file has no `summary_length` column, every id maps to None.
id_to_len = dict(zip(rating_ids, rating_df.get('summary_length', pd.Series([None]*len(rating_df)))))

# Helper: side-by-side HTML layout for a pair of summaries (same markup as the original pair layout)

def build_comparison_html(summary_a_html: str, summary_b_html: str) -> str:
    """Return the two-column comparison HTML for a pair of summaries.

    Args:
        summary_a_html: HTML placed in the "Summary A" panel (inserted as-is).
        summary_b_html: HTML placed in the "Summary B" panel (inserted as-is).

    Returns:
        An instruction heading followed by a flex container holding two
        bordered, scrollable panels titled "Summary A" and "Summary B".
    """

    def _panel(label: str, body_html: str) -> str:
        # One bordered column: a title line plus a scrollable body.
        return "\n".join([
            '            <div style="flex: 1; border: 1px solid #ccc; padding: 15px; border-radius: 5px;">',
            f'                <h4 style="margin-top: 0; color: #2c5aa0;">Summary {label}</h4>',
            '                <div style="max-height: 800px; overflow-y: auto; padding-right: 10px;">',
            f'                    {body_html}',
            '                </div>',
            '            </div>',
        ])

    pieces = [
        '<h4>Two summaries of opinions are shown below. Read carefully and answer according to your prior opinion. Both are scrollable.</h4><hr>',
        '        <div style="display: flex; gap: 20px;">',
        _panel('A', summary_a_html),
        _panel('B', summary_b_html),
        '        </div>',
    ]
    return "\n".join(pieces)

# Directed ring indices: i -> i+d (mod n) for every step d; this notebook uses n=15, steps=(1,2,3)

def ring_directed_indices(n: int, steps=(1,2,3)):
    """Yield directed index pairs ``(i, j)`` on a ring of ``n`` positions.

    For each position ``i`` in ``0..n-1`` and each step ``d`` in ``steps``
    (in the given order), yields ``(i, (i + d) % n)``.

    Steps that are a multiple of ``n`` are skipped because they would pair an
    item with itself, and steps congruent to an earlier step modulo ``n`` are
    skipped because they would repeat a pair. When every step is distinct and
    in ``1..n-1`` (e.g. n=15 with steps 1, 2, 3) the output is ``n * len(steps)``
    pairs, exactly as before.

    Args:
        n: Number of positions on the ring; ``n <= 0`` yields nothing.
        steps: Iterable of integer offsets to pair each position with.

    Yields:
        Tuples ``(i, j)`` with ``i != j``.
    """
    if n <= 0:
        return

    # Normalise each step to an offset in 1..n-1, keeping first-seen order.
    offsets = []
    for d in steps:
        offset = d % n
        if offset and offset not in offsets:
            offsets.append(offset)

    for i in range(n):
        for offset in offsets:
            yield i, (i + offset) % n

# Ring-sampling parameters: a group must hold GROUP_SIZE summaries, and each
# summary is paired with the summaries RING_STEPS positions ahead of it, giving
# GROUP_SIZE * len(RING_STEPS) = 45 directed pairs per group.
GROUP_SIZE = 15
RING_STEPS = (1, 2, 3)


def _lookup_summary(table, summary_id, default=None):
    """Return the entry for ``summary_id``, trying ``'<summary_id>_summary'`` next.

    The lookup tables are keyed by the rating file's ``id`` column, whose values
    end in ``_question`` / ``_summary``, while the pair file refers to summaries
    by the bare id. An exact key match is preferred so tables keyed either way
    keep working.
    """
    if summary_id in table:
        return table[summary_id]
    return table.get(f'{summary_id}_summary', default)


rows = []
missing_text_ids = set()
for (question, nsg), g in pair_df.groupby(['question','num_samples_group'], sort=False):
    # Collect the unique summary ids for this group (question rows have no ids and drop out)
    ids = pd.unique(pd.concat([g['summary_a_id'], g['summary_b_id']], ignore_index=True).dropna().astype(str))
    if len(ids) < GROUP_SIZE:
        # Skip incomplete groups
        continue
    ids = sorted(ids)[:GROUP_SIZE]  # deterministic order

    for i, j in ring_directed_indices(GROUP_SIZE, steps=RING_STEPS):
        a_id, b_id = ids[i], ids[j]
        a_text_md = _lookup_summary(id_to_text, a_id, '')
        b_text_md = _lookup_summary(id_to_text, b_id, '')
        # Remember ids with no text so empty panels do not go unnoticed.
        missing_text_ids.update(sid for sid, txt in ((a_id, a_text_md), (b_id, b_text_md)) if txt == '')
        # NOTE(review): the rating file's `text` appears to be already-rendered
        # HTML (with its own heading) rather than markdown; confirm that passing
        # it through markdown again yields the intended panel content.
        a_html = markdown.markdown(a_text_md, extensions=['extra','codehilite'])
        b_html = markdown.markdown(b_text_md, extensions=['extra','codehilite'])
        html = build_comparison_html(a_html, b_html)

        row = {
            'id': f'{a_id}_{b_id}',
            'raw_id': f'{a_id}_{b_id}',
            'question': question,
            'text': html,
            'model': 'comparison',
            'num_samples_group': nsg,
            'summary_length': None,
            'model_a': _lookup_summary(id_to_model, a_id),
            'model_b': _lookup_summary(id_to_model, b_id),
            'summary_a_id': a_id,
            'summary_b_id': b_id,
            'summary_a_text': a_text_md,
            'summary_b_text': b_text_md,
            'summary_length_a': _lookup_summary(id_to_len, a_id),
            'summary_length_b': _lookup_summary(id_to_len, b_id),
        }
        rows.append(row)

if missing_text_ids:
    print(f'WARNING: no rating text found for {len(missing_text_ids)} summary ids; their panels are empty')

ring_df = pd.DataFrame(rows)
print('Built ring pairs:', len(ring_df))
print('Per-group counts (head):')
print(ring_df.groupby(['question','num_samples_group']).size().head())

# Ensure identical column order to original pair_full
cols = ['id','raw_id','question','text','model','num_samples_group','summary_length',
        'model_a','model_b','summary_a_id','summary_b_id','summary_a_text','summary_b_text',
        'summary_length_a','summary_length_b']
for c in cols:
    if c not in ring_df.columns:
        ring_df[c] = None
ring_df = ring_df[cols]

# Written next to the other processed files; this is the path the next cell reads back.
out = Path('../data_files/processed/sum_humanstudy_pair_full_ring.csv')
ring_df.to_csv(out, index=False)
print('Saved:', out)


Loaded: 4500 pairs, 1500 ratings
Built ring pairs: 2250
Per-group counts (head):
question                                                                                                                                                                          num_samples_group
Do you support requiring real-name registration on social media platforms, where users must register and post under their real identity? Please answer briefly in 2–3 sentences.  10                   45
                                                                                                                                                                                  30                   45
                                                                                                                                                                                  50                   45
                                                                                                                     

# Produce the triplet file (question + rating + comparison)

In [12]:
# Reload the rating items and the ring pairs from disk and print an overview of each
rating_df = pd.read_csv('../data_files/processed/sum_humanstudy_rating_full.csv')
pair_df = pd.read_csv('../data_files/processed/sum_humanstudy_pair_full_ring.csv')

loaded_frames = (("Rating", rating_df), ("Pair", pair_df))

for label, frame in loaded_frames:
    print(f"{label} data shape: {frame.shape}")

# Column names of each frame, preceded by one blank line
print()
for label, frame in loaded_frames:
    print(f"{label} data columns:", frame.columns.tolist())

# First three rows of each frame
for label, frame in loaded_frames:
    print(f"\n{label} data sample:")
    print(frame.head(3))


Rating data shape: (1500, 6)
Pair data shape: (2250, 15)

Rating data columns: ['id', 'raw_id', 'question', 'text', 'model', 'summary_length']
Pair data columns: ['id', 'raw_id', 'question', 'text', 'model', 'num_samples_group', 'summary_length', 'model_a', 'model_b', 'summary_a_id', 'summary_b_id', 'summary_a_text', 'summary_b_text', 'summary_length_a', 'summary_length_b']

Rating data sample:
                          id            raw_id  \
0  c763306fb89c8d1a_question  c763306fb89c8d1a   
1   c763306fb89c8d1a_summary  c763306fb89c8d1a   
2  3db5a5a65e677bcd_question  3db5a5a65e677bcd   

                                            question  \
0  Do you support requiring real-name registratio...   
1  Do you support requiring real-name registratio...   
2  Do you support the government provide basic he...   

                                                text  model  summary_length  
0  [Question]Do you support requiring real-name r...  gpt-5             NaN  
1  <h4>Below is a su

In [13]:
# Correct understanding: Both datasets have pairs of rows
# Rating: question + rating summary (2 rows per pair)  
# Pair: question + comparison (2 rows per pair)
# Goal: Create triplets by natural join on raw_id
#
# This cell walks both frames two rows at a time and collects the row pairs that
# pass a consistency check into `rating_pairs_df` and `pair_comparisons_df`.
# NOTE(review): if `pair_df` is the ring file loaded by the preceding cell, its
# rows are all written with model='comparison' and it has no question rows, so
# the "question + comparison" layout assumed here does not seem to apply and
# the `model == 'question'` check below looks like it can never pass.
# TODO confirm whether this cell is still needed.

print("Understanding data structure...")
print(f"Rating data: {len(rating_df)} total rows")
print(f"Pair data: {len(pair_df)} total rows")

# Extract rating pairs (question + summary)
rating_pairs = []
# Step through the rating frame two rows at a time: row i is treated as the
# question item and row i+1 as its summary item.
for i in range(0, len(rating_df), 2):
    if i + 1 < len(rating_df):
        question_row = rating_df.iloc[i]
        summary_row = rating_df.iloc[i + 1]

        # Verify this is a proper pair: same raw_id, and the ids carry the
        # expected '_question' / '_summary' markers. Mismatches are dropped silently.
        if (question_row['raw_id'] == summary_row['raw_id'] and 
            '_question' in question_row['id'] and 
            '_summary' in summary_row['id']):
            rating_pairs.append({
                'raw_id': question_row['raw_id'],
                'question': question_row['question'],
                'question_text': question_row['text'],
                'summary_text': summary_row['text'],
                'model': question_row['model'],
                'summary_length': question_row.get('summary_length')
            })

# Debug: Check the pair data structure first
print("Debugging pair data structure...")
print(f"First few rows of pair_df:")
print(pair_df.head(10))
print(f"\nChecking model column values:")
print(pair_df['model'].value_counts())

# Extract pair comparisons (question + comparison)  
pair_comparisons = []
# Same two-row stride as above: row i is treated as the question row and
# row i+1 as the comparison row.
for i in range(0, len(pair_df), 2):
    if i + 1 < len(pair_df):
        question_row = pair_df.iloc[i]
        comparison_row = pair_df.iloc[i + 1]

        # Show progress for debugging (only first few and every 100th)
        # (the condition actually logs i < 20 and every i divisible by 200)
        if i < 20 or i % 200 == 0:
            print(f"Row {i}: model='{question_row['model']}', raw_id='{question_row['raw_id']}'")
            print(f"Row {i+1}: model='{comparison_row['model']}', raw_id='{comparison_row['raw_id']}'")

        # Verify this is a proper pair
        # Question row has '_pair_X' suffix, comparison row doesn't
        # Strip everything from the last '_pair_' onward so both ids can be compared.
        question_base_id = question_row['raw_id'].rsplit('_pair_', 1)[0] if '_pair_' in question_row['raw_id'] else question_row['raw_id']
        comparison_base_id = comparison_row['raw_id']

        if i < 20 or i % 200 == 0:
            print(f"  Comparing: '{question_base_id}' == '{comparison_base_id}'")

        # Keep the pair only when the ids agree and the first row is marked as a question.
        if (question_base_id == comparison_base_id and
            question_row['model'] == 'question'):
            pair_comparisons.append({
                'raw_id': comparison_row['raw_id'], 
                'question': question_row['question'],
                'question_text': question_row['text'],
                'comparison_text': comparison_row['text'],
                'model_a': comparison_row.get('model_a'),
                'model_b': comparison_row.get('model_b'),
                'summary_a_id': comparison_row.get('summary_a_id'),
                'summary_b_id': comparison_row.get('summary_b_id'),
                'summary_a_text': comparison_row.get('summary_a_text'),
                'summary_b_text': comparison_row.get('summary_b_text')
            })
            if i < 20 or i % 200 == 0:
                print(f"  -> Added pair comparison")
        else:
            if i < 20 or i % 200 == 0:
                print(f"  -> Skipped: base_id_match={question_base_id == comparison_base_id}, model_check={question_row['model'] == 'question'}")

print(f"Extracted {len(rating_pairs)} rating pairs")
print(f"Extracted {len(pair_comparisons)} comparison pairs")

# Convert to DataFrames for easier joining
# (an empty list produces an empty DataFrame with no columns)
rating_pairs_df = pd.DataFrame(rating_pairs)
pair_comparisons_df = pd.DataFrame(pair_comparisons)


Understanding data structure...
Rating data: 1500 total rows
Pair data: 2250 total rows
Debugging pair data structure...
First few rows of pair_df:
                                  id                             raw_id  \
0  38a9bde51b2e82b9_7cb5806a4aa6f3ff  38a9bde51b2e82b9_7cb5806a4aa6f3ff   
1  38a9bde51b2e82b9_8172525bd422435b  38a9bde51b2e82b9_8172525bd422435b   
2  38a9bde51b2e82b9_97800d729fb0bca1  38a9bde51b2e82b9_97800d729fb0bca1   
3  7cb5806a4aa6f3ff_8172525bd422435b  7cb5806a4aa6f3ff_8172525bd422435b   
4  7cb5806a4aa6f3ff_97800d729fb0bca1  7cb5806a4aa6f3ff_97800d729fb0bca1   
5  7cb5806a4aa6f3ff_a368f52fd450713b  7cb5806a4aa6f3ff_a368f52fd450713b   
6  8172525bd422435b_97800d729fb0bca1  8172525bd422435b_97800d729fb0bca1   
7  8172525bd422435b_a368f52fd450713b  8172525bd422435b_a368f52fd450713b   
8  8172525bd422435b_ad3135722c23a68f  8172525bd422435b_ad3135722c23a68f   
9  97800d729fb0bca1_a368f52fd450713b  97800d729fb0bca1_a368f52fd450713b   

                          

In [18]:
# Build triplet_df from ring_df (question + rating(summary_a_id) + comparison)
import pandas as pd
from pathlib import Path

# Resolve files relative to the notebook, like the other cells that use
# ../data_files/processed, rather than a machine-specific absolute path.
PROCESSED_DIR = Path('../data_files/processed')
rating_src = PROCESSED_DIR / 'sum_humanstudy_rating_full.csv'
if 'ring_df' not in globals():
    # Fallback: load the ring pairs file
    ring_path = PROCESSED_DIR / 'sum_humanstudy_pair_full_ring.csv'
    ring_df = pd.read_csv(ring_path)

if 'rating_df' not in globals():
    rating_df = pd.read_csv(rating_src)

# Rating item text keyed by the rating file's `id` (as str).
rating_text_by_id = dict(zip(rating_df['id'].astype(str), rating_df['text']))

# Pair-level columns copied onto the rating and comparison rows, in output order.
PAIR_FIELDS = ('model_a', 'model_b', 'summary_a_id', 'summary_b_id',
               'summary_a_text', 'summary_b_text', 'summary_length_a', 'summary_length_b')

triplet_rows = []
missing_rating_ids = set()
for idx, row in ring_df.iterrows():
    question = row['question']
    clean_question = (
        str(question)
        .replace(' Please answer briefly in 2–3 sentences.', '')
        .replace('Please answer briefly in 1–2 sentences.', '')
    )
    triplet_id = f'triplet_ring_{idx}'
    pair_meta = {field: row.get(field) for field in PAIR_FIELDS}

    # Row 1: Question (pair-level fields are left empty)
    triplet_rows.append({
        'id': f'{triplet_id}_question',
        'raw_id': row['raw_id'],
        'question': question,
        'text': f'<h3>[Question]</h3><h4>{clean_question}</h4>',
        'type': 'question',
        'model': 'question',
        'num_samples_group': row.get('num_samples_group'),
        'summary_length': None,
        **dict.fromkeys(PAIR_FIELDS),
    })

    # Row 2: Rating for summary_a_id.
    # Rating ids end in `_question` / `_summary` while summary_a_id is the bare
    # id, so an exact key is tried first and `<id>_summary` second. Without the
    # second lookup every rating row would get empty text.
    a_id = str(row['summary_a_id'])
    if a_id in rating_text_by_id:
        rating_text = rating_text_by_id[a_id]
    else:
        rating_text = rating_text_by_id.get(f'{a_id}_summary', '')
        if rating_text == '':
            missing_rating_ids.add(a_id)
    triplet_rows.append({
        'id': f'{triplet_id}_rating',
        'raw_id': row['raw_id'],
        'question': question,
        'text': rating_text,
        'type': 'rating',
        'model': row.get('model_a', 'rating'),
        'num_samples_group': row.get('num_samples_group'),
        'summary_length': None,
        **pair_meta,
    })

    # Row 3: Comparison (ring pair HTML)
    triplet_rows.append({
        'id': f'{triplet_id}_comparison',
        'raw_id': row['raw_id'],
        'question': question,
        'text': row['text'],
        'type': 'comparison',
        'model': 'comparison',
        'num_samples_group': row.get('num_samples_group'),
        'summary_length': row.get('summary_length'),
        **pair_meta,
    })

if missing_rating_ids:
    print(f'WARNING: no rating text found for {len(missing_rating_ids)} summary ids; their rating rows are empty')

triplet_df = pd.DataFrame(triplet_rows)


In [19]:
# Inspect triplet_df, print a few sample triplets, then write it to CSV
n_rows = len(triplet_df)

print("\nDebugging triplet_df structure:")
print(f"triplet_df shape: {triplet_df.shape}")
print(f"triplet_df columns: {triplet_df.columns.tolist()}")
print("First few rows:")
print(triplet_df.head())

if n_rows == 0:
    print("ERROR: triplet_df is empty! Check the matching process.")
elif 'type' not in triplet_df.columns:
    print("\nVerifying triplet structure:")
    print("ERROR: 'type' column missing from triplet_df")
else:
    print("\nVerifying triplet structure:")
    # Row count per item type
    for label, type_name in (("Question", 'question'),
                             ("Rating", 'rating'),
                             ("Comparison", 'comparison')):
        print(f"{label} entries: {int((triplet_df['type'] == type_name).sum())}")

    # Up to three sample triplets, read as consecutive blocks of three rows
    print("\nSample triplet structure:")
    texts = triplet_df['text']
    for number, start in enumerate(range(0, min(9, n_rows), 3), start=1):
        print(f"\nTriplet {number}:")
        print(f"  Question: {texts.iloc[start]}")
        print(f"  Rating: {texts.iloc[start + 1][:100]}...")
        if start + 2 < n_rows:
            print(f"  Comparison: {texts.iloc[start + 2][:100]}...")

# The file is written in every case, including when the checks above reported an error
output_path = '../data_files/processed/sum_humanstudy_triplet_full_ring.csv'
triplet_df.to_csv(output_path, index=False)

triplet_count, remainder = divmod(n_rows, 3)
print(f"\nSaved triplet data to: {output_path}")
print(f"Total rows: {n_rows}")
print(f"Total triplets: {triplet_count}")

# Final verification: the row count should be a multiple of three
if remainder:
    print(f"⚠️ Incomplete triplets: {triplet_count} complete + {remainder} remaining entries")
else:
    print(f"✅ Perfect triplet structure: {triplet_count} complete triplets")



Debugging triplet_df structure:
triplet_df shape: (6750, 16)
triplet_df columns: ['id', 'raw_id', 'question', 'text', 'type', 'model', 'num_samples_group', 'summary_length', 'model_a', 'model_b', 'summary_a_id', 'summary_b_id', 'summary_a_text', 'summary_b_text', 'summary_length_a', 'summary_length_b']
First few rows:
                          id                             raw_id  \
0    triplet_ring_0_question  38a9bde51b2e82b9_7cb5806a4aa6f3ff   
1      triplet_ring_0_rating  38a9bde51b2e82b9_7cb5806a4aa6f3ff   
2  triplet_ring_0_comparison  38a9bde51b2e82b9_7cb5806a4aa6f3ff   
3    triplet_ring_1_question  38a9bde51b2e82b9_8172525bd422435b   
4      triplet_ring_1_rating  38a9bde51b2e82b9_8172525bd422435b   

                                            question  \
0  Do you support requiring real-name registratio...   
1  Do you support requiring real-name registratio...   
2  Do you support requiring real-name registratio...   
3  Do you support requiring real-name registratio...

In [None]:
# NOTE(review): `processed_pair_data` is not assigned in any cell visible in this
# notebook and this cell has no execution count; it would presumably raise
# NameError on a fresh Run All. TODO confirm the intended variable or remove the cell.
len(processed_pair_data)