In [1]:
import pandas as pd
from pathlib import Path
home = Path.home()

# Models whose generated summaries are joined onto the test set, one column per model.
# models = ['gemma3', 'llama4', 'qwq', 'qwen3']
models = ['gemma3']

# Optional variant suffix appended to each model's file stem; None selects the plain run.
suffixes = None
# suffixes = '_sent_shuffle'
# suffixes = '_tail'

# Build every path from one base directory so the suffixed / unsuffixed cases
# share a single expression instead of two near-identical branches.
annotation_dir = home / 'projects/TLDR/data/paper_html_10.1038/abs_annotation'
file_suffix = '' if suffixes is None else suffixes
csv_files = [annotation_dir / 'generated_annotations' / f'{model}{file_suffix}.txt' for model in models]

df = pd.read_csv(annotation_dir / 'test.tsv', sep='\t')
for model, csv_file in zip(models, csv_files):
    # One generated summary per line; rows are aligned with the test set purely by position.
    # NOTE(review): read_csv uses default quote handling here, so a stray double quote in a
    # generated line could merge rows and shift the alignment -- confirm row counts match.
    single_df = pd.read_csv(csv_file, sep='\t', header=None, names=[model])
    df = df.join(single_df)

# The invalid-entry file lists numbers that are 2 larger than the DataFrame index label
# (presumably 1-based line numbers of test.tsv including its header row -- TODO confirm).
invalid_entries = pd.read_csv(home / "projects/TLDR/description/invalid_entry_in_test.txt", sep='\t', header=None).values.flatten().tolist()
# Drop all invalid rows in one call rather than copying the frame once per entry.
df = df.drop(index=[entry - 2 for entry in invalid_entries])
df

Unnamed: 0,doi,paper_id,abstract,annotation,gemma3
0,10.1073/pnas.91.7.2757,107202074,The origin and taxonomic status of domesticate...,A demonstration that cattle have been domestic...,This study refutes the single origin of domest...
1,10.1093/genetics/154.4.1785,83366887,Abstract The domestic pig originates from the ...,Evidence is presented for independent domestic...,This study provides evidence for independent d...
2,10.1073/pnas.96.16.9252,122095374,We previously mapped a quantitative trait locu...,This paper shows how the identity-by-descent a...,This study used fine-mapping methods to identi...
3,10.1101/gr.10.2.220,100831446,A genome-wide linkage disequilibrium (LD) map ...,The pattern of linkage disequilibrium (LD) acr...,Reference 35 reports long-range LD in Dutch bl...
4,10.1126/science.8134840,17452622,The European wild boar was crossed with the do...,The first paper to show the use of divergent i...,This study identifies a major QTL on SSC4 affe...
...,...,...,...,...,...
35631,10.2337/db08-1168,4860455,OBJECTIVE—Regulatory T-cells (Tregs) have cata...,This article describes the good manufacturing ...,This study describes an efficient protocol for...
35632,10.1126/science.aar3246,4860145,Engineering cytokine-receptor pairs Interleuki...,This study reports the generation of an orthog...,Reference 48 describes the engineering of a sy...
35633,10.1126/science.aad2791,62290395,T cells target peptide combos One of the endur...,This article shows that some diabetogenic T ce...,Reference 51 shows that autoreactive T cells c...
35634,10.1073/pnas.1902566116,82979762,Polymorphic HLAs form the primary immune barri...,This article describes the development of gene...,This work demonstrates that a combined strateg...


In [22]:
def make_question(export_tag, abstract, annotation, llm_summary, q_text):
    """Render one single-answer multiple-choice question in Qualtrics Advanced Format.

    Args:
        export_tag: Identifier written into the ``[[ID:...]]`` line.
        abstract: Paper abstract; line breaks are rendered as ``<br>``.
        annotation: First answer choice (the reference annotation).
        llm_summary: Second answer choice (the model-generated summary).
        q_text: The question prompt shown below the abstract.

    Returns:
        The question as a newline-terminated string. The third choice is always
        "Cannot decide based on the given information".
    """
    def _single_line(text):
        # Every line after [[Choices]] is a separate answer choice, so a line
        # break inside a summary would split it into several choices. Fold the
        # text onto one line; text that is already one line is only stripped.
        pieces = (piece.strip() for piece in text.strip().splitlines())
        return ' '.join(piece for piece in pieces if piece)

    # Normalise CRLF / bare CR to LF first so no stray '\r' survives in the HTML.
    abstract_html = abstract.replace('\r\n', '\n').replace('\r', '\n').replace('\n', '<br>')
    return f"""[[Question:MC:SingleAnswer:Vertical]]
[[ID:{export_tag}]]
<b>Given the following abstract of a paper:</b><br>
<span style="color:#0070c0">"{abstract_html}"</span>
<br><br>
{q_text}
[[Choices]]
{_single_line(annotation)}
{_single_line(llm_summary)}
Cannot decide based on the given information
"""

In [28]:
import nltk
import numpy as np
# Make sure the NLTK 'punkt' package is available locally before tokenizing.
# NOTE(review): newer NLTK releases may look for 'punkt_tab' instead -- confirm against the installed version.
nltk.download('punkt')

def word_count(text):
    """Return how many tokens ``nltk.word_tokenize`` yields for ``text``.

    ``text`` is coerced with ``str`` first, so non-string values are
    tokenized by their string representation.
    """
    tokens = nltk.word_tokenize(str(text))
    return len(tokens)

# Token counts for every abstract and every gemma3 summary.
for text_col, len_col in (('abstract', 'abstract_len'), ('gemma3', 'gemma3_len')):
    df[len_col] = df[text_col].apply(word_count)

# Percentile cut-offs: lower bound for abstracts, upper bound for summaries.
abstract_10 = np.percentile(df['abstract_len'], 10)
gemma3_90 = np.percentile(df['gemma3_len'], 90)

# Report the statistics that motivate the cut-offs.
for label, value in (
    ("mean abstract length", df['abstract_len'].mean()),
    ("10th percentile of abstract length", abstract_10),
    ("mean gemma3 length", df['gemma3_len'].mean()),
    ("90th percentile of gemma3 length", gemma3_90),
):
    print(f"{label}: {value}")

# Keep rows whose abstract is at or above the 10th percentile in length and
# whose gemma3 summary is at or below the 90th percentile.
long_enough = df['abstract_len'] >= abstract_10
concise_enough = df['gemma3_len'] <= gemma3_90
filtered_df = df[long_enough & concise_enough]

[nltk_data] Downloading package punkt to /home/lyuzhuoqi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


mean abstract length: 191.5761208276017
10th percentile of abstract length: 107.0
mean gemma3 length: 29.06105948738104
90th percentile of gemma3 length: 36.0


In [29]:
# Display the rows that passed the length filter.
filtered_df

Unnamed: 0,doi,paper_id,abstract,annotation,gemma3,abstract_len,gemma3_len
0,10.1073/pnas.91.7.2757,107202074,The origin and taxonomic status of domesticate...,A demonstration that cattle have been domestic...,This study refutes the single origin of domest...,267,27
1,10.1093/genetics/154.4.1785,83366887,Abstract The domestic pig originates from the ...,Evidence is presented for independent domestic...,This study provides evidence for independent d...,139,23
2,10.1073/pnas.96.16.9252,122095374,We previously mapped a quantitative trait locu...,This paper shows how the identity-by-descent a...,This study used fine-mapping methods to identi...,189,34
3,10.1101/gr.10.2.220,100831446,A genome-wide linkage disequilibrium (LD) map ...,The pattern of linkage disequilibrium (LD) acr...,Reference 35 reports long-range LD in Dutch bl...,193,24
4,10.1126/science.8134840,17452622,The European wild boar was crossed with the do...,The first paper to show the use of divergent i...,This study identifies a major QTL on SSC4 affe...,111,24
...,...,...,...,...,...,...,...
35630,10.1182/blood-2004-09-3579,123181209,Abstract We developed an approach that increas...,First study redirecting T reg cells using a ch...,This reference describes a chimeric receptor a...,185,32
35631,10.2337/db08-1168,4860455,OBJECTIVE—Regulatory T-cells (Tregs) have cata...,This article describes the good manufacturing ...,This study describes an efficient protocol for...,261,29
35632,10.1126/science.aar3246,4860145,Engineering cytokine-receptor pairs Interleuki...,This study reports the generation of an orthog...,Reference 48 describes the engineering of a sy...,143,28
35633,10.1126/science.aad2791,62290395,T cells target peptide combos One of the endur...,This article shows that some diabetogenic T ce...,Reference 51 shows that autoreactive T cells c...,117,25


In [30]:
# Each entry: (prefix for question export IDs, Qualtrics block name, question prompt).
blocks = [
    ('QUALITY', 'Quality Block', "Which of the following summaries do you think is better?"),
    ('HUMANOID', 'Humanoid Block', "Which summary for this paper do you think is written by human authors?"),
]

# Sampling configuration. A fixed seed makes the exported survey reproducible:
# re-running the cell regenerates the same questions instead of a new random set.
N_QUESTIONS_PER_BLOCK = 500
SAMPLE_SEED = 42

lines = []
lines.append('[[AdvancedFormat]]')

for block_no, (block_tag, block_name, q_text) in enumerate(blocks):
    lines.append(f'[[Block:{block_name}]]')
    # Cap the sample size so a small filtered set does not raise ValueError.
    n_questions = min(N_QUESTIONS_PER_BLOCK, len(filtered_df))
    # Offset the seed per block so each block still draws its own set of papers.
    block_sample = filtered_df.sample(n=n_questions, random_state=SAMPLE_SEED + block_no)
    for idx, row in block_sample.iterrows():
        # The DataFrame index label is embedded in the ID so responses can be traced back to a row.
        export_tag = f"{block_tag}_{idx}"
        # NOTE(review): the annotation is always passed as the first choice and the model
        # summary as the second -- confirm that choice order is randomized on the survey side.
        question = make_question(export_tag, row['abstract'], row['annotation'], row[models[0]], q_text)
        lines.append(question.strip())

# Blank line between the header, block markers and questions.
with open(home / 'projects/TLDR/evaluation/survey/qualtrics_survey.txt', 'w', encoding='utf-8') as f:
    f.write('\n\n'.join(lines))

In [31]:
# Spot-check a single entry by its index label.
df.loc[3864]

doi                                       10.1126/science.1088545
paper_id                                                 62167115
abstract        The prefrontal cortex (PFC) subserves cognitiv...
annotation      This paper reports the first neuroimaging expe...
gemma3          This paper proposes a model of human lateral P...
abstract_len                                                  111
gemma3_len                                                     33
Name: 3864, dtype: object