# Description

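Collects the LLM pairwise paragraph-comparison results of all manuscripts and LLM judges into a single table and exports it as an Excel file.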

# Modules

In [1]:
from pathlib import Path

import pandas as pd
from IPython.display import display
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
from proj import conf

# Settings/paths

In [2]:
# Default value; the "Parameters" cell that follows overrides it
# (looks like papermill-style parameter injection — TODO confirm).
OUTPUT_FILE = None

In [3]:
# Parameters
# Path of the Excel (.xlsx) file that this notebook writes in its last cell.
OUTPUT_FILE = "/home/miltondp/projects/others/manubot/manubot-gpt-manuscript/content/supplementary_files/Supplementary_File_05-Automatic_assessment.xlsx"

In [4]:
# Directory with the LLM pairwise result files, taken from the project configuration.
INPUT_DIR = conf.common.LLM_PAIRWISE_DIR
# Fail early if the results directory is missing.
assert INPUT_DIR.exists()
display(INPUT_DIR)

PosixPath('/home/miltondp/projects/others/manubot/manubot-ai-editor-code/base/results/llm_pairwise')

In [5]:
# Validate the output path up front so that a bad parameter fails here
# rather than at the export step at the end of the notebook.
# `str(...)` keeps this cell re-runnable: after the first run OUTPUT_FILE is
# already a Path, which has no `.strip()` method.
assert (
    OUTPUT_FILE is not None and str(OUTPUT_FILE).strip() != ""
), "Output file not specified"
OUTPUT_FILE = Path(OUTPUT_FILE).resolve()

# The table is later written with `DataFrame.to_excel`; only the .xlsx
# extension is accepted here.
assert OUTPUT_FILE.suffix == ".xlsx", "Output file should have the .xlsx extension"

# Create the destination folder (and any missing parents) if needed.
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Load LLM pairwise files

In [6]:
# Gather every pickled result file found in the input directory, then show
# how many there are together with the first two paths as a quick check.
result_files = [*INPUT_DIR.glob("*.pkl")]
display(len(result_files), result_files[:2])

16

[PosixPath('/home/miltondp/projects/others/manubot/manubot-ai-editor-code/base/results/llm_pairwise/epistasis-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl'),
 PosixPath('/home/miltondp/projects/others/manubot/manubot-ai-editor-code/base/results/llm_pairwise/phenoplier-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl')]

In [7]:
# Peek at the first result file to see which columns each pickle provides.
pd.read_pickle(result_files[0])

Unnamed: 0,rep_index,paragraph_index,paragraph_section,winner,rationale,winner_score
0,0,0,abstract,Paragraph 1,Paragraph 1 slightly edges out Paragraph A in ...,1.0
1,0,1,introduction,tie,Both paragraphs exhibit clear sentence structu...,0.0
2,0,2,introduction,Paragraph 1,Paragraph 1 is slightly better due to the abse...,1.0
3,0,3,introduction,Paragraph 1,Both paragraphs exhibit clear sentence structu...,1.0
4,0,4,introduction,Paragraph 1,Both paragraphs exhibit clear sentence structu...,1.0
...,...,...,...,...,...,...
310,4,58,methods,tie,Both paragraphs exhibit clear sentence structu...,0.0
311,4,59,methods,Paragraph A,Paragraph A excels in having a clear sentence ...,-1.0
312,4,60,methods,Paragraph 1,Paragraph 1 is slightly better due to maintain...,1.0
313,4,61,methods,tie,Both paragraphs exhibit clear sentence structu...,0.0


In [8]:
# File names carry metadata as "--"-separated fields; show the fields of the first file.
result_files[0].name.split("--")

['epistasis-manuscript', 'gpt-3.5-turbo', 'openai_gpt-3.5-turbo.pkl']

In [9]:
# Read every result file and tag its rows with the metadata encoded in the
# file name, which is a "--"-separated list of fields:
#   <manuscript>-manuscript--<pr_model>[--reversed]--<llm_judge>.pkl
all_results = []

for result_path in result_files:
    # Log progress: one line per file, flushed immediately.
    print(result_path.name, flush=True)
    results = pd.read_pickle(result_path)

    name_fields = result_path.name.split("--")

    # Field 0: manuscript identifier, cut at the "-manuscript" marker.
    manuscript_code = name_fields[0].partition("-manuscript")[0]

    # Field 1: value stored in the "pr_model" column.
    manuscript_pr_model = name_fields[1]

    # With more than three fields, field 2 is the optional marker; the flag
    # is only True when that marker is exactly "reversed".
    has_extra_field = len(name_fields) > 3
    reversed_paragraphs = has_extra_field and name_fields[2] == "reversed"

    # The judge is the field right after the optional marker; drop ".pkl".
    llm_judge = name_fields[3 if has_extra_field else 2].partition(".pkl")[0]

    # Listed in final column order; each is inserted at position 0 in reverse
    # so that "manuscript_code" ends up as the first column.
    metadata_columns = (
        ("manuscript_code", manuscript_code),
        ("pr_model", manuscript_pr_model),
        ("paragraphs_reversed", reversed_paragraphs),
        ("llm_judge", llm_judge),
    )
    for column_name, column_value in reversed(metadata_columns):
        results.insert(0, column_name, column_value)

    all_results.append(results)

epistasis-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl


phenoplier-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl


ccc-manuscript--gpt-3.5-turbo--reversed--openai_gpt-3.5-turbo.pkl


phenoplier-manuscript--gpt-3.5-turbo--reversed--openai_gpt-4-turbo-preview.pkl


phenoplier-manuscript--gpt-3.5-turbo--reversed--openai_gpt-3.5-turbo.pkl


ccc-manuscript--gpt-3.5-turbo--reversed--openai_gpt-4-turbo-preview.pkl


epistasis-manuscript--gpt-3.5-turbo--openai_gpt-4-turbo-preview.pkl


biochatter-manuscript--gpt-3.5-turbo--openai_gpt-4-turbo-preview.pkl


epistasis-manuscript--gpt-3.5-turbo--reversed--openai_gpt-3.5-turbo.pkl


biochatter-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl


ccc-manuscript--gpt-3.5-turbo--openai_gpt-4-turbo-preview.pkl


epistasis-manuscript--gpt-3.5-turbo--reversed--openai_gpt-4-turbo-preview.pkl


ccc-manuscript--gpt-3.5-turbo--openai_gpt-3.5-turbo.pkl


biochatter-manuscript--gpt-3.5-turbo--reversed--openai_gpt-3.5-turbo.pkl


biochatter-manuscript--gpt-3.5-turbo--reversed--openai_gpt-4-turbo-preview.pkl


phenoplier-manuscript--gpt-3.5-turbo--openai_gpt-4-turbo-preview.pkl


In [10]:
# Stack the per-file tables into a single frame with a fresh row index.
df = pd.concat(all_results, ignore_index=True)

In [11]:
# (rows, columns) of the combined table.
df.shape

(3880, 10)

In [12]:
# Preview the first rows, including the metadata columns derived from the file names.
df.head()

Unnamed: 0,manuscript_code,pr_model,paragraphs_reversed,llm_judge,rep_index,paragraph_index,paragraph_section,winner,rationale,winner_score
0,epistasis,gpt-3.5-turbo,False,openai_gpt-3.5-turbo,0,0,abstract,Paragraph 1,Paragraph 1 slightly edges out Paragraph A in ...,1.0
1,epistasis,gpt-3.5-turbo,False,openai_gpt-3.5-turbo,0,1,introduction,tie,Both paragraphs exhibit clear sentence structu...,0.0
2,epistasis,gpt-3.5-turbo,False,openai_gpt-3.5-turbo,0,2,introduction,Paragraph 1,Paragraph 1 is slightly better due to the abse...,1.0
3,epistasis,gpt-3.5-turbo,False,openai_gpt-3.5-turbo,0,3,introduction,Paragraph 1,Both paragraphs exhibit clear sentence structu...,1.0
4,epistasis,gpt-3.5-turbo,False,openai_gpt-3.5-turbo,0,4,introduction,Paragraph 1,Both paragraphs exhibit clear sentence structu...,1.0


# Inspect data

In [13]:
# Distinct LLM judges present in the combined results.
df["llm_judge"].unique()

array(['openai_gpt-3.5-turbo', 'openai_gpt-4-turbo-preview'], dtype=object)

In [14]:
# Distinct manuscript codes present in the combined results.
df["manuscript_code"].unique()

array(['epistasis', 'phenoplier', 'ccc', 'biochatter'], dtype=object)

In [15]:
# Distinct values of the paragraph_section column.
df["paragraph_section"].unique()

array(['abstract', 'introduction', 'results', 'discussion', 'methods',
       'supplementary material'], dtype=object)

# Clean illegal characters for openpyxl

In [18]:
# Remove every match of openpyxl's ILLEGAL_CHARACTERS_RE pattern from the
# table's text values (regex replacement with an empty string).
df = df.replace(ILLEGAL_CHARACTERS_RE, "", regex=True)

# Save

In [19]:
# Write the cleaned table to the output path validated earlier in the notebook.
df.to_excel(OUTPUT_FILE)