This notebook runs a C-SEO method on the selected documents from one domain of the benchmark.
The improved documents for all C-SEO methods are already provided in `data_release/{partition}/selected_docs.json`.

In [None]:
import json
from llms import OpenAIHelper, AnthropicHelper
from methods import (
    LLMstxt,
    ContentImprovement,
    Authoritative,
    Statistics,
    Citations,
    Fluency,
    UniqueWords,
    TechnicalTerms,
    SimpleLanguage,
    Quotes,
)
from config import AdoptionMode
import os

# Setup

In [None]:
# The project root is the parent of the directory the notebook is executed from.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))

# config.json sits at the project root and must contain an "OPENAI_API_KEY" entry.
config_path = os.path.join(project_root, "config.json")
with open(config_path, "r") as config_file:
    config = json.loads(config_file.read())

# Publish the key as an environment variable
# (presumably where the LLM helper looks for it; confirm in `llms`).
os.environ["OPENAI_API_KEY"] = config["OPENAI_API_KEY"]

In [None]:
# Load the documents you want to improve using C-SEO methods.
# They are read from <project_root>/data/<domain>/selected_docs.json.
domain = "retail"
# JSON text is UTF-8; pin the encoding so decoding does not depend on the
# platform's locale default (which can fail on non-ASCII document text).
with open(
    os.path.join(project_root, "data", domain, "selected_docs.json"),
    "r",
    encoding="utf-8",
) as f:
    selected_docs = json.load(f)

In [None]:
# Preview the entry stored under key "0" to inspect the structure of the loaded documents.
selected_docs["0"]

In [None]:
# NOTE(review): looks like a Hugging Face dataset id; it is not referenced by
# the other cells shown in this notebook. Confirm before relying on it.
data_path = "parameterlab/c-seo-bench"

# Names of the available C-SEO methods; the method to run is picked from this list.
list_methods = [
    "Authoritative",
    "Statistics",
    "Citations",
    "Fluency",
    "UniqueWords",
    "TechnicalTerms",
    "SimpleLanguage",
    "Quotes",
    "LLMstxt",
    "ContentImprovement",
]

# Registry mapping each method name to the class (imported from `methods`) that implements it.
methodname2class = dict(
    Authoritative=Authoritative,
    LLMstxt=LLMstxt,
    ContentImprovement=ContentImprovement,
    Citations=Citations,
    Statistics=Statistics,
    Fluency=Fluency,
    UniqueWords=UniqueWords,
    TechnicalTerms=TechnicalTerms,
    SimpleLanguage=SimpleLanguage,
    Quotes=Quotes,
)

In [None]:
# Model that rewrites the documents, and the C-SEO method to apply.
# Change the index (or assign a name from `list_methods`) to run another method.
llm_name = "gpt-4o-mini-2024-07-18"
method_name = list_methods[0]

run_summary = f"You will run the C-SEO method {method_name} with {llm_name} on the selected documents from {domain}."
print(run_summary)

In [None]:
# Create the LLM helper for the model chosen in `llm_name`; later cells use it
# to query the batch status and retrieve the results.
llm = OpenAIHelper(llm_name)

In [None]:
# Look up the class registered for the chosen method name and construct it with the LLM helper.
method_class = methodname2class[method_name]
method = method_class(llm)

# Run the C-SEO Method

In [None]:
# Outputs go to <project_root>/data/<domain>, the same folder the documents were loaded from.
output_folder = os.path.join(project_root, "data", domain)
print(f"Output folder: {output_folder}")
os.makedirs(output_folder, exist_ok=True)

# Flatten the nested {data point -> {doc index -> entry}} mapping into a flat
# list of document texts, preserving the nested iteration order.
list_docs = [
    doc_entry["doc"]
    for docs_of_data_point in selected_docs.values()
    for doc_entry in docs_of_data_point.values()
]

# Submit all documents to the method; the returned id is used by the following
# cells to check the status and to retrieve the results.
batch_id = method.improve_texts(list_docs, output_folder)

In [None]:
# Query the current state of the submitted batch; re-run this cell until it is finished.
status = llm.get_status(batch_id)
status_message = f"Status for {domain} x {method_name}: {status}"
print(status_message)

In [None]:
# Download the results (run this when the status is 'completed')
results_txt, total_cost = llm.retrieve_results(batch_id)

# Enumerate the (data point, document) keys in the same nested order that was
# used to build the submitted list of documents, so result i maps to document i.
doc_keys = [
    (data_point_idx, doc_idx)
    for data_point_idx in selected_docs
    for doc_idx in selected_docs[data_point_idx]
]

# Fail before mutating `selected_docs` or writing any file if the number of
# returned texts does not match the number of documents; otherwise results
# could be attached to the wrong documents or the loop could stop half-way.
if len(results_txt) != len(doc_keys):
    raise ValueError(
        f"Expected {len(doc_keys)} results for {domain} x {method_name}, "
        f"but got {len(results_txt)}. Is the batch completed?"
    )

for i, (data_point_idx, doc_idx) in enumerate(doc_keys):
    selected_docs[data_point_idx][doc_idx][f"{method_name}(doc)"] = results_txt[i]

# This is the list of updated descriptions using the method. For your convenience,
# we also provide them on the Hugging Face dataset.
# Serialize before opening the file: opening in "w" mode truncates it, and this
# file is also the input of the notebook, so a serialization error must not wipe it.
serialized_docs = json.dumps(selected_docs)
with open(os.path.join(output_folder, "selected_docs.json"), "w") as f:
    f.write(serialized_docs)

# NOTE: the file name does not include the method name, so running another
# method for the same domain overwrites the previously saved cost.
with open(os.path.join(output_folder, "total_cost.txt"), "w") as f:
    f.write(str(total_cost))

print(f"Total cost for {domain} x {method_name}: {total_cost}")
print(f"Results for {domain} x {method_name} saved in {output_folder}")