### BooookScore Summarization

In [3]:
import os
import subprocess
import sys

from booookscore.score import Scorer

# End-to-end BooookScore run for a single book: chunk the pickled book,
# summarize the chunks, postprocess the summaries, then score them.

# Define variables
book_name = "reminiscences-of-pioneer-days-in-st-paul"
model = "gpt-4o"
method = "hier"  # or "increasing"

# Configuration variables
api = "openai"
# Path handed to both the summarizer CLI (--api_key) and the Scorer.
api_key_path = "api.txt"
chunk_size = 4096
max_context_len = 8192

# Results are grouped per model / book / method so runs do not overwrite
# each other.
data_dir = "data/"
results_dir = "results/" + model + "/" + book_name + "/" + method + "/"

pickle_book_path = data_dir + book_name + ".pkl"
chunked_output_path = results_dir + "chunked_book.pkl"
summaries_output_path = results_dir + "summaries.json"
# The postprocess step below is only given an input path; presumably it writes
# its result next to the input with a "_cleaned" suffix -- TODO confirm against
# booookscore.postprocess.
postprocessed_summaries_output_path = summaries_output_path.replace('.json', '_cleaned.json')
annotations_output_path = results_dir + "annotations.json"
score_output_path = results_dir + "score.txt"


# Ensure the directories exist
os.makedirs(results_dir, exist_ok=True)

# Define commands.
# Each command is an argument list (no shell involved), so paths containing
# spaces or shell metacharacters are passed through verbatim. sys.executable
# is used instead of a bare "python" so the steps run under the same
# interpreter that imported booookscore above.
chunk_command = [
    sys.executable, "-m", "booookscore.chunk",
    "--chunk_size", str(chunk_size),
    "--input_path", pickle_book_path,
    "--output_path", chunked_output_path,
]
summ_command = [
    sys.executable, "-m", "booookscore.summ",
    "--book_path", chunked_output_path,
    "--summ_path", summaries_output_path,
    "--model", model,
    "--api", api,
    "--api_key", api_key_path,
    "--method", method,
    "--chunk_size", str(chunk_size),
    "--max_context_len", str(max_context_len),
]
postprocess_command = [
    sys.executable, "-m", "booookscore.postprocess",
    "--input_path", summaries_output_path,
]


def _run_step(description, command):
    """Print *description*, then run *command* and fail loudly on error.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status, so later steps never run on missing or stale outputs.
    """
    # Flush so the banner appears before any output of the child process.
    print(description, flush=True)
    subprocess.run(command, check=True)


# Execute commands
_run_step("Chunking the book...", chunk_command)
_run_step("Summarizing the book...", summ_command)
_run_step("Postprocessing the summaries...", postprocess_command)

print("Scoring the summaries...")
scorer = Scorer(
    model=model,
    api=api,
    api_key=api_key_path,
    summ_path=postprocessed_summaries_output_path,
    annot_path=annotations_output_path,
    template_path="prompts/get_annotations.txt",
    v2=False,
)
score = scorer.get_score()
print(f"BooookScore = {score}")
# Persist the score alongside the other artifacts of this run.
with open(score_output_path, "w", encoding="utf-8") as f:
    f.write(f"BooookScore = {score}")


print(f"All files have been saved to {results_dir}")

Chunking the book...


100%|██████████| 1/1 [00:00<00:00, 31068.92it/s]


Already processed, skipping...
Summarizing the book...
Loading existing summaries...
Already processed, skipping...
Postprocessing the summaries...


Iterating over books: 100%|██████████| 1/1 [00:00<00:00, 29959.31it/s]
Iterating over books: 100%|██████████| 1/1 [00:00<00:00, 18315.74it/s]


Skipping reminiscences-of-pioneer-days-in-st-paul
Scoring the summaries...
No annotations found, getting annotations...


Iterating over sentences: 100%|██████████| 29/29 [04:37<00:00,  9.58s/it]
Iterating over summaries: 100%|██████████| 1/1 [04:37<00:00, 277.92s/it]

BooookScore = 0.7586206896551724
All files have been saved to results/gpt-4o/reminiscences-of-pioneer-days-in-st-paul/hier/



