In [1]:
pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import openai
import ebooklib # Parse EPUB
import pymupdf # Parse PDF
import re 
import math 

from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from ebooklib import epub
from bs4 import BeautifulSoup # Parse HTML, XML

In [None]:
def html_to_str(chapter):
    soup = BeautifulSoup(chapter.get_content(), 'html.parser') # Built-in Parser
    return soup.get_text()

def clean_str(str):
    str = str.replace('\xa0', ' ').replace('\n', ' ').replace('  ', ' ') # Deal with non-text content. 
    return str.strip()

# Chapter-based chunks. Iterate through pages in pdf until we get to the hyperlinks (table of contents) to get the pages corresponding to chapters
# Assumes that the first instance of hyperlinks is the table of contents
# If there are no hyperlinks just returns [start, end] pages. 
def extract_pages_hyperlinks_pdf(path):
    doc = pymupdf.open(path)
    hyperlinks = []
    hyperlinks.append(0) 
    for page in doc:
        links = page.get_links()  # Extract all links
        need_to_sort = False
        if len(links) > 0: # Table of Contents
            for link in links:
                temp = link.get("page") 
                if temp not in hyperlinks:
                    if len(hyperlinks) > 0:
                        if temp < hyperlinks[-1]:
                            need_to_sort = True
                        if (temp - 2) < hyperlinks[-1]: # each page is ~500 tokens max. 
                            continue
                    hyperlinks.append(temp)
            hyperlinks.append(len(doc))
            if need_to_sort:
                sorted(hyperlinks) # When the hyperlinks are out of order
            return doc, hyperlinks

    hyperlinks.append(len(doc))
    doc, hyperlinks

def parse_epub(path): 
    book = epub.read_epub(path)
    items = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT)) # ITEM_DOCUMENT is flag for actual words
    chapters = []
    for item in items:
        temp = clean_str(html_to_str(item))
        # temp will be empty if it's the item is non-content data
        if temp:
            chapters.append(temp)
    return chapters

def parse_pdf(path): 
    # Create Chunks Based on "chapters" via Hyperlink pages. 
    # If Hyperlinks don't exist
    doc, hyperlinks = extract_pages_hyperlinks_pdf(path)
    chapters = []
    for i in range(0, len(hyperlinks) - 1):
        text = []
        for pages in range(hyperlinks[i], hyperlinks[i + 1]):
            text.append(clean_str(doc[pages].get_text()))
        chapters.append(''.join(text))
    if len(hyperlinks) == 2:
        chapters = generate_equal_chunks(chapters)
    return chapters

def parse_xml(path):
    soup = BeautifulSoup(open(path, "r", encoding="utf-8").read(), 'lxml')
    title_tag = soup.find("title")
    chapters = []

    if title_tag:
        chapters.append(title_tag.text.strip())
    content_tags = soup.find_all("content:encoded") 
    for content in content_tags: 
        temp = content.text
        if temp.endswith("]]>"):
            temp = temp[:-3]
        temp = clean_str(temp)
        if temp:
            chapters.append(temp)
    return chapters

def parser(path):
    file, extension = os.path.splitext(path)
    if extension == ".epub":
        return parse_epub(path)
    elif extension == ".pdf":
        return parse_pdf(path)
    elif extension == '.xml':
        return parse_xml(path)
    else: 
        file = open(path, "r", encoding = "utf-8")
        return file.read()

def coalesce(book, min_length=15000):
    out = []
    for str in book:
        # Combing smaller chapters or extra non-chapter information into same call
        if len(out) > 0 and (len(out[-1]) + len(str) < min_length or len(str) > 10 * len(out[-1])):
            temp = out[-1]
            out[-1] = temp + " " + str
        else:
            out.append(str)
    return out

def generate_equal_chunks(coalesed_books, num_calls = 10):
    output = []
    for book in coalesed_books:
        min_size = 0
        for chapter in book:
            min_size += len(chapter)
        min_size = math.ceil(float(min_size) / num_calls) # num_calls of each min_size characters (tokens ~ characters / 4)

        combined = []
        current = ""

        for chapter in book:
            if len(current) + len(chapter) <= min_size:
                current = current + " " + chapter
            else: 
                combined_string = current + " " + chapter
                split = min_size
                punctuation = {'.', '!', '?'}
                for chr in range(min_size - 1, -1, -1):
                    if combined_string[chr] in punctuation:
                        split = chr + 1
                        break

                combined.append(combined_string[:split].strip())
                current = combined_string[split:]
        if current: 
            combined.append(current.strip()) 
        if combined: 
            output.append(combined)
    return output

# parsed_books have removed all non-text from each file
# each index is a list of lists so that each chunk represents a chapter or split (information about the book)
# https://platform.openai.com/tokenizer

  for root_file in tree.findall('//xmlns:rootfile[@media-type]', namespaces={'xmlns': NAMESPACES['CONTAINERNS']}):


In [None]:
# Testing Four Approaches
# gpt-4o only unless otherwise stated 

# Test 0: 
# Generate Analysis for Each Novel 
# Generate Comparitive Analysis for the Individual Novel Analyses
# Generate 5-paragraph Report with One Single Call

# Test 1:  
# Generate analysis for each chunk independently (granularity of at least one chapter per call)
# Novel-level analysis to coalesce the result of each chunk into one analysis
# Comparative Analysis of the 3 Novel-level analyses 
# Generate 5-paragraph Report with One Single Call

# Test 2:
# PT1: Generate each chunk independently (split so chunks are equal in size) 
# PT2: Generate each chunk-analysis so that they are given the context of previous analyses. - incremental merging 

# Test 3:
# Select the best of Test 0 - 2
# Novel-level analysis by passing in two chunks at a time and combining until there's one left (mergesort approach) - hierarchal merging


# Test 4: 
# Select the best of Test 0-3
# Generate 5-Paragraph Report with multiple calls (Body Paragraphs -> Thesis -> Introduction -> Conclusion)

# Test 5: 
# Select the best of Test 0-4
# gpt 3.5 for chunk level, gpt4-o for rest 
# Run the best of the previous tests using different company models 

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = openai.OpenAI(api_key=OPENAI_API_KEY)
modelName = 'gpt-4o'

def LiteraryAnalyst(excerpt, modelName, client = client):

  system = "You are a Literary Analyst focused on the theme of social isolation. Identify multiple passages, quotes, and examples that display this theme in the excerpt."

  prompt = f"Analyze the following excerpt, specifically about the theme of social isolation. Return direct quotes and examples from the text, and explain how they relate to social isolation: \n {excerpt}"

  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content

def LiteraryAnalystIncremental(pastResponses, excerpt, modelName, client = client):
  pastResponses = '\n'.join(pastResponses)
  
  system = "You are a Literary Analyst focused on the theme of social isolation. Identify multiple passages, quotes, and examples that display this theme in the excerpt."

  prompt = f"Below are analyses of previous sections of the text regarding the theme of social isolation:\n\n{pastResponses}\n\nNow, analyze the following excerpt in the context of these past responses. Identify direct quotes and examples that display social isolation, and explain how they connect to or expand upon the theme as established in prior sections:\n\n{excerpt}"

  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content

def SummarizeNovelLevelAnalysis(chunks, modelName, client = client):  
  analyses = '\n'.join(chunks)

  system = "You are a Literary Analyst focused on synthesizing analysis about social isolation. Your goal is to return a novel-level summary about how this work " \
  "displays the theme of social isolation. Reference the most important quotes, examples, and passages in the chunk-level analyses, and explain their significance."

  prompt = f"Below are chunk-level analyses produced for the novel. Please synthesize these into a single, detailed summary focused on the theme of social isolation. Return important quotes and explanations that highlight the author's perspective on social isolation. If the chunk analyses do not contain a particular detail or example, do not make it up. Generate the summary only on the chunk-level analyses provided below: \n {analyses}"
  
  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content

def SummarizeNovelLevelAnalysisHierarchical(chunk_1, chunk_2, modelName, client = client):  

  system = "You are a Literary Analyst focused on synthesizing analysis about social isolation. Your goal is to return a summary about how this work " \
  "displays the theme of social isolation. Reference the most important quotes, examples, and passages in the chunk-level analyses, and explain their significance."

  prompt = f"Below are two chunk-level analyses produced for the novel. Please synthesize these into a single, detailed summary focused on the theme of social isolation. Return important quotes and explanations that highlight the author's perspective on social isolation. If the chunk analyses do not contain a particular detail or example, do not make it up. Generate the summary only on the chunk-level analyses provided below: \n {chunk_1} \n {chunk_2}"
  
  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content

def CompareBooks(novels, modelName, client = client):
  compare_analyses = '\n'.join(novels)

  system = "You are an expert at comparing literature. Create a detailed comparison of how different novels approach the theme of social isolation, focusing on the author's point of view, as well as supporting quotations, passages, and corresponding explanations."
  
  prompt = f"Compare and contrast how these following novels handle the theme of social isolation. The following are analyses of how each novel handles the theme. Identify how each text handles the theme of social isolation and how they differ approach, perspective, and resolution of this theme by including several specific examples. If the novel analyses do not contain a particular detail or example, do not make it up. Generate the summary only using analyses provided below: \n {compare_analyses}"


  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content


def GenerateReport(comparison, modelName, client = client):
  system = "You are an expert book report writer with a focus on comparative literary analysis. Your task is to create a detailed book report analyzing how different books explore the theme of social isolation by providing detailed comparisons, highlighting themes, and discussing the techniques and views of the authors."
  
  prompt = f"""
  Write a five-paragraph comparative book report based on a Comparative Analysis to explain how the works handle the theme of social isolation."

  Structure of Report: 
  Introduction Paragraph: Introduce the books and end with a strong thesis statement about how each book deals with social isolation
  First Body Paragraph: Explain how the first book deals with the topic of social isolation with at least 1-2 direct references to the text.
  Second Body Paragraph: Explain how the second book deals with the topic of social isolation with at least 1-2 direct references to the text.
  Third Body Paragraph: Explain how the third book deals with the topic of social isolation with at least 1-2 direct references to the text.
  Conclusion Paragraph: Summarize the key points in the body paragraphs as well as restating the thesis. 

  Requirements: 
  Each body paragraph must include direct and accurate quotations from the book. 
  The report should focus on the differences and similarities in how each author views and expresses the theme of social isolation.
  The report should be in a professional tone and digestible.

  The Comparative Analysis is included below: \n {comparison}
  """


  completion = client.chat.completions.create(
    model = modelName,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": prompt}
    ]
  )
  
  return completion.choices[0].message.content

def CompareBookReports(report1, report2, modelName, client=client):
  system = """ 
      You are an expert literary analyst. Compare two book reports based on the following three factors: 
      the depth of analysis and evidence, writing clarity, and logical coherence. 
      Evaluate both reports carefully."
      Return only '0' if the first is better or '1' if the second is better. 
      """

  prompt = f"""
      Here are two book reports analyzing how the same books explore the theme of social isolation. 
      Evaluate them independently based on depth of analysis, writing clarity, and logical coherence. 
      After considering both carefully, determine which one is stronger overall. 
      Provide your answer as '0' if Report 1 is better or '1' if Report 2 is better. 
      The positions are irrelevant .\n\n
      Book Report 1:\n {report1} \n\n Book Report 2:\n {report2}.
      """

  print(prompt)
  completion = client.chat.completions.create(
      model=modelName,
      messages=[
          {"role": "system", "content": system},
          {"role": "user", "content": prompt}
      ]
  )

  return completion.choices[0].message.content

def GenerateBodyParagraphs(comparison, modelName, client):

    system = "You are an expert in literary comparison. Your goal is to write three body paragraphs that each focus solely on how one of the three books addresses the theme of social isolation. " \
    "Each paragraph must include at least one to two direct quotes or passages from the provided analysis only if available."

    prompt = f"""
        Below is a comparative analysis of three novels' approaches to social isolation. 
        Using only this analysis, write three body paragraphs:
        Body Paragraph 1: Focus on the first book, analyzing how it treats social isolation with supporting quotes.
        Body Paragraph 2: Focus on the second book, analyzing how it treats social isolation with supporting quotes.
        Body Paragraph 3: Focus on the third book, analyzing how it treats social isolation with supporting quotes.
        Each paragraph should focus on one novel's themes and views, with direct quotes only if available.
        Comparative Analysis: \n{comparison}
    """

    completion = client.chat.completions.create(
        model=modelName,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": prompt}
        ]
    )

    return completion.choices[0].message.content

def GenerateIntroduction(body_paragraphs, comparison, modelName, client):

    system = "You are an expert in literary comparison. Your goal is to write an introduction paragraph with a strong thesis for a comparative book report on " \
    "social isolation across three works. The introduction must reference the already written body paragraphs."

    prompt = f"""
        Below is the following body paragraphs already written: \n
        {body_paragraphs} \n \n 
        Below is the comparative analysis of the three novels for additional context: \n
        {comparison} \n \n
        Write a single introduction paragraph with the following goals: \n
        1. Clear thesis of how the theme of social isolation is handled in the three books by referencing the analysis in the body paragraphs, 
        2. Mention the title of the three books and the authors.
        3. Describe the main points to be detailed in the body paragraphs.
    """

    completion = client.chat.completions.create(
        model=modelName,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": prompt}
        ]
    )

    return completion.choices[0].message.content

def GenerateConclusion(introduction, body_paragraphs, comparison, modelName, client):

    system =  f"""You are a literary analyst. Your goal is to write a single conclusion paragraph with the following goals: \n 
        1. Synthesize the key similarities and differences among the three novels regarding the theme of social isolation
        2. Restate the thesis in a different way, and 
        3. Final thought on social isolation."""

    prompt = f"""
        Below is the introduction already written: \n
        {introduction} \n \n 
        Below is the three body paragraphs: \n
        {body_paragraphs} \n \n
        Finally, here is the generated comparative analysis for the three texts for extra reference: \n
        {comparison} \n \n
        Write a single conclusion paragraph with the following goals: \n
        1. Summarize the main points about similarities and differences from the body paragraphs
        2. Restate the thesis in a different way  
        3. Leave a final thought on why these comparisons matter and about social isolation.
    """

    completion = client.chat.completions.create(
        model=modelName,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": prompt}
        ]
    )

    return completion.choices[0].message.content


def read_doc(path):
  doc = Document(path)
  return "\n".join([p.text for p in doc.paragraphs])

def save_to_doc(report, filename):
    doc = Document()
    paragraph = report.split('\n')
    heading = doc.add_heading("Comparative Analysis", level=1)
    heading.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    for text in paragraph:
      if not text: 
        continue
      doc.add_paragraph("       " + text)
    doc.save(filename)


reports = []

In [None]:
# Test 0 

test_version_0 = []
outer = 1
for book in coalesed_books:
    temp = LiteraryAnalyst('\n'.join(book), modelName, client)
    test_version_0.append(temp)
    print(f"Finished Book {outer} \n")
    outer += 1

comparison = CompareBooks(test_version_0, modelName, client)
print(f"Finished Comparing The Three Books \n")

result = GenerateReport(comparison, modelName, client)
result = re.sub(r'\*(.*?)\*', r'"\1"', result)

reports.append(result)

with open("five_page_report_call_v0.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v0.docx')

11
Finished Book 1 

9
Finished Book 2 

3
Finished Book 3 

Finished Comparing The Three Books 



In [None]:
# Test 1

chunk_level = []
outer = 1

for book in coalesed_books:
    temp = []
    for i, chapter in enumerate(book):
        temp.append(LiteraryAnalyst(chapter, modelName, client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    novel_level.append(SummarizeNovelLevelAnalysis(novel, modelName, client))
    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

result = GenerateReport(comparison, modelName, client)
result = re.sub(r'\*(.*?)\*', r'"\1"', result)

reports.append(result)

with open("five_page_report_call_v1.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v1.docx')

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished iteration 10 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 



In [None]:
# Test 2 - equal chunks 

equal_chunks = generate_equal_chunks(coalesed_books)
chunk_level = []

outer = 1
for book in equal_chunks:
    temp = []
    for i, chapter in enumerate(book):
        temp.append(LiteraryAnalyst(chapter, modelName, client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    novel_level.append(SummarizeNovelLevelAnalysis(novel, modelName, client))
    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

result = GenerateReport(comparison, modelName, client)
result = re.sub(r'\*(.*?)\*', r'"\1"', result)

reports.append(result)

with open("five_page_report_call_v2_1.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v2_1.docx')


Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 



In [None]:
# Test 2 (Incremental Merging)  
equal_chunks = generate_equal_chunks(coalesed_books)
chunk_level = []

outer = 1
for book in equal_chunks:
    temp = []
    for i, chapter in enumerate(book):
        if len(temp) == 0:
          temp.append(LiteraryAnalyst(chapter, modelName, client)) 
        else: 
          temp.append(LiteraryAnalystIncremental(temp, chapter, modelName, client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    novel_level.append(SummarizeNovelLevelAnalysis(novel, modelName, client))
    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

result = GenerateReport(comparison, modelName, client)
result = re.sub(r'\*(.*?)\*', r'"\1"', result)

reports.append(result)

with open("five_page_report_call_v2_2.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v2_2.docx')

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 



In [None]:
# Testing all the essays to get the best one 

def get_best_output(outputs): 
    while len(outputs) > 1:
         # Comparison Based Judger on Two Reports until One is Left
        temp = CompareBookReports(outputs[0], outputs[1], modelName, client=client)
        saved = outputs[0]
        if '0' in temp[0:5]:
            saved = outputs[0]
        elif '1' in temp[0:5]: 
            saved = outputs[1]
        else: 
            print("Error")
        print(saved)
        outputs = outputs[2:]
        outputs.append(saved)
        
    return outputs[0] 

result = [get_best_output(reports) for i in range(20)] # Found Test 1 was the best.

In [None]:
# Test 3 (Hierarchical Merging)

chunk_level = []
outer = 1

for book in coalesed_books:
    temp = []
    for i, chapter in enumerate(book):
        temp.append(LiteraryAnalyst(chapter, modelName, client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    temp = novel.copy() 
    while (len(temp) > 1):
        temp.append(SummarizeNovelLevelAnalysisHierarchical(temp[0], temp[1], modelName, client))
        temp = temp[2:]

    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

result = GenerateReport(comparison, modelName, client)
result = re.sub(r'\*(.*?)\*', r'"\1"', result)

reports.append(result) 

with open("five_page_report_call_v3.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v3.docx')

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished iteration 10 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 



In [None]:
result = get_best_output(reports) # Test 1 was still the best

In [None]:
# Test 4 - 3 Report Generation Calls Instead

chunk_level = []
outer = 1

for book in coalesed_books:
    temp = []
    for i, chapter in enumerate(book):
        temp.append(LiteraryAnalyst(chapter, modelName, client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    novel_level.append(SummarizeNovelLevelAnalysis(novel, modelName, client))
    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

body_paragraphs = GenerateBodyParagraphs(comparison, modelName, client)
body_paragraphs = re.sub(r'\*(.*?)\*', r'"\1"', body_paragraphs)
introduction = GenerateIntroduction(body_paragraphs, comparison, modelName, client)
introduction = re.sub(r'\*(.*?)\*', r'"\1"', introduction)
conclusion = GenerateConclusion(introduction, body_paragraphs, comparison, modelName, client)
conclusion = re.sub(r'\*(.*?)\*', r'"\1"', conclusion)

result = '\n'.join([introduction, body_paragraphs, conclusion])
# result = re.sub(r'\(body paragraph(?: \d+(?:, \s?\d+)*)?\)', '', result)

reports.append(result)

with open("five_page_report_call_v4.docx", "w") as file:
    save_to_doc(result, 'five_page_report_call_v4.docx')

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished iteration 10 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 



In [None]:
# Test 5 - cheaper models for analysis and more expensive models for summary 

chunk_level = []
outer = 1

for book in coalesed_books:
    temp = []
    for i, chapter in enumerate(book):
        temp.append(LiteraryAnalyst(chapter, 'gpt-3.5-turbo', client)) 
        print(f"Finished iteration {i} \n")
    print(f"Finished Book {outer} \n")
    chunk_level.append(temp)
    outer += 1

novel_level = []
outer = 1
for novel in chunk_level:
    novel_level.append(SummarizeNovelLevelAnalysis(novel, modelName, client))
    print(f"Finished Summarizing Book {outer} \n")
    outer += 1

comparison = CompareBooks(novel_level, modelName, client)
print(f"Finished Comparing The Three Books \n")

body_paragraphs = GenerateBodyParagraphs(comparison, modelName, client)
body_paragraphs = re.sub(r'\*(.*?)\*', r'"\1"', body_paragraphs)
introduction = GenerateIntroduction(body_paragraphs, comparison, modelName, client)
introduction = re.sub(r'\*(.*?)\*', r'"\1"', introduction)
conclusion = GenerateConclusion(introduction, body_paragraphs, comparison, modelName, client)
conclusion = re.sub(r'\*(.*?)\*', r'"\1"', conclusion)

result = '\n'.join([introduction, body_paragraphs, conclusion])
# result = re.sub(r'\(body paragraph(?: \d+(?:, \s?\d+)*)?\)', '', result)

reports.append(result)

with open("five_page_report_call_v5.docx", "w") as file:
    save_to_doc(result, 'five_page_report_final_call_v5.docx')

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished iteration 9 

Finished iteration 10 

Finished Book 1 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished iteration 3 

Finished iteration 4 

Finished iteration 5 

Finished iteration 6 

Finished iteration 7 

Finished iteration 8 

Finished Book 2 

Finished iteration 0 

Finished iteration 1 

Finished iteration 2 

Finished Book 3 

Finished Summarizing Book 1 

Finished Summarizing Book 2 

Finished Summarizing Book 3 

Finished Comparing The Three Books 

