# Simple notebook to help write out a useful paper summary to a word document. 


## This tool is designed to help write out an intro, the figures with summarized captions, and a critique of a paper to a short document. In my opinion, that is a better way to try to understand a paper than just reading a summary. 

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Location of the paper (PDF) that the rest of the notebook summarizes.
pdf_path = "./data/2212.00136v2.pdf"

In [4]:
# Upper bound on how many extracted images are summarized in the report.
num_of_figs = 4

In [5]:
import fitz  # PyMuPDF
import os
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
import tempfile
import ell 

def embed_text_in_image(image, text, font_size=1000, position=(10, 10)):
    """Draw a black text label onto an encoded image.

    :param image: Encoded image data as bytes (e.g. the "image" entry returned
                  by PyMuPDF's ``extract_image``).
    :param text: Label to draw onto the image.
    :param font_size: Point size requested for "arial.ttf". It is not applied
                      when that font is missing and PIL's default font is used.
    :param position: (x, y) pixel offset of the label's top-left corner.
    :return: The labelled ``PIL.Image.Image``. Images that were not RGB/RGBA
             are returned converted to RGB.
    """
    # Decode the raw bytes into a PIL image.
    image = Image.open(BytesIO(image))

    # The label is drawn with an RGB colour tuple, which Pillow rejects for
    # single-band images (grayscale, 1-bit); palette and CMYK images would
    # misinterpret it.  Normalise those modes to RGB first.
    if image.mode not in ("RGB", "RGBA"):
        image = image.convert("RGB")

    draw = ImageDraw.Draw(image)

    # Prefer Arial at the requested size; fall back to PIL's built-in font
    # when the TrueType file is not available on this system.
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        font = ImageFont.load_default()

    # NOTE(review): the default font_size of 1000 looks very large for typical
    # figure sizes -- confirm it is intended.
    draw.text(position, text, font=font, fill=(0, 0, 0))  # black text

    return image

def extract_images_from_pdf(pdf_path, output_folder, text_to_embed="Figure {page_num}"):
    """Extract every embedded image from a PDF and save it as a labelled PNG.

    Each image is written to ``output_folder`` as
    ``image_<page>_<index>.png`` (both 1-based) after a text label has been
    drawn onto it with ``embed_text_in_image``.

    :param pdf_path: Path of the PDF to read.
    :param output_folder: Directory for the PNG files; created if missing.
    :param text_to_embed: Format string for the label. Its ``{page_num}``
                          placeholder receives the running image count across
                          the whole document (not the page number).
    """
    pdf_document = fitz.open(pdf_path)
    image_count = 0
    try:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_folder, exist_ok=True)

        for page_number, page in enumerate(pdf_document, start=1):
            page_images = page.get_images(full=True)
            for image_index, img in enumerate(page_images, start=1):
                image_count += 1

                # The first entry of each image record is its xref number.
                xref = img[0]
                image_bytes = pdf_document.extract_image(xref)["image"]

                label = text_to_embed.format(page_num=image_count)
                modified_image = embed_text_in_image(image_bytes, label)

                image_filename = f"image_{page_number}_{image_index}.png"
                image_filepath = os.path.join(output_folder, image_filename)
                modified_image.save(image_filepath, "PNG")
    finally:
        # Release the PDF handle even if extraction or saving fails part-way.
        pdf_document.close()

    print(f"Extracted and saved {image_count} images with embedded text from the PDF.")

from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

def create_word_report(file_name, sections):
    """
    Build a Word report from a list of sections and write it to disk.

    Every section becomes a centered level-1 heading, followed by an optional
    picture (5 inches wide), the section text and a blank spacer paragraph.

    :param file_name: Path of the .docx file to write.
    :param sections: List of dictionaries, one per section, with the keys:
                     - 'title': heading text (string, defaults to "")
                     - 'text': body text (string, defaults to "")
                     - 'image_path': optional path to an image file (string)
    """
    report = Document()

    for entry in sections:
        # Heading first, centered on the page.
        heading = report.add_heading(entry.get("title", ""), level=1)
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # The picture, when present, sits between the heading and the text,
        # separated from the heading by an empty paragraph.
        picture_path = entry.get("image_path")
        if picture_path:
            report.add_paragraph()
            report.add_picture(picture_path, width=Inches(5))

        # Body text, then an empty paragraph as a gap before the next section.
        report.add_paragraph(entry.get("text", ""))
        report.add_paragraph()

    report.save(file_name)
    print(f"Report saved as {file_name}")


In [6]:
# Open the paper with PyMuPDF; the handle is used below to read the text of
# every page. NOTE(review): this handle is not closed anywhere in the visible
# notebook.
pdf_document = fitz.open(pdf_path)

In [7]:
# Plain text of every page of the paper, in page order (one string per page).
pages = [page.get_text() for page in pdf_document]

In [8]:
import tempfile


In [9]:
# Use a fresh, private directory for the extracted figures.  The shared system
# temp directory may already contain PNG files (from other programs or from a
# previous run on a different paper), and those would be picked up as figures
# when the folder is globbed later on.
tmp_folder = tempfile.mkdtemp(prefix="paper_figures_")

In [10]:
# Write every image of the paper into the temporary folder as a labelled PNG.
extract_images_from_pdf(pdf_path, output_folder=tmp_folder)

Extracted and saved 7 images with embedded text from the PDF.


In [11]:
# LLM prompt (ell, model "gpt-4o"): one-paragraph introductory summary of a paper.
# The docstring below is the system prompt and the returned f-string is the
# user prompt, so editing either one changes the request sent to the model.
# The system prompt asks the model to wrap its answer in <Title>...</Title> and
# <Long Summary>...</Long Summary>; the report-building code splits on these tags.
# `text` is interpolated into the user prompt as-is.
@ell.simple(model="gpt-4o")
def intro(text: str):
    """I want you to act as a PhD level research paper summarizer. I will provide you with a research paper \
    on a specific topic, and you will create a introductory ONE paragraph summary the main points and \
    findings of the paper. Your summary should be concise and should accurately and objectively \
    communicate the key points of the paper. You should not include any personal opinions or \
    potential uses or interpretations in the paragraph, but rather focus on objectively presenting the
    scientific content from the paper. Your summary should \
    be written in your own words and should not include any direct quotes from the paper. Please \
    ensure that your summary is clear, concise, and accurately reflects the content of the original paper.
    Provide a short title of <50 characters separated by <Title> and </Title> and \
    a single summary paragraph separated by <Long Summary> and </Long Summary> 
    Think deeply about the paper, go step by step, and do not hallucinate.""" # System prompt
    return f"Please help me summarize {text}." # User prompt


In [12]:
# LLM prompt (ell, model "gpt-4o"): numbered list of the paper's 3 main points.
# The docstring below is the system prompt and the returned f-string is the
# user prompt, so editing either one changes the request sent to the model.
# The system prompt asks the model to wrap its answer in <Title>...</Title> and
# <Long Summary>...</Long Summary>; the report-building code splits on these tags.
# `text` is interpolated into the user prompt as-is.
@ell.simple(model="gpt-4o")
def three_main_points(text: str):
    """I want you to act as a PhD level research paper summarizer. I will provide you with a research paper \
    on a specific topic, and you will create a introductory summary list of the 3 main points and \
    findings of the paper.\
    Your list should be concise and should accurately and objectively communicate the key points \
    of the paper. You should not include any personal opinions or interpretations in your summary, \
    but rather focus on objectively presenting the information from the paper. Your summary should \
    be written in your own words and should not include any direct quotes from the paper. Please \
    ensure that your summary is clear, concise, and accurately reflects the content of the original paper.
    Provide a short title of <50 characters separated by <Title> and </Title> and \
    a longer numbered list separated by <Long Summary> and </Long Summary> 
    Think deeply about the paper, go step by step, and do not hallucinate.""" # System prompt
    return f"Please help me summarize {text}." # User prompt


In [13]:
@ell.simple(model="gpt-4o")
def limitations(text: str):
        return [
        ell.system("""I want you to act as a research paper critic. I will provide you with a research paper \
    on a specific topic, and you will create a critical point by point summary of the main limitations of the paper.\
    Your summary should be concise and should accurately and objectively communicate the potential scientific \
    issues within the paper. Think deeply about the metdology and results, and where the authors could have made \
    a mistake.You should not summarize the paper not should you include any personal opinions. Rather focus on \
    potential issues and areas for \
    future improvement. Your summary should be written in your own words and should not include any direct 
    quotes from the paper. Please ensure that your summary is clear, concise, and accurately reflects the \
    content of the original paper.
    Provide a short title of <50 characters separated by <Title> and </Title> and \
    a longer numbered list separated by <Long Summary> and </Long Summary> 
    Think deeply about the paper, go step by step, and do not hallucinate."""),
        ell.user(f"Please help me summarize {text}.")]


In [14]:
# Multimodal LLM prompt (ell, model "gpt-4o"): explain one figure of the paper.
#   image  -- the figure as a PIL image, sent alongside the text request
#   fig_id -- figure number, interpolated into the request text
#   docs   -- the paper text, interpolated into the request text as-is
# The strings below are sent to the model, so editing them changes the request.
# The caller reads the reply text via `.content[0].text`.
@ell.complex(model="gpt-4o")
def figure_summary(image: Image.Image,fig_id, docs ):

    return [
        # System message: role and task description for the model.
        ell.system("You are a PhD level scientist. Please \
        summarize all information related to the given image from the following paper. \
        Please return coordinates for a bounding box around anything you think the user\
        should focus on. Go step by step,\
        think before you act, and do not hallucinate."),
        # User message: the image itself followed by the textual request.
        ell.user([
            "Image:", image,
            "Request:", f"Please help me understand the given figure (figure {fig_id}) from {docs}."
        ])
    ]


In [15]:
from re import split
from glob import glob

def natsort(s):
    """Return a natural-sort key for the string ``s``.

    The string is split into alternating text and digit runs; digit runs
    become ints and text runs are lower-cased, so that e.g. "image_2" sorts
    before "image_10".

    :param s: String (typically a file path) to build the key for.
    :return: List alternating between lower-cased text and int values.
    """
    # Splitting on a capturing group puts the digit runs at the odd indices.
    # Using the position (rather than str.isdigit) avoids calling int() on
    # characters such as "²" that isdigit() accepts but int() rejects.
    tokens = split(r'(\d+)', s)
    return [
        int(token) if index % 2 else token.lower()
        for index, token in enumerate(tokens)
    ]



In [16]:
# Only pick up the files written by extract_images_from_pdf, which are named
# image_<page>_<index>.png; a bare "*.png" would also match unrelated PNG
# files that happen to live in the same folder.
path = os.path.join(tmp_folder, "image_*.png")
# Natural ordering so that e.g. image_2_1.png comes before image_10_1.png.
sorted_image_files = sorted(glob(path), key=natsort)


In [17]:
# Creating the report 

In [18]:
# The prompt functions interpolate their argument into an f-string.  Passing
# the list of page strings directly would embed its Python repr (brackets,
# quotes, escaped "\n") in the prompt, so join the pages into one text first.
paper_text = "\n\n".join(pages)


def _extract_tag(doc, tag, default=""):
    """Return the text between <tag> and </tag> in doc, or default if <tag> is absent."""
    _, opening, remainder = doc.partition(f"<{tag}>")
    if not opening:
        return default
    return remainder.partition(f"</{tag}>")[0].strip()


def _text_section(doc, fallback_title):
    """Turn a tagged LLM reply into a report section without an image."""
    return {
        "title": _extract_tag(doc, "Title", fallback_title),
        # If the model ignored the requested tags, keep its whole reply rather
        # than failing with an IndexError.
        "text": _extract_tag(doc, "Long Summary", doc.strip()),
        "image_path": None,
    }


def _figure_section(fig_id, image_path):
    """Ask the model to explain one extracted figure and build its report section."""
    # Load the pixels up front and close the file handle once the call is done.
    with Image.open(image_path) as figure:
        figure.load()
        summary = figure_summary(figure, fig_id, paper_text).content[0].text
    return {
        "title": f"Figure {fig_id}",
        "text": summary,
        "image_path": image_path,
    }


intro_doc = intro(paper_text)
intro_sec = [_text_section(intro_doc, "Summary")]

three_main_doc = three_main_points(paper_text)
three_sec = [_text_section(three_main_doc, "Main points")]

images_sec = [
    _figure_section(fig_id, image_file)
    for fig_id, image_file in enumerate(sorted_image_files[:num_of_figs], start=1)
]

limitations_doc = limitations(paper_text)
critque_sec = [_text_section(limitations_doc, "Limitations")]


In [19]:
# Writing out the word document 

In [20]:
# Report order: introduction, three main points, figure walkthroughs, critique.
sections = [*intro_sec, *three_sec, *images_sec, *critque_sec]
create_word_report("example_report.docx", sections)


Report saved as example_report.docx
