In [16]:
import json

# Load the JSON data from the file
with open('paper.json', 'r') as file:
    data = json.load(file)

# Load and parse the JSON data (assumed to be already loaded as `data` from earlier steps)

# Initialize Markdown formatted text
markdown_text = ""

# Variables to help with formatting
current_paragraph = ""
last_was_header = False

# Process each item in the items list
for item in data['items']:
    text = item['str'].strip()
    font_name = item['fontName']
    has_eol = item['hasEOL']

    # Determine if the current text is a headline
    is_header = (font_name == 'g_d0_f5' or "Chapter" in text or "Section" in text or "Key Concepts" in text)

    # Handle headline formatting
    if is_header and text:
        # If there is an existing paragraph, add it before the header
        if current_paragraph:
            markdown_text += current_paragraph + "\n\n"
            current_paragraph = ""
        # Format the header and add to markdown
        markdown_text += f"## {text}\n\n"
        last_was_header = True
    else:
        # Continue building the paragraph
        if current_paragraph:
            # Add a space if it's a continuation of a paragraph
            current_paragraph += " " + text if not last_was_header else text
        else:
            current_paragraph = text
        last_was_header = False

        # Add paragraph to markdown if end of line is reached
        if has_eol and current_paragraph:
            markdown_text += current_paragraph + " "
            current_paragraph = ""

# Check if there's any remaining paragraph to add
if current_paragraph:
    markdown_text += current_paragraph + "\n\n"

# Output the Markdown text
print(markdown_text.strip())  # Strip any extra newlines at the end

Automated Unit Test Improvement using Large Language Models at Meta  Nadia Alshahwan âˆ—  Jubin Chheda Anastasia Finegenova Beliz Gokkaya Mark Harman Inna Harper Alexandru Marginean Shubho Sengupta Eddy Wang  Meta Platforms Inc., Menlo Park, California, USA  ABSTRACT  This paper describes Metaâ€™s TestGen-LLM tool, which uses LLMs to automatically improve existing human-written tests. TestGen- LLM verifies that its generated test classes successfully clear a set of filters that assure measurable improvement over the original test suite, thereby eliminating problems due to LLM hallucination. We describe the deployment of TestGen-LLM at Meta test-a-thons for the Instagram and Facebook platforms. In an evaluation on Reels and Stories products for Instagram, 75% of TestGen-LLMâ€™s test cases built correctly, 57% passed reliably, and 25% increased coverage. During Metaâ€™s Instagram and Facebook test-a-thons, it improved 11.5% of all classes to which it was applied, with 73% of its recommen

In [17]:
# Updated script for improved Markdown formatting

# Initialize Markdown formatted text
improved_markdown_text = ""

# Variables to help with formatting
current_paragraph = ""
last_was_header = False

# Process each item in the items list
for item in data['items']:
    text = item['str'].strip()
    font_name = item['fontName']
    has_eol = item['hasEOL']

    # Determine if the current text is a headline
    is_header = (font_name == 'g_d0_f5' or "Chapter" in text or "Section" in text or "Key Concepts" in text)

    # Handle headline formatting
    if is_header and text:
        # If there is an existing paragraph, add it before the header
        if current_paragraph:
            improved_markdown_text += current_paragraph + "\n\n"
            current_paragraph = ""
        # Clean up the header to remove unwanted characters and format
        cleaned_header = text.replace("� ", "")  # Remove unwanted symbols
        # Format the header and add to markdown
        improved_markdown_text += f"## {cleaned_header}\n\n"
        last_was_header = True
    else:
        # Continue building the paragraph
        if current_paragraph:
            # Add a space if it's a continuation of a paragraph
            current_paragraph += " " + text if not last_was_header else text
        else:
            current_paragraph = text
        last_was_header = False

        # Add paragraph to markdown if end of line is reached
        if has_eol and current_paragraph:
            improved_markdown_text += current_paragraph + "\n"
            current_paragraph = ""

# Check if there's any remaining paragraph to add
if current_paragraph:
    improved_markdown_text += current_paragraph + "\n\n"

# Output the improved Markdown text
print(improved_markdown_text.strip())  # Strip any extra newlines at the end

Automated Unit Test Improvement using Large Language Models
at Meta 
Nadia Alshahwan âˆ— 
Jubin Chheda
Anastasia Finegenova
Beliz Gokkaya
Mark Harman
Inna Harper
Alexandru Marginean
Shubho Sengupta
Eddy Wang 
Meta Platforms Inc.,
Menlo Park, California, USA 
ABSTRACT 
This paper describes Metaâ€™s TestGen-LLM tool, which uses LLMs
to automatically improve existing human-written tests. TestGen-
LLM verifies that its generated test classes successfully clear a set
of filters that assure measurable improvement over the original
test suite, thereby eliminating problems due to LLM hallucination.
We describe the deployment of TestGen-LLM at Meta test-a-thons
for the Instagram and Facebook platforms. In an evaluation on
Reels and Stories products for Instagram, 75% of TestGen-LLMâ€™s
test cases built correctly, 57% passed reliably, and 25% increased
coverage. During Metaâ€™s Instagram and Facebook test-a-thons, it
improved 11.5% of all classes to which it was applied, with 73% of
its recommen

In [42]:
def process_json_file(data):    
    text_content = []
    current_paragraph = ""
    previous_item = None

    for item in data["items"]:
        text_str = item["str"]
        is_end_of_line = item["hasEOL"]
        
        # Improved heuristic to determine if consecutive lines should be combined
        if previous_item:
            vertical_distance = abs(item["transform"][5] - previous_item["transform"][5] + previous_item["height"])
            font_size_current = item["transform"][3]
            font_size_previous = previous_item["transform"][3]
            # Combining lines if they are close vertically and the font size does not change significantly
            font_size_difference = abs(font_size_current - font_size_previous)
            if vertical_distance < 12 and font_size_difference <= 2:
                # Combine with a space if it seems like a continuation of a headline
                if not current_paragraph.endswith('-') and current_paragraph != "":
                    current_paragraph += " "
                current_paragraph += text_str
            else:
                if current_paragraph:
                    text_content.append(current_paragraph)
                current_paragraph = text_str
        else:
            current_paragraph = text_str  # Initialize the first paragraph
        
        previous_item = item

        # # At end of line, decide if it's a headline
        if is_end_of_line and current_paragraph:
            if current_paragraph.endswith('.') or len(item['str'].split()) < 5:
                text_content.append(current_paragraph)
                current_paragraph = ""

    if current_paragraph:  # Append the last paragraph if any
        text_content.append(current_paragraph)

    return text_content

# Process the JSON data and apply the refined headline detection and merging logic
processed_text = process_json_file(data)
print("\n".join(processed_text[:20]))  # Display the first 20 lines to check the output

Automated Unit Test Improvement using Large Language Models at Meta
Nadia Alshahwan
âˆ—
Jubin Chheda
Anastasia Finegenova
Beliz Gokkaya
Mark Harman
Inna Harper
Alexandru Marginean
Shubho Sengupta
Eddy Wang 
Meta Platforms Inc.,
Menlo Park, California, USA 
ABSTRACT 
This paper describes Metaâ€™s TestGen-LLM tool, which uses LLMs to automatically improve existing human-written tests. TestGen-LLM verifies that its generated test classes successfully clear a set of filters that assure measurable improvement over the original test suite, thereby eliminating problems due to LLM hallucination.
We describe the deployment of TestGen-LLM at Meta test-a-thons for the Instagram and Facebook platforms. In an evaluation on Reels and Stories products for Instagram, 75% of TestGen-LLMâ€™s test cases built correctly, 57% passed reliably, and 25% increased coverage. During Metaâ€™s Instagram and Facebook test-a-thons, it improved 11.5% of all classes to which it was applied, with 73% of its recommendat

: 