In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os

import anthropic
from bleach import clean
from IPython.display import display, HTML
from markdown import markdown

import research_agents.arxiv_download as adl
import research_agents.html_to_ref_list as h2rl
import research_agents.summarize as summ

# Read the API key from the environment so it never has to be pasted into (and
# accidentally committed with) the notebook.
_anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
if not _anthropic_api_key:
    raise RuntimeError(
        "Set the ANTHROPIC_API_KEY environment variable before running this notebook."
    )

client = anthropic.Client(api_key=_anthropic_api_key)


In [None]:
# Source URLs for the three papers used below. CURL is treated as the original
# paper; RAD and DRQ are the follow-ups that cite it.
RAD_URL = 'https://arxiv.org/pdf/2004.14990.pdf'
CURL_URL = 'https://ar5iv.labs.arxiv.org/html/2004.04136'
DRQ_URL = 'https://arxiv.org/pdf/2004.13649.pdf'
CURL_REFERENCE_TITLE = "CURL: Contrastive Unsupervised Representations for Reinforcement Learning"

# Download the HTML for each paper (same order as before: RAD, CURL, DRQ).
rad_html = adl.download_arxiv_html(RAD_URL)
curl_html = adl.download_arxiv_html(CURL_URL)
drq_html = adl.download_arxiv_html(DRQ_URL)

# Extract RAD's reference list and pick out the entry that matches CURL's title.
refs = h2rl.get_refs(rad_html)
matching_ref = h2rl.find_ref_for(CURL_REFERENCE_TITLE, refs)

In [None]:
def safe_markdown_render(markdown_src: str) -> str:
    """Safely render Markdown to HTML, ensuring that there's no hidden JS and no
    images (or other tags that could load external resources).

    The Markdown is converted to HTML and then sanitized with ``bleach.clean``
    using an allow-list of structural and inline tags only. No attributes and
    no URL protocols are permitted, so event handlers, ``style``/``href``/``src``
    attributes and the like cannot survive sanitization.

    Args:
        markdown_src: Markdown source text to render.

    Returns:
        Sanitized HTML. Tags outside the allow-list are not rendered as markup
        (bleach escapes them by default), so they appear as literal text.
    """
    allowed_tags = [
        # Inline / generic containers.
        'div', 'span', 'p', 'em', 'strong', 'code', 'pre',
        # Block structure that Markdown emits for lists, headings and quotes.
        # Without these, bullet-point or headed summaries show up as escaped
        # "<ul><li>..." text instead of being rendered.
        'ul', 'ol', 'li',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'blockquote', 'br', 'hr',
    ]
    html = markdown(markdown_src)
    # attributes={} makes the "no attributes at all" policy explicit rather than
    # relying on bleach's defaults only applying to tags we do not allow.
    return clean(html, tags=allowed_tags, attributes={}, protocols=[])

# Show a heading, then the summary of the CURL paper. The summary is Markdown
# produced by the summariser, so it goes through the sanitizing renderer
# before being displayed as HTML.
display(HTML("<h1>Summary of input paper</h1>"))
summary_markdown = summ.summarise_paper_md(client, curl_html)
summary_html = safe_markdown_render(summary_markdown)
display(HTML(summary_html))

In [None]:
def format_snippets(snippets):
    """Render snippets as a Markdown-style bullet list.

    Each snippet becomes one line of the form ``- Snippet <n>: <text>``, with
    numbering starting at 1. Lines are joined with newlines; an empty input
    yields an empty string.
    """
    return "\n".join(
        f"- Snippet {number}: {text}"
        for number, text in enumerate(snippets, start=1)
    )

# Title and abstract for the original paper (CURL) and both follow-ups.
curl_title, curl_abstract = h2rl.get_title_and_abstract(curl_html)
rad_title, rad_abstract = h2rl.get_title_and_abstract(rad_html)
drq_title, drq_abstract = h2rl.get_title_and_abstract(drq_html)

# How RAD cites CURL: the formatted snippets and the inline citation marker,
# taken from the reference entry located in the download cell.
rad_formatted_snippets_about_curl = format_snippets(matching_ref["snippets"])
rad_inline_style = matching_ref["inline"].strip()

# How DRQ cites CURL. Look the reference up once and reuse it, instead of
# re-extracting DRQ's reference list and re-matching for every field.
drq_matching_ref = h2rl.find_ref_for(curl_title, h2rl.get_refs(drq_html))
drq_formatted_snippets_about_curl = format_snippets(drq_matching_ref["snippets"])
drq_inline_style = drq_matching_ref["inline"].strip()

# Prompt template for comparing one follow-up paper against the original paper.
# Placeholders: original_title, original_abstract, follow_up_title,
# follow_up_abstract, follow_up_inline, follow_up_snippets.
# The trailing backslashes inside the string join the final paragraph into
# fewer physical lines in the rendered prompt.
template = """
Here is a summary of the original paper:

Original paper title: {original_title}

Abstract: {original_abstract}

-------

Here is a summary from a follow-up paper:

Follow-up title: {follow_up_title}

Follow-up abstract: {follow_up_abstract}

In this follow-up paper, the original paper may be referred to by name ({original_title}), or by an inline citation style ({follow_up_inline}).
Here are snippets where the follow-up paper mentions the original paper:

{follow_up_snippets}

-----

Can you summarize the new information that the follow-up paper reveals about the original paper? This should be information that is not present in the \
original paper's abstract, but is mentioned in the follow-up paper's abstract or snippets. Provide your answer as dot points that \
quote the follow-up paper. Make a summary, don't use just direct quotes. Don't assume the reader knows about the snippets (VERY IMPORTANT: do not mention the snippets in the dot points). Filter out \
dot points that do not point out differences between the original paper and the follow-up paper.
"""

# print(
#     template.format(
#         original_title=curl_title,
#         original_abstract=curl_abstract,
#         follow_up_title=rad_title,
#         follow_up_abstract=rad_abstract,
#         follow_up_inline=rad_inline_style,
#         follow_up_snippets=rad_formatted_snippets_about_curl,
#     )
# )

In [None]:
# Ask the model what the RAD follow-up reveals about CURL.
rad_prompt_fields = dict(
    original_title=curl_title,
    original_abstract=curl_abstract,
    follow_up_title=rad_title,
    follow_up_abstract=rad_abstract,
    follow_up_inline=rad_inline_style,
    follow_up_snippets=rad_formatted_snippets_about_curl,
)
prompt = template.format(**rad_prompt_fields)

# Wrap the prompt in the human/assistant turn markers; generation stops if the
# model starts a new human turn.
rad_request = f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}"
resp_rad = client.completion(
    prompt=rad_request,
    stop_sequences=[anthropic.HUMAN_PROMPT],
    model="claude-1",
    max_tokens_to_sample=500,
    temperature=0,
)

print(resp_rad["completion"])

In [None]:
# Ask the model what the DRQ follow-up reveals about CURL.
# The `client` created in the setup cell is reused here; rebuilding it with a
# second hard-coded key only duplicated the credential in the notebook.
prompt = template.format(
    original_title=curl_title,
    original_abstract=curl_abstract,
    follow_up_title=drq_title,
    follow_up_abstract=drq_abstract,
    follow_up_inline=drq_inline_style,
    follow_up_snippets=drq_formatted_snippets_about_curl,
)

# NOTE: despite its name, `resp_curl` holds the DRQ-vs-CURL comparison. The
# name is kept because later cells read it.
resp_curl = client.completion(
    prompt=f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}",
    stop_sequences=[anthropic.HUMAN_PROMPT],
    model="claude-1",
    max_tokens_to_sample=500,
    temperature=0,
)

print(resp_curl["completion"])

In [None]:
# Prompt template for merging the per-follow-up comparisons into one paragraph.
# Placeholders: original_title, comparison1, comparison2.
# NOTE: this rebinds `template`. The earlier cells that call `template.format`
# with the follow-up fields will raise KeyError if re-run after this cell,
# unless the cell defining the first template is re-run first.
template = """
    The following are comparisons of the original paper {original_title} with follow-up papers. Can you summarize all these dot points into a single paragraph?

    {comparison1}
    
    {comparison2}
    

    """

# Preview the filled-in prompt before sending it to the model.
comparison_fields = dict(
    original_title=curl_title,
    comparison1=resp_rad["completion"],
    comparison2=resp_curl["completion"],
)
print(template.format(**comparison_fields))


In [None]:
# Combine both follow-up comparisons into a single-paragraph summary.
combined_fields = dict(
    original_title=curl_title,
    comparison1=resp_rad["completion"],
    comparison2=resp_curl["completion"],
)
prompt = template.format(**combined_fields)

# Wrap the prompt in the human/assistant turn markers; generation stops if the
# model starts a new human turn.
combined_request = f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}"
resp = client.completion(
    prompt=combined_request,
    stop_sequences=[anthropic.HUMAN_PROMPT],
    model="claude-1",
    max_tokens_to_sample=500,
    temperature=0,
)

print(resp["completion"])