### Playing with `claude-v1.3-100k`

In [1]:
import os
import anthropic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import openai
import time
import tempfile
import shutil
from pathlib import Path
import subprocess
from tqdm import tqdm

In [2]:
client = anthropic.Client(api_key=os.environ['ANTHROPIC_API_KEY'])

In [3]:
paper_id = '2108.07258'

## ContextualQA 

In [4]:
class ContextualQA:
    def __init__(self, client, model="claude-v1.3-100k"):
        self.client = client
        self.model = model
        self.context = ""
        self.questions = []
        self.responses = []

    def load_text(self, text):
        self.context = text

    def ask_question(self, question):
        leading_prompt = "Consider the text document below:"
        trailing_prompt = "Now answer the following question, use LaTex to format your answer."
        prompt = f"{anthropic.HUMAN_PROMPT} {leading_prompt}\n\n{self.context}\n\n{trailing_prompt}\n\n{anthropic.HUMAN_PROMPT} {question} {anthropic.AI_PROMPT}"
        response = self.client.completion_stream(
            prompt=prompt,
            stop_sequences=[anthropic.HUMAN_PROMPT],
            max_tokens_to_sample=6000,
            model=self.model,
            stream=False,
        )
        responses = [data for data in response]
        self.questions.append(question)
        self.responses.append(responses)
        return responses

    def clear_context(self):
        self.context = ""
        self.questions = []
        self.responses = []

    def __getstate__(self):
        state = self.__dict__.copy()
        del state["client"]
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.client = None


In [5]:
# load previous qa_system if it exists
if os.path.exists(f'{paper_id}.pkl'):
    with open(f'{paper_id}.pkl', 'rb') as f:
        qa_system = pickle.load(f)
        qa_system.client = client  # Manually set the client after loading

In [6]:
# Initialize the QA system
qa_system = ContextualQA(client)

# Load a text
# Load the LaTeX files
file_names = ['introduction.tex', 'overview.tex', 'capabilities.tex', 'applications.tex',
              'technology.tex', 'society.tex', 'conclusion.tex']


combined_tex_content = ''
for file_name in file_names:
    file_path = os.path.join(paper_id, file_name)
    with open(file_path, 'r') as f:
        combined_tex_content += f.read()


In [7]:
len(combined_tex_content)

48997

In [8]:
# Create a QA system
qa_system.load_text(combined_tex_content)

In [9]:
# Sanity check
questions = [
    "What is the title of the paper?",
    "Give me the abstract of the paper.",
    "Summarize the paper in a few sentences.",
    "Does the paper talk about Tax?",
    "Does the paper mention white collar workers?",
    "Does the paper mention blue collar workers?",
]

In [10]:
# Ask a questions in questions list
for question in tqdm(questions):
    responses = qa_system.ask_question(question)

100%|██████████| 6/6 [01:00<00:00, 10.12s/it]


In [11]:
# generated by GPT-4
questions = [
    "What are foundation models?",
    "How are foundation models developed and trained?",
    "What are the key technologies or techniques used in the development of foundation models?",
    "What are the primary applications of foundation models?",
    "How have foundation models impacted various industries economically?",
    "What are the potential economic benefits of using foundation models?",
    "What are the potential economic drawbacks or risks of using foundation models?",
    "How have foundation models affected job markets and employment?",
    "How have foundation models influenced social dynamics and interactions?",
    "What are the potential social benefits of using foundation models?",
    "What are the potential social drawbacks or risks of using foundation models?",
    "How have foundation models impacted education and learning?",
    "How have foundation models affected communication and information dissemination?",
    "What are the ethical considerations surrounding the use of foundation models?",
    "How are foundation models regulated, if at all?",
    "What are some case studies or examples of the impact of foundation models?",
    "How might the use of foundation models evolve in the future?",
    "What are some potential solutions to the challenges posed by foundation models?",
    "How do foundation models compare to other technologies in terms of economic and social impact?",
    "What are the key takeaways from the paper regarding the economic and social impact of foundation models?",
]

In [12]:
# Ask a questions in questions list
for question in tqdm(questions):
    responses = qa_system.ask_question(question)

100%|██████████| 20/20 [06:22<00:00, 19.13s/it]


In [13]:
# Save
with open(f'{paper_id}.pkl', 'wb') as f:
    pickle.dump(qa_system, f)

In [14]:
def latex_escape(text):
    text = text.replace("_", r"\_")
    text = text.replace("$", r"\$")
    return text

def qa_to_pdf(question, answer, output_filename):
    latex_template = r"""
    \documentclass{{standalone}}
    \usepackage[utf8]{{inputenc}}
    \usepackage{{amsmath}}
    \usepackage{{amssymb}}
    \usepackage{{hyperref}}
    \usepackage{{varwidth}}
    \usepackage{{adjustbox}}
    \begin{{document}}
    \begin{{adjustbox}}{{margin=5mm}}
    \begin{{varwidth}}{{\linewidth}}
    \textbf{{Question:}} \\
    {question} \\
    \textbf{{Answer:}} \\
    {answer}
    \end{{varwidth}}
    \end{{adjustbox}}
    \end{{document}}
    """

    answer = answer[0]['completion']
    latex_content = latex_template.format(question=latex_escape(question), answer=latex_escape(answer))

    with tempfile.TemporaryDirectory() as temp_dir:
        tex_file = Path(temp_dir) / "qa.tex"
        pdf_file = Path(temp_dir) / "qa.pdf"

        with open(tex_file, "w") as f:
            f.write(latex_content)

        subprocess.run(
            [
                "pdflatex",
                "-interaction=nonstopmode",
                "-output-directory",
                temp_dir,
                tex_file,
            ],
            check=True,
        )

        shutil.copy(pdf_file, output_filename)


In [16]:
def save_qa_to_pdf(qa_system, output_dir):
    for i, (question, answer) in enumerate(zip(qa_system.questions, qa_system.responses), 1):
        output_filename = os.path.join(output_dir, f"QA_{i}.pdf")
        try:
            qa_to_pdf(question, answer, output_filename)
        except Exception as e:
            print(f"Failed to generate PDF for question {i}: {e}")


# creat a folder to save the pdf files
output_folder = f"./{paper_id}_results"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

save_qa_to_pdf(qa_system, output_folder)

This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex)
 restricted \write18 enabled.
entering extended mode
(/var/folders/gs/ftfl94c117q3mq9rqwfnx1mc0000gn/T/tmp92830a1c/qa.tex
LaTeX2e <2022-11-01> patch level 1
L3 programming layer <2023-02-22>
(/usr/local/texlive/2023/texmf-dist/tex/latex/standalone/standalone.cls
Document Class: standalone 2022/10/10 v1.3b Class to compile TeX sub-files stan
dalone
(/usr/local/texlive/2023/texmf-dist/tex/latex/tools/shellesc.sty)
(/usr/local/texlive/2023/texmf-dist/tex/generic/iftex/ifluatex.sty
(/usr/local/texlive/2023/texmf-dist/tex/generic/iftex/iftex.sty))
(/usr/local/texlive/2023/texmf-dist/tex/latex/xkeyval/xkeyval.sty
(/usr/local/texlive/2023/texmf-dist/tex/generic/xkeyval/xkeyval.tex
(/usr/local/texlive/2023/texmf-dist/tex/generic/xkeyval/xkvutils.tex
(/usr/local/texlive/2023/texmf-dist/tex/generic/xkeyval/keyval.tex))))
(/usr/local/texlive/2023/texmf-dist/tex/latex/standalone/standalone.cfg)
(/usr/local/