In [3]:
import getpass
import os

# Ask for the OpenAI API key interactively only when the environment does not
# already provide it, so the secret never has to be written in the notebook.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [3]:
# Path of the paper PDF; passed to get_head() when the podcast introduction is generated.
file_path = "./1706.03762v7.pdf"

In [4]:
from PyPDF2 import PdfReader

# Load the PDF file
pdf_path = "./1706.03762v7.pdf"
pdf_reader = PdfReader(pdf_path)

# Collect the text of every page up to and including the first page that
# mentions "Conclusion"; the pages after it are left out.
extracted_text = []

for page in pdf_reader.pages:
    page_text = page.extract_text()
    if not page_text:
        # Pages for which extract_text() returns nothing are skipped.
        continue
    extracted_text.append(page_text)

    # The whole page has just been stored, so the conclusion is already
    # included. Appending the tail of the page a second time (as was done
    # before) duplicated the conclusion in the saved text.
    if "Conclusion" in page_text:
        break

# Join all collected text
final_text_to_section_after_conclusion = "\n".join(extracted_text)

# Save to .txt file
output_path_to_section_after_conclusion = "./extracted_text_to_section_after_conclusion.txt"
with open(output_path_to_section_after_conclusion, "w") as file:
    file.write(final_text_to_section_after_conclusion)

output_path_to_section_after_conclusion



'./extracted_text_to_section_after_conclusion.txt'

In [14]:
# Read the saved text file back into a single string.
with open(output_path_to_section_after_conclusion, "r") as file:
    text = file.read()


In [5]:
from langchain_openai import ChatOpenAI
# Chat model shared by the chains built in this notebook.
llm = ChatOpenAI(model="gpt-4o-mini")


In [6]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt asking the model for a podcast plan made of a title followed by
# sections of bullet points; `{paper}` is replaced by the text of the paper.
plan_prompt = ChatPromptTemplate.from_template("""You are a very clever planner of podcast scripts. You will be given the text of a research paper, and your task will be to generate a plan for a podcast involving 3 persons discussing about the content of the paper in a very engaging, interactive and enthusiastic way. The plan will be structured using titles and bullet points only. The plan for the podcast should follow the structure of the paper. The podcast involves the following persons:
- The host: he will present the paper and its details in a very engaging way. very professional, friendly, warm and enthusiastic.
- The learner: he will ask clever and significative questions about the paper and its content. he is curious and funny.
- The expert: he will provide deep insights, comments and details about the content of the paper and other related topics. he talks less than the two other and his interventions are more profound and detailed.
Example of a structure for the podcast:
# Title: title of the podcast
# Section 1: title of section 1
- bullet point 1
- bullet point 2
- bullet point 3
...
- bullet point n
# Section 2: title of section 2
- bullet point 1
- bullet point 2
- bullet point 3
...
- bullet point n
# Section 3: title of section 3
...
# Section n: title of section n
- bullet point 1
- bullet point 2
- bullet point 3
...
- bullet point n
The paper: {paper}
The podcast plan in titles and bullet points:""")
                                               


In [7]:
# Fill the planning prompt with the paper text and ask the model for the plan.
# The result is the model's message; its text is read from `.content`.
plan = llm.invoke(plan_prompt.invoke({"paper": text}))


In [19]:
# Show the text of the generated plan.
print(plan.content)

# Title: Unpacking the Transformer: The Future of Sequence Transduction

## Section 1: Introduction to the Transformer
- Host introduces the paper "Attention Is All You Need" and its significance in AI.
- Learner shares a funny anecdote about previous models like RNNs or CNNs.
- Expert outlines the core idea behind the Transformer model and its reliance on attention mechanisms.

## Section 2: Background on Sequence Models
- Host explains the limitations of recurrent and convolutional neural networks in sequence modeling.
- Learner asks why traditional models struggle with longer sequences and parallelization.
- Expert dives deep into the history of sequence models and how attention mechanisms have been integrated.

## Section 3: The Transformer Architecture
- Host presents an overview of the Transformer’s architecture and its components.
- Learner humorously compares the architecture to a complex recipe.
- Expert details the encoder-decoder structure and the significance of self-attent

In [8]:
from langchain_core.messages import AIMessage
import re
def parse_script_plan(ai_message: AIMessage) -> list:
    """Split the model's markdown plan into one flat string per section.

    Every markdown header (``#``, ``##``, ...) opens a new section and the
    bullet points that follow are attached to it. Each section is returned as
    its header and bullets joined by single spaces. Lines that are neither a
    header nor a bullet are ignored.

    The podcast title is not a section and is left out: the first line of the
    message is dropped unless it is itself a ``Section`` header, and any
    ``# Title: ...`` header is dropped wherever it appears (for example when
    the model writes a sentence before the plan).

    Args:
        ai_message: Message whose ``content`` attribute holds the plan text.

    Returns:
        A list of strings, one per section, in the order they appear.
    """
    header_pattern = re.compile(r"^#+\s")      # headers of any level
    bullet_pattern = re.compile(r"^[-*+]\s")   # markdown bullets: "- ", "* ", "+ "
    title_pattern = re.compile(r"^#+\s*title\s*:", re.IGNORECASE)
    section_pattern = re.compile(r"^#+\s*section\b", re.IGNORECASE)

    sections = []
    current_section = []

    # Strip every line up front so indented headers and nested bullets are
    # recognised instead of being silently discarded.
    lines = [raw.strip() for raw in ai_message.content.strip().splitlines()]

    for position, line in enumerate(lines):
        # The first line is normally the title. Keep it only when the model
        # skipped the title and started directly with a section header.
        if position == 0 and not section_pattern.match(line):
            continue

        if header_pattern.match(line):
            if title_pattern.match(line):
                # A title that shows up after some preamble is not a section.
                continue
            # A new header closes the section collected so far.
            if current_section:
                sections.append(" ".join(current_section))
            current_section = [line]
        elif bullet_pattern.match(line):
            current_section.append(line)

    # Flush the last section, if any.
    if current_section:
        sections.append(" ".join(current_section))

    return sections


In [9]:
from PyPDF2 import PdfReader

# Load the PDF file
pdf_path = "./1729936904777.pdf"
pdf_reader = PdfReader(pdf_path)

# Collect the text of every page up to and including the first page that
# mentions "Conclusion"; the pages after it are left out.
extracted_text = []

for page in pdf_reader.pages:
    page_text = page.extract_text()
    if not page_text:
        # Pages for which extract_text() returns nothing are skipped.
        continue
    extracted_text.append(page_text)

    # The whole page has just been stored, so the conclusion is already
    # included. Appending the tail of the page a second time (as was done
    # before) duplicated the conclusion in the saved text.
    if "Conclusion" in page_text:
        break

# Join all collected text
final_text_to_section_after_conclusion = "\n".join(extracted_text)

# Save to .txt file
output_path_to_section_after_conclusion = "./paper2.txt"
with open(output_path_to_section_after_conclusion, "w") as file:
    file.write(final_text_to_section_after_conclusion)

# Read the saved text back; `text` is the paper text used by later cells.
with open(output_path_to_section_after_conclusion, "r") as file:
    text = file.read()

In [10]:
# Planning pipeline: fill the plan prompt, call the model, then split the
# reply into a list of section strings with parse_script_plan.
plan_script_chain = plan_prompt | llm | parse_script_plan
# sections = plan_script_chain.invoke({"paper": text})
# print(sections)

In [11]:
# Prompt for the dialogue of a single section. It takes three inputs:
# `{section_plan}`, `{previous_dialogue}` (given to avoid repetitions) and
# `{additional_context}`.
discuss_prompt_template = ChatPromptTemplate.from_template("""You are a very clever scriptwriter of podcast discussions. You will be given a plan for a section of the middle of a podcast that already started involving 3 persons discussing about the content of a research paper. Your task will be to generate a brief dialogue for the podcast talking about the given section, do not include voice effects, and do not make an introduction. The dialogue should be engaging, interactive, enthusiastic and have very clever transitions and twists. The dialogue should follow the structure of the plan. The podcast involves the following persons:
- The host: he will present the paper and its details in a very engaging way. very professional, friendly, warm and enthusiastic.
- The learner: he will ask clever and significative questions about the paper and its content. he is curious and funny.
- The expert: he will provide deep insights, comments and details about the content of the paper and other related topics. he talks less than the two other and his interventions are more profound and detailed.
Dialogue example 1:
Host: Let's continue with the second section of the paper ... 
Learner: I have a question about ...
Expert: I would like to add ... 
Dialogue example 2:
Host: Now, let's move on to the next section ...
Expert: I think that ...
Learner: I have a question about ...
Expert: I would like to add ...
Dialogue example 3:
Learner: Should we move on to the next section?
Host: Yes, let's move on to the next section ...
Expert: I think that ...
Section plan: {section_plan}
Previous dialogue (to avoid repetitions): {previous_dialogue}
Additional context:{additional_context}
Brief section dialogue:""")

                                                                                                             

In [19]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser

# Load the extracted paper text, split it into overlapping chunks and index them.
loader = TextLoader("./paper2.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retriever over the indexed chunks of the paper.
retriever = vectorstore.as_retriever()



def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)


# discuss_rag_chain = (
#     {"additional_context": retriever | format_docs, "section_plan": RunnablePassthrough(), "previous_dialogue": RunnablePassthrough()}
#     | discuss_prompt_template
#     | llm
#     | StrOutputParser()
# )

# dialogue = discuss_rag_chain.invoke(sections[0])


In [37]:
# NOTE(review): `dialogue` is only assigned in the commented-out code of the
# previous cell, so this line raises NameError on a fresh kernel. Confirm
# whether this cell should be kept.
print(dialogue)

Host: Now that we’ve set the stage with the importance of model evaluation, let’s dive deeper into the challenges we face with current evaluation methods. It’s a real balancing act between objective and subjective criteria. 

Learner: Objective vs. subjective? Sounds like a debate between a robot and a poet! But seriously, how do we even begin to evaluate AI models without getting lost in the complexity?

Expert: That’s a great point! The crux of the issue is that while human evaluations can reflect real-world usage scenarios, they come with significant drawbacks—like cost and a lack of reproducibility. 

Host: Exactly! Imagine trying to gather a hundred people to judge an AI model’s output every time—it’s just not feasible. This brings us to the star of our discussion: CompassJudger-1. 

Learner: Wait, is this like the superhero of AI evaluation? What makes CompassJudger-1 stand out from the crowd? 

Expert: Well, CompassJudger-1 is indeed a game changer. As the first open-source all-

In [12]:
def get_head(pdf_path: str) -> str:
    """Return the text of the PDF that precedes the first "Introduction".

    Pages are read in order and pages whose extracted text is empty are
    skipped. The page containing "Introduction" contributes only the text
    before that word and ends the scan. If the word never appears, the text
    of every page is returned. Page texts are joined with newlines.
    """
    reader = PdfReader(pdf_path)
    head_parts = []

    for pdf_page in reader.pages:
        page_text = pdf_page.extract_text()
        if not page_text:
            continue

        cut = page_text.find("Introduction")
        if cut != -1:
            # Keep only what comes before the section heading, then stop.
            head_parts.append(page_text[:cut])
            break
        head_parts.append(page_text)

    return "\n".join(head_parts)

In [14]:
from langchain_core.output_parsers import StrOutputParser
# Prompt for the podcast introduction; `{paper_head}` is replaced by the
# beginning of the paper.
initial_dialogue_prompt = ChatPromptTemplate.from_template("""You are a very clever scriptwriter of podcast introductions. You will be given the title of a paper and a brief glimpse of the content of a research paper. Avoid using sound effects, only text. Avoid finishing with the host, finish the dialogue with the expert. Your task will be to generate an engaging and enthusiastic introduction for the podcast. The introduction should be captivating, interactive, and should make the listeners eager to hear the discussion. The introduction of the podcast should have 3 interactions only. The podcast involves the following persons:
- The host: he will present the paper and its details in a very engaging way. very professional, friendly, warm and enthusiastic.
- The learner: he will ask clever and significative questions about the paper and its content. he is curious and funny.
- The expert: he will provide deep insights, comments and details about the content of the paper and other related topics. he talks less than the two other and his interventions are more profound and detailed.
Introduction example 1:
Host: Welcome to our podcast, today we will be discussing the paper ...
Learner: I am very curious about ...
Expert: I think that ...
Introduction example 2:
Host: Hello everyone, today we have a very interesting paper to discuss ...
Expert: I would like to add ...
Learner: I have a question about ...
Content of the paper: {paper_head}
Brief 3 interactions introduction:""")
# Introduction pipeline: fill the prompt, call the model, return the reply as a plain string.
initial_dialogue_chain = initial_dialogue_prompt | llm | StrOutputParser()

In [43]:
# Generate the podcast introduction from the text that get_head() extracts
# from the PDF at `file_path`, then display it.
initial_dialogue = initial_dialogue_chain.invoke({"paper_head": get_head(file_path)})
print(initial_dialogue)

**Host:** Welcome back to another exciting episode of our podcast! Today, we’re diving into the groundbreaking paper, “Attention Is All You Need,” which introduces the revolutionary Transformer model that’s reshaping the landscape of natural language processing. Now, this isn’t just any paper – it’s a game-changer! 

**Learner:** I mean, “Attention Is All You Need”? That sounds both captivating and a bit intense! But seriously, what does it mean to ditch recurrence and convolutions entirely? How does that even work?

**Expert:** That’s a great question! The essence of the Transformer architecture lies in its unique approach to attention mechanisms, which allows for efficient parallelization and significantly faster training times. It’s a profound shift in how we understand sequence transduction!

**Host:** Exactly! And we’ll explore just how the Transformer model achieved state-of-the-art results in translation tasks, outperforming its predecessors. So, stay tuned, because you won't wa

# Wrap up

In [23]:
from langchain_core.runnables import RunnableParallel
from operator import itemgetter
def parse_pdf(pdf_path: str, output_path: str) -> str:
    """Extract the text of a paper from a PDF and save it as a text file.

    Page texts are collected in order, up to and including the first page
    that contains the word "Conclusion"; later pages are left out. Pages for
    which no text can be extracted are skipped. The collected page texts are
    joined with newlines and written to ``output_path``.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path of the text file to write.

    Returns:
        ``output_path``, so the call can be chained with code that reads the file.
    """
    pdf_reader = PdfReader(pdf_path)

    extracted_text = []
    for page in pdf_reader.pages:
        page_text = page.extract_text()
        if not page_text:
            continue
        extracted_text.append(page_text)

        # The whole page has just been stored, so the conclusion is already
        # included. Appending the tail of the page a second time (as was done
        # before) duplicated the conclusion in the saved text.
        if "Conclusion" in page_text:
            break

    # Save to .txt file
    with open(output_path, "w") as file:
        file.write("\n".join(extracted_text))

    return output_path

def initialize_discussion_chain(txt_file):
    """Build the retrieval-augmented chain that writes the dialogue of one section.

    The text file is loaded, split into overlapping chunks and indexed in a
    Chroma vector store. The returned chain expects a dict with the keys
    ``"section_plan"`` and ``"previous_dialogue"``. It uses the section plan
    to retrieve chunks of the paper as additional context, fills
    ``discuss_prompt_template`` and returns the model's reply as a string.

    Args:
        txt_file: Path of the text file holding the extracted paper.

    Returns:
        The runnable chain described above.
    """
    import uuid  # local import: only needed to name the collection below

    # Load, chunk and index the contents of the paper.
    loader = TextLoader(txt_file)
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Without an explicit name, Chroma.from_documents falls back to one default
    # collection. With an in-memory client, repeated calls in the same kernel
    # can then pile chunks from earlier runs (or other papers) into that same
    # collection. A unique name per call keeps this retriever restricted to
    # the chunks of `txt_file`.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(),
        collection_name=f"paper-{uuid.uuid4().hex}",
    )

    # Retrieve and generate using the relevant snippets of the paper.
    retriever = vectorstore.as_retriever()

    def format_docs(docs):
        """Join the text of the retrieved documents, separated by blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    discuss_rag_chain = (
        {
            # The section plan doubles as the retrieval query.
            "additional_context": itemgetter("section_plan") | retriever | format_docs,
            "section_plan": itemgetter("section_plan"),
            "previous_dialogue": itemgetter("previous_dialogue"),
        }
        | discuss_prompt_template
        | llm
        | StrOutputParser()
    )
    return discuss_rag_chain

In [17]:
# Point `file_path` at the second paper; get_head(file_path) reads it when the
# introduction is generated.
file_path = "./1729936904777.pdf"

In [24]:

# step 1: parse the PDF file and extract the text into a .txt file
from PyPDF2 import PdfReader

# Load the PDF file
pdf_path = "./1729936904777.pdf"  # to change with the user input

output_path = parse_pdf(pdf_path, "./paper2.txt")  # to change with an automatically generated file name

# get the text from the .txt file
with open(output_path, "r") as file:
    text = file.read()

# step 2: generate a plan for the podcast
plan = plan_script_chain.invoke({"paper": text})
print(plan)

# step 3: generate the actual script for the podcast by looping over the sections of the plan
# generate the initial dialogue; the head is read from the same PDF as the
# body (pdf_path) rather than from a path set in another cell, so the
# introduction and the sections always describe the same paper
initial_dialogue = initial_dialogue_chain.invoke({"paper_head": get_head(pdf_path)})
print(initial_dialogue)

discuss_rag_chain = initialize_discussion_chain(output_path)

script_parts = [initial_dialogue]
# only the most recent dialogue is passed on as "previous dialogue"
actual_script = initial_dialogue
for section in plan:
    section_script = discuss_rag_chain.invoke({"section_plan": section, "previous_dialogue": actual_script})
    print(section_script)
    script_parts.append(section_script)
    actual_script = section_script

# separate the parts with a blank line so the last line of one dialogue does
# not run into the first line of the next
script = "\n\n".join(script_parts)

print("final script", script)



['# Section 1: Introduction to CompassJudger-1 - Host introduces the paper and its authors with enthusiasm - Learner asks: "What’s the big deal about evaluating language models anyway?" - Expert explains the importance of objective vs. subjective evaluation in LLMs - Discussion on the limitations of human evaluations and the need for automated judgers', '# Section 2: The Need for CompassJudger-1 - Host outlines the challenges faced in LLM evaluations - Learner quips: "So, we need a superhero for model evaluations! Is that CompassJudger?" - Expert dives into the specifics of existing models and their limitations - Discussion on the implications of open-source tools for research communities', '# Section 3: Features of CompassJudger-1 - Host lists the capabilities of CompassJudger-1: scoring, comparisons, critiques - Learner asks: "Can it also make coffee? Just kidding! But seriously, how versatile is it?" - Expert elaborates on the versatility and adaptability of the model - Discussion o

In [27]:
# Final pass: ask the model to rewrite the draft script without audio-effect
# mentions and with less repetition; `{draft_script}` is replaced by the draft.
enhance_prompt = ChatPromptTemplate.from_template("""You are a very clever scriptwriter of podcast discussions. You will be given a script for a podcast involving 3 persons discussing about the content of a research paper. Your task will be to enhance the script by removing audio effects mentions and reducing repetition and redundancy. Don't mention sound effects, laughing, chuckling or any other audio effects between brackets. The script should only contain what the persons are saying and not what are they doing or how they are saying it. Enhance the transitions and the twists, and reduce repetition and redundancy.
The draft script{draft_script}
The enhanced script:""")
# Enhancement pipeline: fill the prompt, call the model, return the reply as a plain string.
enhance_chain = enhance_prompt | llm | StrOutputParser()
enhanced_script = enhance_chain.invoke({"draft_script": script})

In [29]:
# Show the draft script (before enhancement) for comparison.
print(script)

**Host:** Alright, let’s dive into the heart of the paper! We’ve established the significance of evaluating language models, but let’s tackle the big question: What’s the big deal about evaluating language models anyway? 

**Learner:** Right? I mean, can’t we just throw them out there and see if they sink or swim? What’s with all the fuss over evaluations?

**Expert:** That’s a great question! Evaluating language models is crucial because it directly impacts their performance in real-world applications. The paper highlights that subjective evaluations can align better with human preferences but come with limitations—like being costly and often not reproducible.

**Host:** Exactly! Human evaluations can be like trying to catch smoke with your bare hands—essentially elusive. So how do we strike a balance?

**Learner:** Ah, so you're saying we need a lifebuoy in this ocean of complexity, right? What’s the alternative then? 

**Expert:** Precisely! This is where automated evaluators come i

In [30]:
# `script` already starts with `initial_dialogue`, so the enhanced script
# contains the introduction; prepending it again would print the intro twice.
print(enhanced_script)

**Host:** Welcome back to our podcast, where today we’re diving deep into the fascinating world of language model evaluation! We'll be exploring the groundbreaking paper titled "CompassJudger-1: All-in-one Judge Model Helps Model Evaluation and Evolution." This research unpacks how an ingenious new tool can revolutionize the way we assess large language models. So, buckle up for an exciting discussion!

**Learner:** Wow, that sounds super intriguing! I can't wait to find out what makes CompassJudger-1 stand out from other evaluation methods. I mean, can it really keep up with the complexity of human preferences? 

**Expert:** Indeed, the nuances of human judgment are challenging to replicate. However, what’s compelling about CompassJudger-1 is its ability to bridge that gap through sophisticated automated evaluations, aligning more closely with real-world applications. 

**Host:** Exactly! And later, we’ll uncover how the newly established JudgerBench benchmark plays a critical role in