# Novelty Agent

In [1]:
import requests
from crewai_tools import BaseTool
import os
import boto3
from dotenv import load_dotenv
import json
from anthropic import AnthropicBedrock
import time

In [2]:
class NoveltyAgent(BaseTool):
    name:str = "novelty-agent"
    description:str = ""
        
    def _run(self, argument: dict) -> list[str]:
        pass
    
    def generate_search(self, client, argument):
        print("============================")
        print("GENERATING SEARCH PHRASES")
        print("============================")
        # Generate Keywords
        search_phrases = []

        # First Search Phrase 
        messages = [{
            "role": "user",
            "content": f'''
            Given the abstract of an academic paper below, generate a search phrase of less than 10 words to find related papers in the field. Return ONLY this phrase
            This phrase should be useful for searching for similar papers in academic databases. Use general terms that reflect domain-specific field knowledge to 
            enable a fruitful search. 

            Abstract: {argument['abstract']}
            '''
        }]

        keywords = client.messages.create(
            model="anthropic.claude-3-5-sonnet-20240620-v1:0",
            max_tokens=128,
            messages=messages
        )

        content = keywords.content
        if isinstance(content, list) and len(content) > 0:
            content = content[0].text
        else:
            content = str(content)

        final_kw = content.strip()
        print(f"First phrase: {final_kw}\n")
        search_phrases.append(final_kw)
        
        # Second Search Phrase
        messages = [{
            "role": "user",
            "content": f'''
            Given the abstract of an academic paper and a previously generated search phrase, create a new, broader search phrase of less than 10 words. 
            This new phrase should expand the search scope to include related concepts or methodologies not covered by the first phrase. 
            Return ONLY this new phrase.

            Abstract: {argument['abstract']}
            Previous search phrase: {search_phrases}
            '''
        }]

        keywords = client.messages.create(
            model="anthropic.claude-3-5-sonnet-20240620-v1:0",
            max_tokens=128,
            messages=messages
        )

        content = keywords.content
        if isinstance(content, list) and len(content) > 0:
            content = content[0].text
        else:
            content = str(content)

        final_kw = content.strip()
        print(f"Second phrase: {final_kw}\n")
        search_phrases.append(final_kw)
        
        # Third Search Phrase
        messages = [{
            "role": "user",
            "content": f'''
            Given an academic paper abstract and two previously generated search phrases, create a final, even broader search phrase of less than 10 words. 
            This phrase should capture the most general concepts related to the paper's field of study, potentially including interdisciplinary connections. 
            The goal is to cast the widest possible net for related research. Return ONLY this new phrase.

            Abstract: {argument['abstract']}
            Previous search phrase: {search_phrases}
            '''
        }]

        keywords = client.messages.create(
            model="anthropic.claude-3-5-sonnet-20240620-v1:0",
            max_tokens=128,
            messages=messages
        )

        content = keywords.content
        if isinstance(content, list) and len(content) > 0:
            content = content[0].text
        else:
            content = str(content)

        final_kw = content.strip()
        print(f"Third phrase: {final_kw}\n")
        search_phrases.append(final_kw)
        return search_phrases

    def search_related_papers(self, client, argument, search_phrases):
        print("============================")
        print("SEARCHING FOR RELATED PAPERS")
        print("============================")
        search_url = "https://api.semanticscholar.org/graph/v1/paper/search"
        api_key = os.getenv("X_API_KEY")
        
        related_papers = {}
        if not api_key:
            print("API key is missing. Please set the X_API_KEY environment variable.")
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json",
        }
        
        # Search 1:
        params = {
            "query": search_phrases[0],
            "fields": "title,abstract",
            "limit": 10  # Number of results to retrieve
        }

        response = requests.get(search_url, headers=headers, params=params)
        if response.status_code == 200:
            response_json = response.json()
            if 'data' in response_json:
                entries = response_json['data']
                print(f'Query 1 produced {len(entries)} results')
                related_papers.update({entry['title']: entry['abstract'] for entry in entries if 'title' in entry and 'abstract' in entry})
            else:
                print("No 'data' key in the response. Response structure:")
                print(json.dumps(response_json, indent=2))
        else:
            print(f"Error: {response.status_code} - {response.text}")
        
        # Search 2:
        time.sleep(1)
        params = {
            "query": search_phrases[1],
            "fields": "title,abstract",
            "limit": 10  # Number of results to retrieve
        }

        response = requests.get(search_url, headers=headers, params=params)
        if response.status_code == 200:
            response_json = response.json()
            if 'data' in response_json:
                entries = response_json['data']
                print(f'Query 2 produced {len(entries)} results')
                related_papers.update({entry['title']: entry['abstract'] for entry in entries if 'title' in entry and 'abstract' in entry})
            else:
                print("No 'data' key in the response. Response structure:")
                print(json.dumps(response_json, indent=2))
        else:
            print(f"Error: {response.status_code} - {response.text}")
        # Search 3:
        time.sleep(1)
        params = {
            "query": search_phrases[2],
            "fields": "title,abstract",
            "limit": 10  # Number of results to retrieve
        }

        response = requests.get(search_url, headers=headers, params=params)
        if response.status_code == 200:
            response_json = response.json()
            if 'data' in response_json:
                entries = response_json['data']
                print(f'Query 3 produced {len(entries)} results')
                related_papers.update({entry['title']: entry['abstract'] for entry in entries if 'title' in entry and 'abstract' in entry})
            else:
                print("No 'data' key in the response. Response structure:")
                print(json.dumps(response_json, indent=2))
        else:
            print(f"Error: {response.status_code} - {response.text}")
        print("Titles of Related Papers Found:")
        print(list(related_papers.keys()))
        return related_papers
    
    def filter_papers(self, client, argument, related_papers):
        print("============================")
        print("FILTERING FOR RELEVANT PAPERS")
        print("============================")
        
        filtered_dict = {}
        
        for title, abstract in related_papers.items():
            messages = [{
                "role": "user",
                "content":f'''
                Assess the relevancy of the following paper to the core paper. Be strict in your assessment
                and only consider it relevant if it closely relates to the core concept.
                If the core paper and the paper to assess are the same thing, your assessment is "Irrelevant"
                Core Paper:
                Title: {argument['title']}
                Abstract: {argument['abstract']}
                
                Paper to Assess:
                Title: {title}
                Abstract: {abstract}
                
                Provide your assessment as a single word: "Relevant" or "Irrelevant".
                Only output the single word with no other text or explanation
                '''
            }]
            
            response = client.messages.create(
                model="anthropic.claude-3-5-sonnet-20240620-v1:0",
                max_tokens=2,
                messages=messages
            )
            content = response.content
            if isinstance(content, list) and len(content) > 0:
                content = content[0].text
            else:
                content = str(content)

            res = content.strip()
            if res.lower() == "relevant":
                filtered_dict[title] = abstract
            
        print(f"Original length: {len(related_papers.keys())}")
        print(f"Filtered length: {len(filtered_dict.keys())}")
        
        return filtered_dict

    
    def assess_novelty(self, client, argument, filtered_dict):
        print("============================")
        print("ASSESSING NOVELTY")
        print("============================")
        
        
        # Loop through for novelty assessment.
        results = []
        for title, abstract in filtered_dict.items():
            print(f"Comparing with: {title} \n")
            messages = [{
                "role": "user",
                "content": f'''
                As a novelty assessor, compare the following proposed academic paper abstract with an existing paper's abstract.
                Evaluate whether the new paper presents a significantly novel idea or approach compared to the existing paper.
                
                New Paper: 
                Title: {argument['title']}
                Abstract: {argument['abstract']}
                
                Existing Paper
                Title: {title}
                Abstract: {abstract}
                
                Please consider:
                1. A brief comparison of the key ideas, methods, or findings
                2. An assessment of the novelty of the new paper compared to the existing one.
                3. A clear decision: Is the new paper sufficiently novel compared to this existing paper? Answer with "Novel" or "Not Novel".
                
                However, in your response, simply provide a decision and a 2-3 sentence justification for your decision.
                
                Format your response as follows:
                
                Decision: [Novel/Not Novel]
                
                Justification: [Your Assessment Here]
                '''
            }]
            response = client.messages.create(
                model="anthropic.claude-3-5-sonnet-20240620-v1:0",
                max_tokens=256,
                messages=messages
            )
            
            # Format response to text only:
            response = response.content
            if isinstance(response, list) and len(response) > 0:
                response = response[0].text
            else:
                response = str(response)

            response = response.strip()
            results.append({
                'existing_title': title,
                'assessment': response
            })
            print(f"{response}\n")
            print('-----------------------------------------')
            
        return results
        
        

In [3]:
# Model, API keys
## Semantic Scholar
os.environ['X_API_KEY'] = 'FcuPcoxxWC3ePxBABTLvkyWxqvt7v9h32sDBO4ug'
## Claude
model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
client = AnthropicBedrock(
        aws_access_key='AKIA2UC3AYP2UVT7W4UE',  
        aws_secret_key='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',  
        aws_region='us-west-2'
    )
## Paper
marg_title = "MARG: Multi-Agent Review Generation for Scientific Papers"
marg_abstract = "We study the ability of LLMs to generate feedback for scientific papers and develop MARG, a feedback generation approach using multiple LLM instances that engage in internal discussion. By distributing paper text across agents, MARG can consume the full text of papers beyond the input length limitations of the base LLM, and by specializing agents and incorporating sub-tasks tailored to different comment types (experiments, clarity, impact) it improves the helpfulness and specificity of feedback. In a user study, baseline methods using GPT-4 were rated as producing generic or very generic comments more than half the time, and only 1.7 comments per paper were rated as good overall in the best baseline. Our system substantially improves the ability of GPT-4 to generate specific and helpful feedback, reducing the rate of generic comments from 60% to 29% and generating 3.7 good comments per paper (a 2.2x improvement)."
    

In [4]:
novel = NoveltyAgent()

argument = {
    'title': marg_title,
    'abstract': marg_abstract
}

search_phrases = novel.generate_search(client, argument)
related_papers = novel.search_related_papers(client, argument, search_phrases)
filtered_papers = novel.filter_papers(client, argument, related_papers)
novelty_assessment = novel.assess_novelty(client, argument, filtered_papers)

GENERATING SEARCH PHRASES
First phrase: LLM-based scientific paper feedback generation techniques

Second phrase: LLM multi-agent systems for document analysis and feedback

Third phrase: Language models for academic research and peer review processes

SEARCHING FOR RELATED PAPERS
Query 1 produced 10 results
Error: 429 - {"message": "Too Many Requests. Please wait and try again or apply for a key for higher rate limits. https://www.semanticscholar.org/product/api#api-key-form", "code": "429"}
Query 3 produced 10 results
Titles of Related Papers Found:
['LLM Based Generation of Item-Description for Recommendation System', 'Citation-Enhanced Generation for LLM-based Chatbots', 'TestART: Improving LLM-based Unit Test via Co-evolution of Automated Generation and Repair Iteration', 'A LLM-Based Ranking Method for the Evaluation of Automatic Counter-Narrative Generation', 'One-step Reach: LLM-based Keyword Generation for Sponsored Search Advertising', 'Enhancing LLM-based Test Generation for