In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from crewai import Crew, Process, Agent, Task
from model import load_model, list_avail_models
from SemanticScholar import SemanticScholar, SS_keyword_search, SS_recommend, SS_check
from crewai_tools import FileReadTool, TXTSearchTool
import os

# Initialize tools
paper_path = "../data/The complementary contributions of academia and industry to AI research.txt"
paper_not_in_ss = "../data/paper not avail.txt"

paper_read_tool = FileReadTool(file_path=paper_not_in_ss)
# ss_tool = SemanticScholar()
ss_check, ss_recommend, ss_keyword_search = SS_check(), SS_recommend(), SS_keyword_search()


model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
llm = load_model(model_id)

# Define your agents
researcher = Agent(
  role='Retriever',
  goal='try to retrieve 10 related papers of the given paper, and assess its novelty',
  backstory='An experienced researcher with great knowledge base in research area',
  llm=llm,
  tools=[paper_read_tool, ss_check, ss_recommend, ss_keyword_search]
)

# Define your tasks
research_task = Task(
    description="""
    Task: Access the paper using 'paper_read_tool', 
    Check if the paper is in Semantic Scholar database with 'ss_check',
    If it is, then use 'ss_recommend' to get titles and abstracts of related papers,
    If not, extract keywords of the paper and use 'ss_keyword_search' to get titles and abstracts.
    After you have them, assess the novelty of input paper compared to other retrieved related papers.
    Remember if somehow ss_recommend or ss_keyword_search does not work, try it again.
    """,
    agent=researcher, 
    expected_output="""
    the tools you are using,
    the method you use to retrieve related papers,
    the keyword you used to search if you do,
    the retrieved paper titles,
    and the novelty assessment of input paper
    """
    )
# Form the crew with a sequential process
report_crew = Crew(
  agents=[researcher],
  tasks=[research_task],
  process=Process.sequential
)

# Execute the crew
result = report_crew.kickoff()
print(result)

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


ValidationError: 1 validation error for ChatBedrock
__root__
  Error raised by bedrock service: Did not find region_name, please add an environment variable `AWS_DEFAULT_REGION` which contains it, or pass `region_name` as a named parameter. (type=value_error)

In [40]:
def rec(title):
    match_url =  "https://api.semanticscholar.org/graph/v1/paper/search/match"
    recommend_url = "https://api.semanticscholar.org/recommendations/v1/papers/forpaper/"
    search_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    api_key = os.getenv("X_API_KEY")
    if not api_key:
        print("API key is missing. Please set the X_API_KEY environment variable.")
    headers = {
        "x-api-key": api_key,
        "Content-Type": "application/json",
    }
    match_params = {
        "query": title,
        "fields": "title",
        "limit": 1
    }
    title_match_response = requests.get(match_url, headers=headers, params=match_params)
            
    if title_match_response.status_code == 200:
        # test
        matched_title = title_match_response.json()['data'][0]['title']
        paperId = title_match_response.json()['data'][0]['paperId']
        # print(type(paperId))
        # print(title, paperId)

        # Get the exact paper in SS
        if matched_title == title:

            url=recommend_url+paperId    
            params = {
                "fields": "abstract",
                "limit": 10,  
            }

            # Search for Recommended Papers
            response = requests.get(url, headers=headers, params=params)
            # Get recommended Papers
            if response.status_code == 200:
                recommended_papers = response.json()['recommendedPapers']

                # Recommended Papers could be None
                if recommended_papers:
                    print(f'{len(recommended_papers)} founded')
                    abstracts = [paper['abstract'] for paper in recommended_papers]
                    # print(len(abstracts))
                else: 
                    print('No related Paper!')
                    return None
            else:
                print(f"Error: {response.status_code} - {response.text}")
                return None
        
    # Not the same paper
    else:
        params = {
            "query": title,
            "fields": "abstract",
            "limit": 10  # Number of results to retrieve
        }

        response = requests.get(search_url, headers=headers, params=params)
        if response.status_code == 200:
            entries = response.json()['data']
            abstracts = [entry['abstract'] for entry in entries]
            # print(len(abstracts))
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return None
    return abstracts
title = "MARG"

print(rec(title))

['In many applications, attitude estimation algorithms rely mainly on magnetic and inertial measurements from MARG sensors (consisting of a magnetometer, a gyroscope, and an accelerometer). One of the main challenges facing these algorithms is that the accelerometer measures both gravity and an unknown external acceleration, while these algorithms assume that the accelerometer measures only the gravity. In this letter, an attitude estimation algorithm on the special orthogonal group SO(3) is designed, considering the external acceleration as an unknown input with direct feedthrough to the output, with a local approximation approach. The proposed algorithm is validated through Monte Carlo simulations and real datasets, demonstrating better accuracy and enhanced performance than existing solutions.', 'In this paper, we present the FIU MARG Dataset (FIUMARGDB) of signals from the tri-axial accelerometer, gyroscope, and magnetometer contained in a low-cost miniature magnetic–angular rate–g

In [22]:
import boto3
import os
from dotenv import load_dotenv
load_dotenv()

model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
paper_path = "../data/The complementary contributions of academia and industry to AI research.txt"
with open(paper_path, 'r') as file:
    papercontent = file.read()
system_prompt =[{"text":"""
    You are an expert in academic research analysis and bibliometrics.
    I will provide you with a research paper.
    Your task is to generate 10 diverse and effective search inputs that could be used on academic search engines
    to find similar or related work. These search inputs should cover various aspects of the paper,
    including main concepts, methodologies, and unique features.
    Analyze the given title and abstract thoroughly,
    and then provide only the list of 10 search inputs in a JSON format like:
    {"keywords": ["string1", "string2", "string3", ...]}
    Provide no additional explanation or commentary, only the JSON output.
"""}]

bedrock_runtime = boto3.client("bedrock-runtime")

messages =[
    {
        "role": "user",
        "content":[{
            "text":papercontent,
        }],
    }
]


response = bedrock_runtime.converse(
    modelId=model_id,
    messages=messages,
    system=system_prompt
)

response_text = response["output"]["message"]["content"][0]["text"]

print(response_text)

Here is the JSON output containing 10 diverse search inputs based on the provided research paper:

{"keywords": [
"AI research impact academia vs industry",
"Citation disruptiveness academic industry AI papers",
"Novelty atypicality AI publications academia industry",
"Academic-industry collaboration AI research outcomes",
"State-of-the-art AI models academia vs industry",
"Mixed-effects models AI research team composition",
"AI conference publication trends academia industry",
"Research team size impact AI publications",
"Bibliometric analysis artificial intelligence research",
"AI researcher mobility academia industry impact"
]}
