In [18]:
%run holistic_ai_bedrock.py
%run load_the_env.py

📄 Loaded configuration from .env file

🔑 API Key Status:
  ✅ Holistic AI Bedrock credentials loaded (will use Bedrock)
  ✅ Valyu API key loaded: [REDACTED]

📁 Working directory: /Users/navneetmann/Documents/hack/Great_Agent_Hack_2025_EduAI

✅ Holistic AI Bedrock helper function loaded

✅ All imports successful!


/Users/navneetmann/Documents/hack/Great_Agent_Hack_2025_EduAI/holistic_ai_bedrock.py:17: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  class HolisticAIBedrockChat(BaseChatModel):


In [19]:
import os
from valyu import Valyu
from typing import TypedDict, Annotated, List, Dict
import operator
import yaml
from langgraph.graph import StateGraph, END
from holistic_ai_bedrock import get_chat_model

In [20]:
# Define the state that will be passed between agents
class ResearchState(TypedDict):
    """Shared state passed from node to node in the research LangGraph workflow.

    Fields annotated with ``operator.add`` use it as their LangGraph reducer:
    a node's update for that key is concatenated onto the existing list rather
    than replacing it. A node that echoes the existing list back in its update
    will therefore duplicate the entries already stored.
    """
    # Search keywords as one string; run() joins the caller's list with " | ".
    keywords: str
    # Free-text description of the research interest supplied by the user.
    context: str
    # One dict per search hit (title, url, content, description, source,
    # relevance_score), produced by the scraper node. Reducer: list concatenation.
    papers: Annotated[List[Dict], operator.add]
    # One LLM summary per paper, produced by the summarizer node.
    # Reducer: list concatenation.
    main_ideas: Annotated[List[str], operator.add]
    # Output of the idea-generator node. NOTE(review): that node stores the raw
    # LLM response content here, which may be a single string rather than a
    # list of strings -- confirm and align the annotation.
    new_ideas: List[str]
    # Output of the validator node, or the "no results" message.
    # NOTE(review): both writers store a string-like value, not List[Dict] -- confirm.
    validated_ideas: List[Dict]
    # Progress marker set by each node, e.g. "scraper_complete".
    current_step: str

### Multi-Agent Research Paper Innovation System using LangGraph

This system uses 4 agents to:
1. Scrape research papers based on keywords (using Valyu web search)
2. Summarize main ideas from papers
3. Combine ideas to generate novel concepts
4. Check if generated ideas are actually new

In [27]:
from valyu import SearchResponse

class ResearchInnovationSystem:
    """Multi-agent system for research paper analysis and idea generation.

    A LangGraph workflow chains four agents:
    ``scraper -> summarizer -> novel_idea_generator -> validator``.
    When the scraper returns no papers, the graph routes to a terminal
    ``no_results`` node instead of the summarizer.

    Every node returns only the state keys it changes. ``papers`` and
    ``main_ideas`` are declared in ``ResearchState`` with an ``operator.add``
    reducer, so echoing the whole incoming state back (``{**state, ...}``)
    would re-append the lists already stored and duplicate their entries at
    every step.
    """

    def __init__(self, max_papers: int = 5, chat_model_name: str = "claude-3-5-sonnet",
                 yaml_file_path: str = r"prompts.yaml", RELEVANCE_SCORE_THRES: float = 0.5):
        """Create the LLM and search clients, load the prompts and build the graph.

        Args:
            max_papers: Maximum number of search results requested from Valyu.
            chat_model_name: Model name handed to ``get_chat_model``.
            yaml_file_path: Path of the YAML file holding the prompt templates.
            RELEVANCE_SCORE_THRES: Relevance threshold passed to the Valyu search.

        Raises:
            FileNotFoundError: If ``yaml_file_path`` does not exist.
            ValueError: If the YAML file does not contain a mapping.
        """
        self.llm = get_chat_model(chat_model_name)
        # The Valyu key is read from the environment (VALYU_API_KEY).
        self.valyu_api_key = os.getenv('VALYU_API_KEY')
        self.valyu = Valyu(self.valyu_api_key)
        self.max_papers = max_papers
        self.RELEVANCE_SCORE_THRES = RELEVANCE_SCORE_THRES

        # Load the prompts first: the graph nodes read self.config when they run.
        self.config = self._load_prompt_config(yaml_file_path)
        self.graph = self._build_graph()

    def _load_prompt_config(self, yaml_file_path):
        """Load the prompt templates from a YAML file.

        Args:
            yaml_file_path: Path of the YAML file to read.

        Returns:
            The parsed YAML document (a mapping of prompt name to template).

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file is empty or its top level is not a mapping.
        """
        if not os.path.exists(yaml_file_path):
            raise FileNotFoundError(f"Config file not found: {yaml_file_path}")

        # Explicit encoding so templates with non-ASCII text load the same way
        # on every platform.
        with open(yaml_file_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        # safe_load returns None for an empty file; fail here with a clear
        # message rather than later with an opaque subscript error.
        if not isinstance(config, dict):
            raise ValueError(
                f"Config file must contain a mapping of prompt templates: {yaml_file_path}"
            )

        return config

    def _build_graph(self) -> StateGraph:
        """Build and compile the LangGraph workflow.

        Returns:
            The compiled workflow (the result of ``workflow.compile()``).
        """
        workflow = StateGraph(ResearchState)

        # One node per agent
        workflow.add_node("scraper", self.scraper_agent)
        workflow.add_node("summarizer", self.summarizer_agent)
        workflow.add_node("novel_idea_generator", self.novel_idea_generator_agent)
        workflow.add_node("validator", self.validator_agent)

        # Terminal node used when the scraper returns no valid papers
        workflow.add_node("no_results", self.no_results_agent)

        def route_after_scraper(state: ResearchState) -> str:
            """Pick the node that follows the scraper."""
            if state.get("papers"):
                return "summarizer"
            return "no_results"

        workflow.set_entry_point("scraper")

        # The mapping translates the router's return value into a node name.
        workflow.add_conditional_edges(
            "scraper",
            route_after_scraper,
            {"summarizer": "summarizer", "no_results": "no_results"}
        )

        # Main path
        workflow.add_edge("summarizer", "novel_idea_generator")
        workflow.add_edge("novel_idea_generator", "validator")

        # Terminal nodes
        workflow.add_edge("validator", END)
        workflow.add_edge("no_results", END)

        return workflow.compile()

    def scraper_agent(self, state: ResearchState) -> Dict:
        """Agent 1: search for research papers with the Valyu search API.

        Args:
            state: Current graph state; ``context`` and ``keywords`` are read.

        Returns:
            A partial state update with ``papers`` (one dict per result, empty
            when the search fails) and ``current_step``.
        """
        print("\nüîç SCRAPER AGENT: Searching for papers...")
        print(f"Keywords: {state['keywords']}")
        print(f"Context: {state['context']}")

        papers: List[Dict] = []
        try:
            # https://docs.valyu.ai/api-reference/endpoint/deepsearch#body-query
            # Passing the keywords as a `category` often gave no results, so
            # they are folded into the query text instead.
            search_results: SearchResponse = self.valyu.search(
                # Single braces: the keywords must be interpolated, not sent
                # as the literal text "{state['keywords']}".
                query=f"{state['context']} with keywords {state['keywords']}",
                relevance_threshold=self.RELEVANCE_SCORE_THRES,
                start_date="2024-01-01",  # Recent research only
                response_length="max",
                max_num_results=self.max_papers,
            )

            # If you get failures, check if credits have expired!
            if search_results.success:
                for result in search_results.results:
                    paper: Dict = {
                        "title": result.title,
                        "url": result.url,
                        "content": result.content,
                        "description": result.description,
                        "source": result.source,
                        "relevance_score": result.relevance_score
                    }
                    papers.append(paper)
                    # `or ''` keeps a missing title from raising and thereby
                    # discarding every paper collected so far.
                    print(f"‚úì Found: {(paper['title'] or '')[:20]}...")
            else:
                # Report an unsuccessful response instead of ignoring it.
                print(f"Search was not successful: {getattr(search_results, 'error', None)}")

            print(f"\nüìö Scraped {len(papers)} papers")
        except Exception as e:
            # Best effort: a failed search ends the run via the no_results node.
            print(f"‚ö†Ô∏è  Error during search: {e}")
            papers = []

        return {
            "papers": papers,
            "current_step": "scraper_complete"
        }

    def summarizer_agent(self, state: ResearchState) -> Dict:
        """Agent 2: summarize the main interesting ideas of each paper.

        Args:
            state: Current graph state; ``papers`` is read.

        Returns:
            A partial state update with ``main_ideas`` (one LLM response per
            paper) and ``current_step``.
        """
        print("\nüìù SUMMARIZER AGENT: Extracting main ideas ...")

        papers = state['papers']
        template_string = self.config['summarizer_prompt']

        main_ideas = []
        for i, paper in enumerate(papers, 1):
            print(f"\n Analyzing paper {i}/{len(papers)}: {(paper['title'] or '')[:60]}...")

            prompt = template_string.format(
                paper_title=paper['title'],
                paper_content=paper['content'],
                paper_description=paper['description']
            )
            response = self.llm.invoke(prompt)
            main_ideas.append(response.content)

        print(f"\nüí° Main Ideas extracted: \n{main_ideas}")

        return {
            "main_ideas": main_ideas,
            "current_step": "summarizer_complete"
        }

    def novel_idea_generator_agent(self, state: ResearchState) -> Dict:
        """Agent 3: combine the extracted ideas into novel research directions.

        Args:
            state: Current graph state; ``main_ideas`` and ``context`` are read.

        Returns:
            A partial state update with ``new_ideas`` (the LLM response
            content) and ``current_step``.
        """
        print("\nüî¨ NOVEL IDEA GENERATOR AGENT: Generating novel ideas...")

        template_string = self.config['novel_idea_generator_prompt']
        prompt = template_string.format(
            context=state['context'],
            ideas_text=state['main_ideas']
        )

        response = self.llm.invoke(prompt)
        new_ideas = response.content

        print(f"\nüí° Novel Ideas: \n{new_ideas}")

        return {
            "new_ideas": new_ideas,
            "current_step": "novel_idea_generation_complete"
        }

    def validator_agent(self, state: ResearchState) -> Dict:
        """Agent 4: validate whether the generated ideas are actually novel.

        Args:
            state: Current graph state; ``new_ideas`` is read.

        Returns:
            A partial state update with ``validated_ideas`` (the LLM response
            content) and ``current_step``.
        """
        print("\n‚úÖ VALIDATOR AGENT: Validating novelty ...")

        # Validate the generator's output, not the summaries of the source
        # papers held in `main_ideas`.
        candidate_new_ideas = state['new_ideas']

        template_string = self.config['validator_prompt']
        prompt = template_string.format(
            new_ideas=candidate_new_ideas,
        )
        response = self.llm.invoke(prompt)

        return {
            "validated_ideas": response.content,
            "current_step": "validator_complete"
        }

    def no_results_agent(self, state: ResearchState) -> Dict:
        """Terminal node used when the scraper finds no valid papers.

        Returns:
            A partial state update whose ``validated_ideas`` carries a message
            for the user, plus ``current_step``.
        """
        return {
            "validated_ideas": "No relevant papers found for the given keywords/context. Try with broader query or keywords.",
            "current_step": "no_results"
        }

    def run(self, keywords: List[str], context: str = "") -> Dict:
        """Execute the complete research innovation pipeline.

        Args:
            keywords: Search keywords for papers
                (e.g. ['graph', 'neural networks', 'transformers']).
                A single string is treated as one keyword.
            context: Additional context about research interests.

        Returns:
            Dictionary with ``main_ideas``, ``novel_ideas`` and
            ``validated_ideas`` taken from the final graph state.
        """
        print("="*80)
        print("üöÄ RESEARCH INNOVATION SYSTEM")
        print("="*80)

        # Joining a bare string would split it into characters
        # ("ml" -> "m | l"), so wrap it first.
        if isinstance(keywords, str):
            keywords = [keywords]

        initial_state = {
            "keywords": " | ".join(keywords),
            "context": context or "General research exploration",
            "papers": [],
            "main_ideas": [],
            "new_ideas": [],
            "validated_ideas": [],
            "current_step": "initialized"
        }

        # Run the graph
        final_state = self.graph.invoke(initial_state)

        return {
            "main_ideas": final_state['main_ideas'],
            "novel_ideas": final_state['new_ideas'],
            "validated_ideas": final_state['validated_ideas']
        }

In [28]:
# Build the pipeline. The .env file must already provide the API keys.
ris = ResearchInnovationSystem(
    max_papers=5,
    chat_model_name="claude-3-5-sonnet",
    RELEVANCE_SCORE_THRES=0.7,
    yaml_file_path="prompts.yaml",
)

# The UI will call run() with the user's values when its button is clicked;
# until then the inputs are hard-coded here.
results = ris.run(
    context="I need papers related to Algorithmic trading and portfolio optimization",  # user query
    keywords=["machine learning", "quantitative finance", "low latency"],  # list of keywords
)

# Show the validator's verdict on the generated ideas
print(results['validated_ideas'])

üöÄ RESEARCH INNOVATION SYSTEM

üîç SCRAPER AGENT: Searching for papers...
Keywords: machine learning | quantitative finance | low latency
Context: I need papers related to Algorithmic trading and portfolio optimization
‚úì Found: Advancing Investment...
‚úì Found: Large Language Model...
‚úì Found: The Portfolio Tradin...
‚úì Found: Enhancing portfolio ...
‚úì Found: Enhancing literature...

üìö Scraped 5 papers

üìù SUMMARIZER AGENT: Extracting main ideas ...

 Analyzing paper 1/5: Advancing Investment Frontiers: Industry-grade Deep Reinforc...

 Analyzing paper 2/5: Large Language Models in equity markets: applications, techn...

 Analyzing paper 3/5: The Portfolio Trading Algo: Part 1 | by Daniel Aisen | Proof...

 Analyzing paper 4/5: Enhancing portfolio management using artificial intelligence...

 Analyzing paper 5/5: Enhancing literature review with LLM and NLP methods. Algori...

üí° Main Ideas extracted: 
['Here are the 3-4 most innovative and interesting ideas from this