## Setup: Loading Environment Variables and Imports

In [24]:
from dotenv import load_dotenv

# Load environment variables; the return value is discarded via `_`.
_ = load_dotenv()

In [25]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated, List
import operator
from langgraph.checkpoint.sqlite import SqliteSaver
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage, ChatMessage

# SQLite-backed saver opened on the ":memory:" connection string.
# The next cell assigns `memory` again with the same call.
memory = SqliteSaver.from_conn_string(":memory:")

In [26]:
import os
import traceback
import json
from dotenv import load_dotenv
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Optional, Dict, Any
from langgraph.checkpoint.sqlite import SqliteSaver
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import ChatOpenAI
from tavily import TavilyClient
from langchain_core.pydantic_v1 import BaseModel
import markdown

# Load environment variables
# (load_dotenv() was already called in the first cell; the return value is discarded via `_`.)
_ = load_dotenv()

# Initialize console and memory
# `console` is the Rich console used by the workflow functions below for styled status output.
console = Console()
# NOTE(review): `memory` is not referenced anywhere else in the visible notebook
# (builder.compile() is called without arguments) — confirm whether it is still needed.
memory = SqliteSaver.from_conn_string(":memory:")

# Initialize models and clients
# `model` is the chat model used by every LLM call in the workflow functions below.
model = ChatOpenAI(model="gpt-4-turbo", temperature=0.2)
# os.environ[...] raises KeyError if TAVILY_API_KEY is not set.
tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

## Prompts and States

In [27]:
# System prompt used by generate_industry_thesis(). It contains no format
# placeholders and is sent verbatim as the SystemMessage; the industry name is
# supplied separately as the HumanMessage.
INDUSTRY_THESIS_PROMPT = """
As a venture capital research strategist, develop a comprehensive and data-driven investment thesis for the specified industry. 
Your response should include:
Comprehensive Industry Thesis Development Protocol

Objective
Develop a rigorous, globally-contextualized investment thesis that provides a nuanced and critically examined perspective on the target industry.

Detailed Research Requirements

1. Industry Definition and Contextual Analysis
- Provide a multi-dimensional definition of the industry
- Include:
  * Global perspective with regional variations
  * Socio-economic context
  * Technological ecosystem mapping
- Highlight geopolitical and regulatory landscape across different markets

2. Market Trends Analysis
- Mandate multi-source data validation
  * Minimum 3 independent research sources
  * Cross-reference global databases (World Bank, IEA, specialized industry reports)
- Quantitative trend analysis with:
  * Precise numerical data
  * Confidence intervals
  * Potential deviation scenarios
- Include:
  * Emerging market trends
  * Countertrends and potential disruption points
  * Comparative analysis across different geographical contexts

3. Technological Disruption Assessment
- Comprehensive Technology Evaluation Framework:
  * Technical feasibility assessment
  * Independent expert review
  * Potential failure mode analysis
  * Scalability simulation
  * Regulatory compliance probability
- Technology Readiness Level (TRL) mapping
- Potential integration challenges
- Comparative technological landscape

4. Emerging Business Models
- Detailed business model deconstruction
- Scalability matrix
- Financial modeling across different scenarios
- Risk-adjusted potential analysis
- Comparative global business model variations

5. Investment Criteria Development
**Qualitative Dimensions:**
- Team expertise deep-dive
- Network effect potential
- Strategic partnership ecosystem

Quantitative Metrics:
- Standardized performance indicators
- Risk-adjusted return projections
- Comparative benchmark analysis
- Funding efficiency ratio

### 6. High-Potential Sub-Sector Identification
- Multi-dimensional growth potential assessment
- Include:
  * Market size
  * Growth trajectory
  * Technological readiness
  * Regulatory environment
  * Global competitiveness index

### 7. Comprehensive Risk Analysis
- Holistic risk framework covering:
  * Technological risks
  * Market adoption risks
  * Regulatory risks
  * Geopolitical risks
- Scenario planning
- Mitigation strategy development
- Probabilistic risk assessment

## Methodological Requirements
- Mandatory use of:
  * Peer-reviewed sources
  * Government reports
  * Industry expert consultations
- Transparent methodology disclosure
- Bias acknowledgment section

## Reporting Standards
- Clear, structured presentation
- Visual data representations
- Explicit uncertainty indicators
- Comparative global context"""

In [28]:
# System-prompt template used by analyze_companies(). Its only str.format
# placeholder is {industry_thesis}; the text mentions a "research context",
# but the template has no {research_context} field.
COMPANY_RESEARCH_PROMPT = """
You are an expert deal sourcer for venture capital. 
Using the provided research context and the comprehensive industry thesis, identify and analyze potential investment targets. 
For each company, your analysis should include:
# Advanced Company Research and Evaluation Protocol

## Comprehensive Company Analysis Framework

### 1. Company Overview - Multidimensional Assessment
- Global context mapping
- Technological positioning
- Socio-economic impact potential
- Comparative ecosystem analysis

### 2. Funding and Financial Forensics
- Detailed funding archaeology
  * Investor reputation analysis
  * Capital efficiency metrics
  * Funding source diversification
- Transparent funding gap identification
- Non-dilutive funding potential assessment

### 3. Technological Innovation Validation
- Rigorous innovation assessment protocol:
  * Independent technical review
  * Comparative technological benchmark
  * Scalability simulation
  * Potential integration challenges
- Technology Readiness Level (TRL) mapping
- Potential failure mode analysis

### 4. Investment Thesis Alignment
**Quantitative Alignment Metrics:**
- Market trend correlation index
- Technological disruption potential
- Scalability probability matrix

**Qualitative Alignment Factors:**
- Strategic adaptability
- Team's innovation historical track record
- Global market responsiveness

### 5. Risk and Challenge Comprehensive Mapping
- Multidimensional risk assessment:
  * Technological risks
  * Market adoption risks
  * Regulatory landscape challenges
  * Competitive ecosystem threats
- Scenario planning
- Mitigation strategy development

### 6. Comparative Analysis Requirements
- Mandatory cross-company and cross-geographical comparisons
- Standardized evaluation metrics
- Explicit bias acknowledgment

## Reporting Methodology
- Transparent data sourcing
- Visual comparative frameworks
- Explicit uncertainty indicators
- Probabilistic potential projections

## Mandatory Documentation
- Comprehensive source references
- Methodology disclosure
- Expert consultation summaries

Industry Thesis:
{industry_thesis}
"""

In [29]:
# System-prompt template used by due_diligence(). It contains a single
# placeholder, {company_details}, which has to be filled (e.g. with
# str.format) for the company information to appear in the prompt.
DUE_DILIGENCE_PROMPT = """
Conduct a comprehensive due diligence analysis on the following startup. 
Your report should include detailed assessments:
# Comprehensive Startup Due Diligence Protocol

## 1. Business Model Forensic Analysis
- Value Proposition Deconstruction
  * Market need validation
  * Comparative competitive positioning
  * Revenue model resilience assessment
- Customer Acquisition Strategy Evaluation
  * Acquisition cost metrics
  * Channel effectiveness
  * Conversion probability analysis

## 2. Market Potential Comprehensive Mapping
- Total Addressable Market (TAM) Multi-Dimensional Assessment
  * Global market segmentation
  * Regional variation analysis
  * Potential market expansion scenarios
- Serviceable Available Market (SAM) Detailed Mapping
  * Competitive landscape dynamics
  * Market entry barriers
  * Technological adoption probability

## 3. Competitive Landscape Deep Dive
- Competitive Positioning Matrix
  * Technology comparative analysis
  * Market share potential
  * Competitive moat evaluation
- Advanced SWOT Analysis
  * Probabilistic scenario modeling
  * Competitive threat identification
  * Strategic adaptation potential

## 4. Team Capability Forensic Review
- Management Team Comprehensive Assessment
  * Historical performance tracking
  * Innovation capability index
  * Network effect potential
- Capability Gap Identification
  * Strategic hiring recommendations
  * Advisory board optimization

## 5. Financial Health Comprehensive Analysis
- Funding Ecosystem Mapping
  * Investor reputation analysis
  * Funding source diversification
  * Capital efficiency metrics
- Financial Stability Projection
  * Burn rate analysis
  * Runway scenario modeling
  * Non-dilutive funding potential

## 6. Technical and Operational Risk Mapping
- Technology Risk Assessment
  * Failure mode analysis
  * Integration challenge evaluation
  * Scalability simulation
- Operational Risk Comprehensive Review
  * Regulatory compliance probability
  * Supply chain vulnerability analysis
  * Technological adaptability potential

## 7. Strategic Exit Pathway Analysis
- Potential Exit Scenario Modeling
  * Acquisition probability matrix
  * IPO readiness assessment
  * Comparative market liquidity analysis
- Market Trend Contextual Evaluation

## 8. Holistic Risk Assessment Framework
- Integrated Risk Scoring
  * Quantitative risk metrics
  * Qualitative risk factors
  * Probabilistic potential projection
- Mitigation Strategy Development
  * Risk prioritization
  * Adaptive strategy recommendations

## Reporting Methodology
- Transparent data sourcing
- Explicit uncertainty indicators
- Comprehensive source documentation
- Bias acknowledgment

Company Details:
{company_details}
"""

In [30]:
class DealSourcingState(TypedDict):
    """Shared state for the deal-sourcing workflow.

    Each workflow step receives this mapping and returns a partial dict
    containing only the keys it produced.
    """
    # Industry label supplied by the caller (see run_deal_sourcing).
    industry: str
    # Thesis text returned by generate_industry_thesis.
    investment_thesis: str
    # CompanyInfo records (as dicts) collected by research_companies.
    initial_companies: List[dict]
    # Items of the form {"company": ..., "detailed_research": ...} from analyze_companies.
    researched_companies: List[dict]
    # Items of the form {"company": ..., "due_diligence_notes": ...} from due_diligence.
    due_diligence_notes: List[dict]
    # Subset of due_diligence_notes chosen by select_top_prospects.
    top_prospects: List[dict]
    # Upper bound compared against current_iteration in should_continue.
    max_iterations: int
    # Incremented by due_diligence on every pass.
    current_iteration: int
    # NOTE(review): no visible step writes this key into the state;
    # save_results_to_markdown returns a dict with the same key — confirm intended use.
    report_path: Optional[str]

# Structured-output schema passed to model.with_structured_output() in
# research_companies; instances are converted with .dict() before being stored.
# All fields are required (no defaults are declared).
class CompanyInfo(BaseModel):
    name: str
    description: str
    funding_stage: str
    total_funding: str  # free text, not a numeric amount
    key_founders: List[str]
    competitive_advantage: str

## Functions

In [31]:
def generate_industry_thesis(state: DealSourcingState):
    """Ask the chat model for an investment thesis covering ``state['industry']``.

    ``INDUSTRY_THESIS_PROMPT`` is sent as the system message and the industry
    name as the human message. Progress is reported through both the Rich
    console and plain ``print`` calls.

    Returns:
        dict: ``{"investment_thesis": <model text>}`` on success. If the model
        call or the rendering fails, the error and traceback are printed and
        ``{"investment_thesis": "Failed to generate thesis"}`` is returned
        instead of raising.
    """
    industry = state['industry']
    console.print(f"[bold blue]Generating Thesis for {industry} Industry[/bold blue]")
    try:
        prompt_messages = [
            SystemMessage(content=INDUSTRY_THESIS_PROMPT),
            HumanMessage(content=industry),
        ]

        # Diagnostics emitted before the model call
        print(f"üîç Generating thesis for: {industry}")
        print(f"üìã System Prompt Length: {len(INDUSTRY_THESIS_PROMPT)} characters")

        thesis_text = model.invoke(prompt_messages).content

        print(f"‚úÖ Thesis Generated: {len(thesis_text)} characters")

        thesis_panel = Panel(
            Text(thesis_text, style="bold white"),
            title=f"[bold green]Investment Thesis: {industry}",
            border_style="bold blue",
        )
        console.print(thesis_panel)

        return {"investment_thesis": thesis_text}
    except Exception as e:
        # Best-effort step: report the failure and hand back a placeholder thesis.
        console.print(f"[red]Error in thesis generation: {e}[/red]")
        print(f"‚ùå Thesis Generation Failed: {e}")
        traceback.print_exc()
        return {"investment_thesis": "Failed to generate thesis"}

def research_companies(state: DealSourcingState):
    """Search the web for candidate companies and extract structured records.

    Three Tavily queries are built from ``state['industry']`` (3 results each).
    The ``content`` of every search result is passed to the chat model with the
    ``CompanyInfo`` structured-output schema, and each successful extraction is
    stored as a plain dict.

    Failures are best-effort: an error on one result or one query is printed
    (with traceback) and the remaining work continues.

    Returns:
        dict: ``{"initial_companies": [<CompanyInfo dict>, ...]}``; the list is
        empty when nothing could be extracted.
    """
    console.print("[bold blue]Researching Companies[/bold blue]")
    search_queries = [
        f"Top emerging startups in {state['industry']} with recent funding",
        f"Innovative companies disrupting {state['industry']} sector",
        f"Best funded startups in {state['industry']} this year"
    ]

    companies = []

    # The structured-output wrapper does not depend on the query or the result,
    # so build it once instead of once per search result. A failure here is
    # reported and yields an empty list, so the function still never raises
    # from the extraction setup.
    try:
        extractor = model.with_structured_output(CompanyInfo)
    except Exception as e:
        console.print(f"[red]Error preparing structured extractor: {e}[/red]")
        traceback.print_exc()
        return {"initial_companies": companies}

    extraction_instruction = SystemMessage(
        content="Extract structured company information from the text."
    )

    for query in search_queries:
        console.print(f"\n[bold blue]Searching: {query}[/bold blue]")

        try:
            print(f"üîé Executing Tavily search: {query}")
            search_results = tavily.search(query=query, max_results=3)

            print(f"üìä Search Results: {len(search_results['results'])} items")

            for result in search_results['results']:
                # Inner try: one unparsable result must not abort the whole query.
                try:
                    print(f"üìù Processing result: {result['title']}")
                    company_details = extractor.invoke([
                        extraction_instruction,
                        HumanMessage(content=result['content'])
                    ])
                    companies.append(company_details.dict())
                    print(f"‚úÖ Company processed: {company_details.name}")
                except Exception as e:
                    print(f"‚ùå Error processing company: {e}")
                    traceback.print_exc()

        except Exception as e:
            console.print(f"[red]Search error: {e}[/red]")
            print(f"‚ùå Tavily Search Failed: {e}")
            traceback.print_exc()

    console.print(f"[green]Found {len(companies)} companies[/green]")
    print(f"üìã Companies Found: {len(companies)}")
    return {"initial_companies": companies}

def run_deal_sourcing(industry, max_iterations=2):
    """Run the deal-sourcing steps once, in order, and write the report.

    The steps (thesis, research, analysis, due diligence, prospect selection)
    are called directly rather than through the compiled graph. Each step
    receives the accumulated state dict and its returned partial dict is
    merged back in.

    Args:
        industry: Industry label used in prompts, search queries and the
            report filename.
        max_iterations: Stored in the state under ``"max_iterations"``; this
            direct execution path performs a single pass and does not loop
            on it.

    Returns:
        The dict returned by ``save_results_to_markdown`` (``{"report_path":
        ...}``) when every step ran, or ``None`` if a step raised.
    """
    console.print(f"[bold green]Starting Deal Sourcing for {industry} Industry[/bold green]")

    state = {
        "industry": industry,
        "max_iterations": max_iterations,
        "current_iteration": 0,
        "initial_companies": [],
        "researched_companies": [],
        "due_diligence_notes": [],
        "top_prospects": [],
        "investment_thesis": ""
    }

    try:
        print(f"üöÄ Workflow Initialization for {industry}")

        # Direct step-by-step execution: (banner, step function) pairs run in order.
        pipeline = (
            ("üîç Step 1: Generate Industry Thesis", generate_industry_thesis),
            ("üîé Step 2: Research Companies", research_companies),
            ("üìä Step 3: Analyze Companies", analyze_companies),
            ("üî¨ Step 4: Due Diligence", due_diligence),
            ("üèÜ Step 5: Select Top Prospects", select_top_prospects),
        )
        for banner, step in pipeline:
            print(banner)
            state.update(step(state))

        print("üìÑ Workflow Completed Successfully")

    except Exception as e:
        console.print(f"[red]Workflow Execution Error: {e}[/red]")
        print(f"‚ùå Workflow Failed: {e}")
        traceback.print_exc()
        return None

    # Reaching this point means every step completed, so the state is final.
    print("üíæ Saving Results to Markdown")
    return save_results_to_markdown(state)

#######################
def analyze_companies(state: DealSourcingState):
    """Ask the chat model for a detailed write-up of every initial company.

    For each entry in ``state['initial_companies']`` the model receives
    ``COMPANY_RESEARCH_PROMPT`` (with ``{industry_thesis}`` filled from
    ``state['investment_thesis']``) as the system message and ``str(company)``
    as the human message.

    A failure for one company is printed and that company is skipped; the
    remaining companies are still processed.

    Returns:
        dict: ``{"researched_companies": [{"company": <dict>,
        "detailed_research": <model text>}, ...]}``.
    """
    console.print("[bold blue]Analyzing Companies[/bold blue]")
    researched_companies = []

    for company in state['initial_companies']:
        try:
            # The template's only placeholder is {industry_thesis}; the former
            # `research_context` argument had no matching field and was
            # silently ignored by str.format, so it is no longer built.
            messages = [
                SystemMessage(content=COMPANY_RESEARCH_PROMPT.format(
                    industry_thesis=state['investment_thesis']
                )),
                HumanMessage(content=str(company))
            ]

            response = model.invoke(messages)
            researched_companies.append({
                "company": company,
                "detailed_research": response.content
            })
        except Exception as e:
            console.print(f"[red]Error analyzing company {company.get('name', 'Unknown')}: {e}[/red]")

    console.print(f"[green]Completed research for {len(researched_companies)} companies[/green]")
    return {"researched_companies": researched_companies}

def due_diligence(state: DealSourcingState):
    """Run a due-diligence pass on the first three researched companies.

    For each selected item the model receives ``DUE_DILIGENCE_PROMPT`` with its
    ``{company_details}`` placeholder filled from the item's ``"company"``
    record as the system message, and ``str(research_item)`` (company plus
    detailed research) as the human message.

    A failure for one company is printed and that company is skipped.

    Returns:
        dict: ``{"due_diligence_notes": [{"company": <dict>,
        "due_diligence_notes": <model text>}, ...],
        "current_iteration": <previous value, default 0, plus 1>}``.
    """
    console.print("[bold blue]Conducting Due Diligence[/bold blue]")
    due_diligence_results = []

    for research_item in state['researched_companies'][:3]:  # Top 3 companies
        # Resolved inside the try; kept here so the error handler never has to
        # index into research_item again (which could raise a second time).
        company_name = 'Unknown'
        try:
            company = research_item['company']
            company_name = company.get('name', 'Unknown')

            messages = [
                # Fill the template; previously the literal "{company_details}"
                # text was sent to the model.
                SystemMessage(content=DUE_DILIGENCE_PROMPT.format(
                    company_details=str(company)
                )),
                HumanMessage(content=str(research_item))
            ]

            response = model.invoke(messages)
            due_diligence_results.append({
                "company": company,
                "due_diligence_notes": response.content
            })
        except Exception as e:
            console.print(f"[red]Error in due diligence for {company_name}: {e}[/red]")

    console.print(f"[green]Completed due diligence for {len(due_diligence_results)} companies[/green]")
    return {
        "due_diligence_notes": due_diligence_results,
        "current_iteration": state.get("current_iteration", 0) + 1
    }

def select_top_prospects(state: DealSourcingState):
    """Pick at most two due-diligence entries as the top prospects.

    Entries of ``state['due_diligence_notes']`` are ordered by the length of
    their ``"due_diligence_notes"`` text, longest first (ties keep their
    original relative order), and the first two are kept.

    Returns:
        dict: ``{"top_prospects": [...]}``; an empty list if ranking fails.
    """
    console.print("[bold blue]Selecting Top Prospects[/bold blue]")

    def notes_length(entry):
        return len(entry['due_diligence_notes'])

    try:
        ranked = list(state['due_diligence_notes'])
        ranked.sort(key=notes_length, reverse=True)
        top_prospects = ranked[:2]

        console.print(f"[green]Selected {len(top_prospects)} top prospects[/green]")
        return {"top_prospects": top_prospects}
    except Exception as e:
        console.print(f"[red]Error selecting top prospects: {e}[/red]")
        return {"top_prospects": []}

def should_continue(state):
    """Routing function: return ``END`` once the iteration budget is used up.

    Prints the current/maximum iteration counters, then returns ``END`` when
    ``state['current_iteration']`` has reached ``state['max_iterations']`` and
    the node name ``"analyze"`` otherwise.
    """
    completed = state['current_iteration']
    budget = state['max_iterations']
    console.print(f"[yellow]Current Iteration: {completed} / {budget}[/yellow]")
    return END if completed >= budget else "analyze"

In [32]:
def _safe_report_filename(industry: str) -> str:
    """Build the default report filename for an industry label."""
    import re  # local import so this cell does not depend on another cell's imports

    slug = industry.lower().replace(' ', '_')
    # Path separators and characters that are invalid in filenames on common
    # platforms are replaced one-for-one; otherwise a label such as "Oil/Gas"
    # would point into a non-existent sub-directory and open() would fail.
    slug = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', slug)
    return f"{slug}_deal_sourcing_report.md"


def _render_report_markdown(state: Dict[str, Any], industry: str) -> str:
    """Render the workflow state as the markdown text of the report."""
    parts = [f"# Deal Sourcing Report: {industry} Industry\n\n"]

    # Investment Thesis Section
    parts.append("## Investment Thesis\n\n")
    parts.append(f"{state.get('investment_thesis', 'No detailed thesis generated.')}\n\n")

    # Initial Companies Section
    parts.append("## Initial Companies Identified\n\n")
    for company in state.get('initial_companies', []):
        parts.append(f"### {company.get('name', 'Unnamed Company')}\n")
        parts.append(f"- **Description**: {company.get('description', 'No description')}\n")
        parts.append(f"- **Funding Stage**: {company.get('funding_stage', 'Unknown')}\n")
        parts.append(f"- **Total Funding**: {company.get('total_funding', 'Not disclosed')}\n\n")

    # Researched Companies Section
    parts.append("## Detailed Company Research\n\n")
    for research_item in state.get('researched_companies', []):
        company = research_item.get('company', {})
        parts.append(f"### {company.get('name', 'Unnamed Company')}\n")
        parts.append(f"{research_item.get('detailed_research', 'No research details')}\n\n")

    # Due Diligence Section
    parts.append("## Due Diligence Findings\n\n")
    for diligence_item in state.get('due_diligence_notes', []):
        company = diligence_item.get('company', {})
        parts.append(f"### {company.get('name', 'Unnamed Company')}\n")
        parts.append(f"{diligence_item.get('due_diligence_notes', 'No due diligence details')}\n\n")

    # Top Prospects Section
    parts.append("## Top Investment Prospects\n\n")
    for prospect in state.get('top_prospects', []):
        company = prospect.get('company', {})
        parts.append(f"### {company.get('name', 'Unnamed Company')}\n")
        parts.append("**Potential Fit**: High\n\n")

    return "".join(parts)


def save_results_to_markdown(state: Dict[str, Any], filename: Optional[str] = None):
    """Save workflow results to a markdown document under ``reports/``.

    Args:
        state: Workflow state; missing keys fall back to placeholder text or
            empty sections.
        filename: File name inside ``reports/``. When omitted or empty, a name
            is derived from ``state['industry']`` (lower-cased, spaces and
            filename-unsafe characters replaced by ``_``). An explicitly
            supplied name is used unchanged.

    Returns:
        dict: ``{"report_path": <path>}`` on success, or
        ``{"report_path": None}`` if anything failed (the error and traceback
        are printed to the console instead of being raised).
    """
    console.print("[bold blue]Saving Results to Markdown[/bold blue]")
    try:
        industry = state.get('industry', 'Unspecified')

        if not filename:
            filename = _safe_report_filename(industry)

        md_content = _render_report_markdown(state, industry)

        os.makedirs('reports', exist_ok=True)
        filepath = os.path.join('reports', filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(md_content)

        console.print(Panel(
            Text(f"Report saved to {filepath}", style="bold green"),
            title="[bold blue]Deal Sourcing Report",
            border_style="green"
        ))

        return {"report_path": filepath}
    except Exception as e:
        console.print(f"[red]Error saving markdown: {e}[/red]")
        console.print(traceback.format_exc())
        return {"report_path": None}

In [33]:
# Building the graph

In [34]:
# Assemble the workflow graph: register every step as a node, wire the linear
# path thesis -> research -> analyze -> due_diligence -> select_prospects, and
# let should_continue decide after select_prospects whether to stop (END) or
# go back to "analyze".
_WORKFLOW_NODES = {
    "thesis": generate_industry_thesis,
    "research": research_companies,
    "analyze": analyze_companies,
    "due_diligence": due_diligence,
    "select_prospects": select_top_prospects,
}
_LINEAR_FLOW = ["thesis", "research", "analyze", "due_diligence", "select_prospects"]

builder = StateGraph(DealSourcingState)
for _node_name, _node_fn in _WORKFLOW_NODES.items():
    builder.add_node(_node_name, _node_fn)

builder.set_entry_point(_LINEAR_FLOW[0])
for _src, _dst in zip(_LINEAR_FLOW, _LINEAR_FLOW[1:]):
    builder.add_edge(_src, _dst)

builder.add_conditional_edges(
    "select_prospects",
    should_continue,
    {END: END, "analyze": "analyze"},
)

graph = builder.compile()

# Main

In [35]:
# Entry point: runs the direct (non-graph) workflow for one example industry.
# The value returned by run_deal_sourcing is not captured here.
if __name__ == "__main__":
    run_deal_sourcing("Start ups in Energy transmission")

üöÄ Workflow Initialization for Start ups in Energy transmission
üîç Step 1: Generate Industry Thesis


üîç Generating thesis for: Start ups in Energy transmission
üìã System Prompt Length: 2909 characters


‚ùå Thesis Generation Failed: Error code: 429 - {'error': {'message': 'exceeded quota for this month'}}
üîé Step 2: Research Companies


Traceback (most recent call last):
  File "/tmp/ipykernel_69/4232923428.py", line 14, in generate_industry_thesis
    response = model.invoke(messages)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 170, in invoke
    self.generate_prompt(
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 599, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 456, in generate
    raise e
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 446, in generate
    self._generate_with_cache(
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", 

üîé Executing Tavily search: Top emerging startups in Start ups in Energy transmission with recent funding
üìä Search Results: 3 items
üìù Processing result: Top 16 Energy Transfer (transmission) startups (March 2025)
‚ùå Error processing company: Error code: 429 - {'error': {'message': 'exceeded quota for this month'}}
üìù Processing result: List of Recently Funded Energy Startups for 2025


Traceback (most recent call last):
  File "/tmp/ipykernel_69/4232923428.py", line 54, in research_companies
    company_details = model.with_structured_output(CompanyInfo).invoke([
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/runnables/base.py", line 2368, in invoke
    input = step.invoke(
            ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/runnables/base.py", line 4396, in invoke
    return self.bound.invoke(
           ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 170, in invoke
    self.generate_prompt(
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 599, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

‚ùå Error processing company: Error code: 429 - {'error': {'message': 'exceeded quota for this month'}}
üìù Processing result: List of Funded Energy Startups For 2024 - Growth List


Traceback (most recent call last):
  File "/tmp/ipykernel_69/4232923428.py", line 54, in research_companies
    company_details = model.with_structured_output(CompanyInfo).invoke([
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/runnables/base.py", line 2368, in invoke
    input = step.invoke(
            ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/runnables/base.py", line 4396, in invoke
    return self.bound.invoke(
           ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 170, in invoke
    self.generate_prompt(
  File "/usr/local/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py", line 599, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 