In [None]:
from typing import List, Optional
from pydantic import BaseModel, Field


# ---------------- INPUT ---------------- #

class SearchFilters(BaseModel):
    # User-supplied criteria describing which people to search for.
    # job_title and location are required.
    job_title: str
    location: str
    # Optional refinement; None means no industry was specified.
    industry: Optional[str] = None
    # Extra free-text search terms; defaults to an empty list.
    keywords: List[str] = []


class SearchRequest(BaseModel):
    # A single search invocation: the filters to apply plus a result cap.
    filters: SearchFilters
    # Required (no default); accepted range is 0 < limit <= 50.
    limit: int = Field(..., gt=0, le=50)


# ---------------- RAW LINKEDIN DATA ---------------- #

class RawLinkedInProfile(BaseModel):
    # Unscored profile record as produced by the search step.
    # Every field is required; none declares a default.
    name: str
    title: str
    company: str
    headline: str
    summary: str
    skills: List[str]
    location: str
    linkedin_url: str
    # NOTE(review): presumably the number of days since the profile was last
    # active -- confirm against the producer of this record.
    last_active_days: int
    mutual_connections: int
    industry: str
    # NOTE(review): free-form string; the expected format (e.g. a head-count
    # range) is not visible here -- confirm.
    company_size: str


# ---------------- FINAL LEADS ---------------- #

class ScoredLead(BaseModel):
    # Final, ranked lead returned to the caller. All fields are required.
    name: str
    role: str
    company: str
    location: str
    connection: str
    linkedin_url: str
    # Relevance score, validated to the inclusive range 0..100.
    score: int = Field(..., ge=0, le=100)
    # Short explanation backing the score.
    reason: str


In [None]:
import asyncio
from typing import List
from playwright.async_api import async_playwright

from agents import function_tool
from schemas import SearchRequest, RawLinkedInProfile


def _build_search_url(job_title: str, location: str, keywords: List[str]) -> str:
    """Return the LinkedIn people-search URL for the given search terms."""
    from urllib.parse import quote

    # Drop empty terms so an empty keyword list leaves no trailing separator.
    terms = [job_title, location, *keywords]
    query = " ".join(term for term in terms if term)

    # Percent-encode the whole query: characters such as '&', '#', '+' or '?'
    # inside a term would otherwise be parsed as URL syntax and truncate or
    # corrupt the `keywords` parameter. Spaces still become %20.
    return (
        "https://www.linkedin.com/search/results/people/"
        f"?keywords={quote(query, safe='')}"
    )


async def _extract_profile(item, job_title: str, industry: str) -> "RawLinkedInProfile | None":
    """Build a profile from one search-result element, or None if it is incomplete."""
    name_el = await item.query_selector("span.entity-result__title-text span[aria-hidden]")
    headline_el = await item.query_selector("div.entity-result__primary-subtitle")
    location_el = await item.query_selector("div.entity-result__secondary-subtitle")
    link_el = await item.query_selector("a.app-aware-link")

    # Name, headline and link are mandatory; location is optional.
    if not (name_el and headline_el and link_el):
        return None

    # get_attribute returns None when the anchor carries no href; a profile
    # without a URL is unusable downstream, so skip it explicitly.
    linkedin_url = await link_el.get_attribute("href")
    if not linkedin_url:
        return None

    name = (await name_el.inner_text()).strip()
    headline = (await headline_el.inner_text()).strip()
    location = (await location_el.inner_text()).strip() if location_el else ""

    return RawLinkedInProfile(
        name=name,
        title=job_title,                # Echoes the searched title, not scraped data
        company="",                     # Not reliably public
        headline=headline,
        summary="",                     # Not available in search
        skills=[],                      # Not available in search
        location=location,
        linkedin_url=linkedin_url,
        last_active_days=999,           # Unknown → neutral
        mutual_connections=0,           # Unknown → neutral
        industry=industry,
        company_size="",
    )


@function_tool
async def linkedin_search_tool(request: SearchRequest) -> List[RawLinkedInProfile]:
    """
    Scrapes LinkedIn public search results using Playwright.
    STRICTLY returns raw profile data.
    NO scoring. NO reasoning.

    Args:
        request: Search filters plus the maximum number of profiles to return.

    Returns:
        At most ``request.limit`` distinct profiles (de-duplicated by profile
        URL) in the order they appear in the search results. The list may be
        shorter, or empty, when fewer results can be scraped.
    """
    import logging

    logger = logging.getLogger(__name__)

    filters = request.filters
    limit = request.limit

    profiles: List[RawLinkedInProfile] = []
    seen_urls = set()

    search_url = _build_search_url(filters.job_title, filters.location, filters.keywords)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()
            page = await context.new_page()

            await page.goto(search_url, timeout=60000)
            await page.wait_for_timeout(3000)

            while len(profiles) < limit:
                # Scroll to the bottom so lazily rendered results get loaded.
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                await page.wait_for_timeout(2500)

                results = await page.query_selector_all(
                    "div.reusable-search__result-container"
                )
                count_before = len(profiles)

                for item in results:
                    if len(profiles) >= limit:
                        break

                    try:
                        profile = await _extract_profile(
                            item, filters.job_title, filters.industry or ""
                        )
                    except Exception:
                        # Best effort: one broken result must not abort the
                        # whole search, but leave a trace instead of hiding it.
                        logger.warning("Skipping unparseable search result", exc_info=True)
                        continue

                    if profile is None or profile.linkedin_url in seen_urls:
                        continue

                    seen_urls.add(profile.linkedin_url)
                    profiles.append(profile)

                # Stop once the limit is reached, or when this page contributed
                # nothing new (paging further would only repeat or stall).
                if len(profiles) >= limit or len(profiles) == count_before:
                    break

                next_btn = await page.query_selector("button[aria-label='Next']")
                # A disabled "Next" marks the last page; clicking it would block
                # until the click times out and then raise.
                if next_btn is None or not await next_btn.is_enabled():
                    break

                await next_btn.click()
                await page.wait_for_timeout(3000)
        finally:
            # Release the browser even when navigation or scraping fails.
            await browser.close()

    return profiles


In [None]:
from agents import Agent
from schemas import ScoredLead

# System prompt for the scoring agent: what it receives, the steps it must
# perform, and the output rules it has to follow.
REASONING_SYSTEM_PROMPT = """
You are a Lead Scoring and Qualification Engine.

INPUT:
- Raw LinkedIn profiles
- User filters
- Result limit

TASK:
1. Evaluate each profile for relevance:
   - Role & title match
   - Keyword relevance
   - Activity recency
   - Industry & company fit
   - Mutual connections

2. Assign a relevance score from 0–100.
3. Discard low-quality leads.
4. Rank leads by score (descending).
5. Generate a concise explanation per lead.

RULES:
- NO hallucination
- NO assumptions beyond provided data
- NO text outside JSON
- Enforce limit strictly
"""

# Agent that turns raw profiles into a structured list of ScoredLead records.
reasoning_agent = Agent(
    name="ReasoningAgent",
    model="gpt-4o-mini",
    output_type=list[ScoredLead],
    instructions=REASONING_SYSTEM_PROMPT,
)


In [None]:
from agents import Agent, Runner
from schemas import SearchRequest
from tools.linkedin_search import linkedin_search_tool
from agents.reasoning_agent import reasoning_agent

# System prompt for the orchestrator: fetch raw profiles with the search tool,
# hand them to the reasoning agent for scoring, and return only the result.
MAIN_AGENT_SYSTEM_PROMPT = """
You are the Main Lead Discovery Orchestrator.

WORKFLOW:
1. Accept structured lead filters from the user.
2. Call the LinkedIn Search Tool to retrieve raw profiles.
3. Pass results to the Reasoning Agent for scoring and ranking.
4. Enforce result limits.
5. Return ONLY the final JSON array of leads.

CONSTRAINTS:
- DO NOT score leads yourself
- DO NOT modify profile data
- DO NOT hallucinate
- ALWAYS delegate reasoning to the Reasoning Agent
"""

# NOTE(review): this module imports both the Agents SDK (`from agents import
# ...`) and a local `agents.reasoning_agent` package under the same top-level
# name; confirm the local package does not shadow the SDK at import time.

main_leads_agent = Agent(
    name="MainLeadsAgent",
    instructions=MAIN_AGENT_SYSTEM_PROMPT,
    model="gpt-4o-mini",
    tools=[
        linkedin_search_tool,
        # `tools` takes tool objects, not Agent instances. Wrapping the agent
        # with as_tool() lets the orchestrator call it like any other tool and
        # stay in control of the conversation afterwards.
        reasoning_agent.as_tool(
            tool_name="reasoning_agent",
            tool_description=(
                "Reasoning Agent: scores, filters and ranks raw LinkedIn "
                "profiles against the user's filters and result limit, and "
                "returns the ranked leads as a JSON array."
            ),
        ),
    ],
)


In [None]:
import asyncio
from agents import Runner
from schemas import SearchRequest, SearchFilters
from agents.main_agent import main_leads_agent


async def run():
    """Run one sample lead search through the main agent and print the result."""
    request = SearchRequest(
        filters=SearchFilters(
            job_title="Software Engineer",
            location="Berlin",
            industry="Technology",
            keywords=["Python", "AWS", "AI"],
        ),
        limit=5,
    )

    result = await Runner.run(
        main_leads_agent,
        # The runner takes text (or a list of input items), not a pydantic
        # model, so send the request as its JSON representation.
        input=request.model_dump_json(),
    )

    # FINAL OUTPUT: JSON array only
    print(result.final_output)


# Script entry point: only runs the sample search when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(run())
