# LinkedIn Profile Search

Search and extract professional background information from LinkedIn profiles.

**What you'll learn:**
- Domain filtering with `include_domains=["linkedin.com/in"]`
- Raw content extraction with `include_raw_content=True`
- Regex-based profile data extraction

## Setup

In [None]:
%pip install -U tavily-python --quiet

In [None]:
import os
import getpass

# Ask for the key interactively only when the environment holds no non-empty value for it.
if not os.getenv("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY:\n")

In [None]:
from tavily import TavilyClient

# Constructed with no arguments; presumably the client picks up TAVILY_API_KEY from the
# environment populated in the setup cell — confirm against the tavily-python docs.
client = TavilyClient()

## Search LinkedIn Profiles

Use `include_domains` to restrict results to LinkedIn profile pages only.

In [None]:
# One search request; the keyword arguments are grouped by what they control.
response = client.search(
    # What to look for, and where.
    query="Who are the C-Suite employees at OpenAI?",
    include_domains=["linkedin.com/in"],
    # What to get back for each hit, plus the generated answer.
    include_raw_content=True,
    include_answer="advanced",
    # How hard to search and how many hits to return.
    search_depth="advanced",
    max_results=10,
)

# Show the generated answer; the per-result payload is parsed in the cells below.
print(response["answer"])

## Extract Profile Data

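Parse `raw_content` to extract structured profile information. The regexes below assume the page text contains `Experience:`, `Education:`, and `Skills:` section headers in that order; any field whose pattern does not match is left as `None` or an empty list.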

In [None]:
import re

# Patterns are compiled once so repeated calls (one per search result) reuse them.

# The line directly above the "<N> connections" line is taken as the location.
# Accepts "500+" and comma-grouped counts as well as plain digits.
_LOCATION_RE = re.compile(r"\n(.*?)\n[\d,]+\+? connections")

# Section bodies: everything between one header and the blank line before the next.
_EXPERIENCE_SECTION_RE = re.compile(r"Experience:\n(.*?)\n\nEducation:", re.DOTALL)
_EDUCATION_SECTION_RE = re.compile(r"Education:\n(.*?)\n\nSkills:", re.DOTALL)
# Skills may be the final section, so end-of-text terminates it as well as a blank line.
_SKILLS_SECTION_RE = re.compile(r"Skills:\n(.*?)(?:\n\n|\Z)", re.DOTALL)

# Entry patterns end with a greedy "(.*)" (rest of the line) instead of "(.*?)\n":
# the section captures above never include a trailing newline, so requiring one
# silently dropped the last entry of every section.
_EXPERIENCE_ENTRY_RE = re.compile(
    r"(.+?) \(https://www\.linkedin\.com/company/.*?\)\n(.*)"
)
_EDUCATION_ENTRY_RE = re.compile(
    r"(.+?)\n(.*?)\n"
    r"(\w+ \d{4} - \w+ \d{4}|\w+ \d{4} - Present|N/A - Present)\n"
    r"Grade: (.*?)\nActivities and societies: (.*)"
)


def extract_profile_data(raw_content):
    """Extract structured data from LinkedIn profile raw content.

    Args:
        raw_content: Plain-text page content of a profile, or ``None``/blank.

    Returns:
        A dict with the keys:
          - ``"location"``: the line preceding the "connections" line, or ``None``.
          - ``"education"``: list of ``{"institution", "program", "date_range"}`` dicts.
          - ``"skills"``: list of non-empty, stripped skill lines.
          - ``"work_experience"``: list of ``{"company", "date_range"}`` dicts.
        Any field whose pattern does not match keeps its empty default.
    """
    profile_data = {"location": None, "education": [], "skills": [], "work_experience": []}
    if not raw_content or not raw_content.strip():
        return profile_data

    # Location
    location_match = _LOCATION_RE.search(raw_content)
    if location_match:
        profile_data["location"] = location_match.group(1).strip()

    # Work Experience
    experience_match = _EXPERIENCE_SECTION_RE.search(raw_content)
    if experience_match:
        profile_data["work_experience"] = [
            {"company": company.strip(), "date_range": date_range.strip()}
            for company, date_range in _EXPERIENCE_ENTRY_RE.findall(experience_match.group(1))
        ]

    # Education (grade and activities are matched to anchor the entry but not returned)
    education_match = _EDUCATION_SECTION_RE.search(raw_content)
    if education_match:
        profile_data["education"] = [
            {
                "institution": institution.strip(),
                "program": program.strip(),
                "date_range": date_range.strip(),
            }
            for institution, program, date_range, _grade, _activities
            in _EDUCATION_ENTRY_RE.findall(education_match.group(1))
        ]

    # Skills
    skills_match = _SKILLS_SECTION_RE.search(raw_content)
    if skills_match:
        profile_data["skills"] = [
            line.strip() for line in skills_match.group(1).split("\n") if line.strip()
        ]

    return profile_data

In [None]:
# Build one record per search hit: the parsed fields first, then the hit's title and URL
# appended as "name" and "url".
profiles = [
    {
        **extract_profile_data(hit["raw_content"]),
        "name": hit["title"],
        "url": hit["url"],
    }
    for hit in response["results"]
]

profiles

## Next Steps

- Add LLM verification to validate profile matches (see [Search API Skill](../../.claude/skills/tavily-api/references/search.md))
- Use the relevance score returned with each result to filter out or rank weak matches
- Try the [Research API cookbooks](../research/) for people-based deep research