In [None]:
# Install the notebook's dependencies into the running kernel's environment (-q keeps pip quiet).
# NOTE(review): versions are unpinned, and python-dotenv is not imported in any visible cell — confirm it is needed.
%pip install -q tavily-python python-dotenv ipykernel

In [10]:
import getpass
import os

# Resolve the Tavily API key: reuse whatever the environment already holds,
# otherwise prompt for it (input is not echoed) and export it so that later
# cells can read it from os.environ as well.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
if not TAVILY_API_KEY:
    os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY:\n")
    TAVILY_API_KEY = os.environ["TAVILY_API_KEY"]


In [11]:
from tavily import TavilyClient

# Create the Tavily client with no explicit arguments.
# NOTE(review): presumably it picks up TAVILY_API_KEY from the environment — confirm against the tavily-python docs.
tavily_client = TavilyClient()

In [12]:
# Search for OpenAI executives, restricted to LinkedIn profile URLs.
# Raw page content is requested because extract_profile_data() parses it later,
# and an answer is requested because a later cell prints response["answer"].
response = tavily_client.search(
    query="Who are the C-Suite employees at OpenAI?",
    search_depth="advanced",
    include_answer="advanced",
    include_raw_content=True,  # each result's "raw_content" is parsed downstream
    include_domains=["linkedin.com/in"],  # only individual profile pages
    max_results=10,
)

In [None]:
# Display the full search response for inspection.
response

In [25]:
import re

def extract_profile_data(raw_content):
    """Parse the raw text of a LinkedIn profile page into structured fields.

    The parser is regex-based and looks for labelled sections in the order
    ``Experience:``, ``Education:``, ``Skills:``, each separated from the next
    by a blank line. Anything that cannot be located keeps its empty default.

    Args:
        raw_content: Profile text as a string, or None.

    Returns:
        A dict with the keys:
            "location": the line directly above the "<N> connections" line,
                or None when no such line is found.
            "education": list of dicts with "institution", "program",
                "date_range", "grade" and "activities".
            "skills": list of non-empty skill lines.
            "work_experience": list of dicts with "company" and "date_range".
    """
    profile_data = {
        "location": None,
        "education": [],
        "skills": [],
        "work_experience": [],
    }

    # Nothing to parse: None, empty, or whitespace-only content.
    if raw_content is None or not raw_content.strip():
        return profile_data

    # Location is the line immediately before the connections count. The count
    # may carry thousands separators or a trailing "+" (e.g. "500+ connections").
    location_match = re.search(r"\n(.*?)\n[\d,]+\+? connections", raw_content)
    if location_match:
        profile_data["location"] = location_match.group(1).strip()

    # Work experience: the text between "Experience:" and the blank line that
    # precedes "Education:".
    experience_match = re.search(
        r"Experience:\n(.*?)\n\nEducation:", raw_content, re.DOTALL
    )
    if experience_match:
        # The captured section has no trailing newline, so the date line of the
        # last entry ends at end-of-string; accept "\n" or "$" there, otherwise
        # the final (or only) entry would be dropped.
        experience_entries = re.findall(
            r"(.+?) \(https://www\.linkedin\.com/company/.*?\)\n(.*?)(?:\n|$)",
            experience_match.group(1),
        )
        profile_data["work_experience"] = [
            {"company": company.strip(), "date_range": date_range.strip()}
            for company, date_range in experience_entries
        ]

    # Education: the text between "Education:" and the blank line that
    # precedes "Skills:".
    education_match = re.search(
        r"Education:\n(.*?)\n\nSkills:", raw_content, re.DOTALL
    )
    if education_match:
        # Date ranges: "Mon YYYY - Mon YYYY", "Mon YYYY - Present" or
        # "N/A - Present". As above, the last entry may end at end-of-string.
        education_entries = re.findall(
            r"(.+?)\n(.*?)\n"
            r"(\w+ \d{4} - (?:\w+ \d{4}|Present)|N/A - Present)\n"
            r"Grade: (.*?)\n"
            r"Activities and societies: (.*?)(?:\n|$)",
            education_match.group(1),
        )
        profile_data["education"] = [
            {
                "institution": institution.strip(),
                "program": program.strip(),
                "date_range": date_range.strip(),
                "grade": grade.strip(),
                "activities": activities.strip(),
            }
            for institution, program, date_range, grade, activities in education_entries
        ]

    # Skills: one per line, up to the next blank line or the end of the
    # content (the Skills section may be the last thing on the page).
    skills_match = re.search(r"Skills:\n(.*?)(?:\n\n|\Z)", raw_content, re.DOTALL)
    if skills_match:
        profile_data["skills"] = [
            skill.strip()
            for skill in skills_match.group(1).split("\n")
            if skill.strip()
        ]

    return profile_data

In [None]:
# Display the search response again.
# NOTE(review): duplicates the earlier `response` display; no visible cell reassigns it in between.
response

In [None]:
# Build one structured record per search result: the fields parsed from the
# page's raw content, plus the result's title (stored as "name") and URL.
profiles = []

for profile in response["results"]:
    profile_data = {
        **extract_profile_data(profile["raw_content"]),
        "name": profile["title"],
        "url": profile["url"],
    }
    profiles.append(profile_data)

In [None]:
# Print the answer text returned with the search results (requested via include_answer in the search call).
print(response["answer"])

In [None]:
# Display the parsed profile records.
profiles