# In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

def chrome(headless=False):
    """Start a Chrome WebDriver session and return it.

    Args:
        headless: When truthy, Chrome is launched with ``--headless=new``.

    Returns:
        The ``webdriver.Chrome`` instance, configured with a 10-second
        implicit wait.
    """
    # Collect command-line switches first so the conditional one stays in
    # front of the unconditional one.
    arguments = ["--headless=new"] if headless else []
    arguments.append("--disable-popup-blocking")

    chrome_options = Options()
    for argument in arguments:
        chrome_options.add_argument(argument)
    chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

    # webdriver_manager resolves the chromedriver path handed to the Service.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    driver.implicitly_wait(10)
    return driver

# Launch browser and open the LinkedIn login page.
browser = chrome(True)  # set True for headless
browser.get('https://www.linkedin.com/uas/login')
browser.implicitly_wait(3)

# Credentials come from config.txt: first line is the username, second line
# the password. The context manager closes the file once it has been read.
with open('config.txt') as config_file:
    lines = config_file.readlines()

# readlines() keeps each line terminator. Passing a trailing "\n" to
# send_keys() types it as a key press, which can submit the form before the
# password has been entered, so strip the terminators (and nothing else, in
# case the password legitimately starts or ends with spaces).
username = lines[0].rstrip("\r\n")
password = lines[1].rstrip("\r\n")

username_field = browser.find_element(By.ID, "username")
username_field.send_keys(username)

password_field = browser.find_element(By.ID, "password")
password_field.send_keys(password)

# Submit the login form through the password field.
password_field.submit()

# ===========================
# SEARCH PEOPLE BY PAST COMPANY
# ===========================

# Company name -> LinkedIn internal company ID used in the search URL.
companies = {
    "Google": "1441",   # Example: LinkedIn's internal company ID for Google
    "Microsoft": "1035"
}

# Profile URLs in discovery order, plus a set for O(1) duplicate checks.
profile_links = []
seen_links = set()

for company, comp_id in companies.items():
    # %5B%22...%22%5D is the URL-encoded form of ["<id>"].
    search_url = f"https://www.linkedin.com/search/results/people/?facetPastCompany=%5B%22{comp_id}%22%5D"
    browser.get(search_url)
    time.sleep(3)

    # Scroll to load more results
    for _ in range(3):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # Parse search results
    # NOTE(review): the "app-aware-link" class is tied to LinkedIn's current
    # markup; if no profiles are found, confirm this selector still matches.
    soup = BeautifulSoup(browser.page_source, "lxml")
    anchors = soup.find_all("a", {"class": "app-aware-link"}, href=True)

    for a in anchors:
        href = a["href"]
        if "/in/" not in href:
            continue
        # Drop the query string BEFORE the duplicate check: the stored value
        # is the cleaned URL, so comparing the raw href would let the same
        # profile be appended again whenever it carries a query string.
        clean_href = href.split("?")[0]
        if clean_href not in seen_links:
            seen_links.add(clean_href)
            profile_links.append(clean_href)

print(f"Found {len(profile_links)} profiles")

# ===========================
# SCRAPE PROFILE DETAILS
# ===========================

def _first_text(soup, name, attrs=None):
    """Return the stripped text of the first tag matching *name*/*attrs*.

    Returns None when the page has no such tag, instead of relying on a
    bare ``except`` that would also hide unrelated errors and
    ``KeyboardInterrupt``.
    """
    tag = soup.find(name, attrs or {})
    if tag is None:
        return None
    return tag.get_text().strip()


# One row per profile: [name, title, location, link].
info = []

for link in profile_links[:5]:  # limit for demo
    browser.get(link)
    time.sleep(3)  # give the profile page time to render before parsing
    soup = BeautifulSoup(browser.page_source, "lxml")

    name = _first_text(soup, "h1")
    title = _first_text(soup, "div", {"class": "text-body-medium"})
    location = _first_text(soup, "span", {"class": "text-body-small"})

    info.append([name, title, location, link])
    time.sleep(2)  # pause between profile requests

# Write the collected rows to disk, one CSV column per scraped field.
column_names = ["Full Name", "Title", "Location", "Profile URL"]
results = pd.DataFrame(info, columns=column_names)
results.to_csv("past_company_people.csv", index=False)

print("✅ Done Scraping by Past Company!")

# End the WebDriver session now that scraping has finished.
browser.quit()


# Output:
# Found 0 profiles
# ✅ Done Scraping by Past Company!
