In [9]:
# import web driver
from selenium import webdriver

# create ChromeOptions object
chrome_options = webdriver.ChromeOptions()

# specifies the path to the chromedriver.exe
driver = webdriver.Chrome(options=chrome_options)

# driver.get method() will navigate to a page given by the URL address
driver.get('http://www.linkedin.com')


In [10]:
from selenium.webdriver.common.by import By
from dotenv import load_dotenv
import os

load_dotenv()

linkedin_password = os.getenv('linkedin_pw')
linkedin_username = os.getenv('linkedin_uid')

# locate email input field by id
email_input = driver.find_element(By.ID, 'session_key')

# send_keys() to simulate key strokes
email_input.send_keys(linkedin_username)

# locate password input field by id
password_input = driver.find_element(By.ID, 'session_password')

# send_keys() to simulate key strokes
password_input.send_keys(linkedin_password)

In [11]:
from selenium.webdriver.common.by import By

# Locate submit button by class name
log_in_button = driver.find_element(By.CLASS_NAME, 'sign-in-form__submit-btn--full-width')

# Click the button
log_in_button.click()


In [12]:
from selenium.webdriver.common.by import By

# Locate the search button by class name
search_button = driver.find_element(By.CLASS_NAME, 'search-global-typeahead__collapsed-search-button')

# Click the button
search_button.click()


In [13]:
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Locate the search input field by class name
search_input = driver.find_element(By.CLASS_NAME, 'search-global-typeahead__input')

# Clear any existing text in the search input
search_input.clear()

# Send the search query "Software Developer"
search_input.send_keys('Software Developer')

# Submit the search form by pressing Enter
search_input.send_keys(Keys.RETURN)


In [14]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait for the button with the text "People" to be present
button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//button[text()="People"]'))
)

# Click the button
button.click()


In [15]:
# Assuming you have already navigated to LinkedIn, logged in, and performed the search
import csv
from selenium.common.exceptions import StaleElementReferenceException
from bs4 import BeautifulSoup
# Wait for the search results to be present
search_results = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, '//li[@class="reusable-search__result-container"]'))
)

profile_links = []

html_page = driver.page_source

soup = BeautifulSoup(html_page, 'html.parser')

# Find all <a> tags with class 'app-aware-link'
span_tags = soup.find_all('span', class_='entity-result__title-text t-16')

# Extract href links from <a> tags within <span> tags
for span_tag in span_tags:
    # Find <a> tags with class 'app-aware-link' within the current <span> tag
    app_aware_links = span_tag.find_all('a', class_='app-aware-link')

    # Extract href links from the found <a> tags
    href_links = [link.get('href') for link in app_aware_links]

    # Print the extracted href links
    for href_link in href_links:
        profile_links.append(href_link)

In [16]:
import pandas as pd

final_df = pd.DataFrame(columns=['Name', 'Position', 'Skills', 'Linked_URL'])

for person_link in profile_links:
    driver.get(person_link)
    
    # Wait for the presence of the header container and list elements
    wait = WebDriverWait(driver, 20)
    header_present = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'pvs-header__left-container--stack')))
    list_present = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'pvs-list')))
    
    # Get the page source after waiting for elements
    html_page = driver.page_source
    
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(html_page, 'html.parser')
    
    # Extract data from the parsed HTML
    name = soup.find('h1', class_='text-heading-xlarge inline t-24 v-align-middle break-words').text
    position = soup.find('ul', class_='pvs-list').find('li').find('span').text
    skill_link = soup.find('a', class_="optional-action-target-wrapper artdeco-button artdeco-button--tertiary artdeco-button--standard artdeco-button--2 artdeco-button--muted inline-flex justify-center full-width align-items-center artdeco-button--fluid").get('href')
    
    print("Name:", name, "Position:", position, "Link:", skill_link)

    driver.get(skill_link)
    wait = WebDriverWait(driver, 15)
    element_present = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'scaffold-layout__main')))
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    skills = []
    skills_list = soup.find('ul', class_='pvs-list')
    if skills_list:
        skill_items = skills_list.find_all('li', class_='pvs-list__item--one-column')
        
        for skill_item in skill_items:
            # Extract skill name
            skill_name = skill_item.find('span', class_='t-bold').get_text(strip=True)
            skills.append(skill_name)
            
            # Extract skill link
            skill_link = skill_item.find('a', class_='optional-action-target-wrapper')['href']
            
            # Print or store the extracted information
            print("Skill:", skill_name)
            print("Skill Link:", skill_link)
            print("------")

    df = pd.DataFrame({'Name': name, 'Position': position, 'Skills': [skills], 'Linked_URL' : person_link})
    final_df = pd.concat([final_df, df], ignore_index=True)

Name: Vaibhav Chavan Position: Software Developer Link: https://www.linkedin.com/in/vaibhav-chavan-16783a101/details/skills?profileUrn=urn%3Ali%3Afsd_profile%3AACoAABnuJ18BBNIr_7vrjvha2FvgjQP2C_mw_2A
Name: Rajat Majoka Position: You both studied at Indian Institute of Technology, Bombay Link: https://www.linkedin.com/in/rajat-majoka-30a63a151/details/interests?profileUrn=urn%3Ali%3Afsd_profile%3AACoAACSUUDoBM12GjPMqxUnXJEzHLoLi-ra9IhQ&tabIndex=0&detailScreenTabIndex=0
Name: Deepak Verma Position: Software Developer Link: https://www.linkedin.com/in/deepak-verma-b54755147/details/skills?profileUrn=urn%3Ali%3Afsd_profile%3AACoAACOHQTAB9oUGvF-XuGG6ABKDl2cCHi2OAWY
Name: Mahesh Kumar Saini Position: Kadam Technologies Pvt. Ltd. Link: https://www.linkedin.com/in/mahesh-kumar-saini-281418192/details/skills?profileUrn=urn%3Ali%3Afsd_profile%3AACoAAC1EmIEBDe0TbYKEvlFDrZCGG7xeA8hrle0
Name: Muskaan Hooda Position: Software Developer Link: https://www.linkedin.com/in/muskaan-hooda-760759195/detail

In [17]:
import psycopg2
import os
from dotenv import load_dotenv

load_dotenv()

conn = psycopg2.connect(
    dbname=os.getenv('db_name'),
    user=os.getenv('db_user'),
    password=os.getenv('db_password'),
    host=os.getenv('db_host'),
    port=os.getenv('db_port')
)

cursor = conn.cursor()

cursor.execute("""
            INSERT INTO your_table_name (name, current_position, skills, linkedin_url)
            VALUES (%s, %s, %s, %s)
        """, (name, position, skills, link))

conn.commit()
cursor.close()
conn.close()

NameError: name 'link' is not defined