In [1]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# Build the Edge options object that is handed to the driver below.
from selenium.webdriver.edge.options import Options
options = Options()
# NOTE(review): this flag is presumably only required by older Selenium
# releases that could also target legacy Edge - confirm against the
# installed Selenium version before relying on it.
options.use_chromium = True # This is necessary for Edge Chromium

# EdgeChromiumDriverManager().install() supplies the driver executable path
# that Service needs; the resulting service starts the Edge session.
service = Service(executable_path=EdgeChromiumDriverManager().install())
# `driver` is the shared browser session used by every later cell.
driver = webdriver.Edge(service=service, options=options)

# Maximise the browser window.
driver.maximize_window()

# Login

In [2]:
import getpass
import constants as c
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def __prompt_email_password():
    """Ask for the account credentials on the console.

    The email is read with ``input`` and the password with
    ``getpass.getpass`` so that it is not echoed while typing.

    Returns:
        tuple: ``(email, password)`` exactly as entered.
    """
    email = input("Email: ")
    return email, getpass.getpass(prompt="Password: ")

def page_has_loaded(driver):
    """Tell whether the page currently shown by ``driver`` finished loading.

    Executes a script in the browser that returns ``document.readyState``
    and compares the result with ``'complete'``.

    Returns:
        bool: True only when the reported state equals ``'complete'``.
    """
    return driver.execute_script('return document.readyState;') == 'complete'

def login(driver, email=None, password=None, cookie = None, timeout=10):
    """Sign in to LinkedIn through the login form, or with a session cookie.

    Args:
        driver: Selenium WebDriver that drives the browser.
        email: Account email. Prompted for interactively when empty.
        password: Account password. Prompted for interactively when empty.
        cookie: Value for the ``li_at`` cookie. When given, the form login is
            skipped and the cookie is installed instead.
        timeout: Seconds to wait for the login form and, after submitting,
            for the element that confirms the login.

    Raises:
        selenium.common.exceptions.TimeoutException: if either wait expires.
    """
    if cookie is not None:
        return _login_with_cookie(driver, cookie)

    if not email or not password:
        email, password = __prompt_email_password()

    driver.get("https://www.linkedin.com/login")

    # Honour the caller's timeout (previously a hard-coded 10 seconds) and
    # reuse the element returned by the wait instead of looking it up again.
    email_elem = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.ID, "username"))
    )
    email_elem.send_keys(email)

    password_elem = driver.find_element(By.ID, "password")
    password_elem.send_keys(password)
    password_elem.submit()

    if driver.current_url == 'https://www.linkedin.com/checkpoint/lg/login-submit':
        # find_elements returns an empty list when the prompt is absent,
        # whereas find_element would raise and abort the login.
        remember_prompts = driver.find_elements(By.ID, c.REMEMBER_PROMPT)
        if remember_prompts:
            remember_prompts[0].submit()

    # Block until the element identified by c.VERIFY_LOGIN_ID shows up.
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID))
    )
  
def _login_with_cookie(driver, cookie):
    """Open the LinkedIn login page and install ``cookie`` as ``li_at``.

    Args:
        driver: Selenium WebDriver that drives the browser.
        cookie: Value stored under the ``li_at`` cookie name.
    """
    driver.get("https://www.linkedin.com/login")
    session_cookie = dict(name="li_at", value=cookie)
    driver.add_cookie(session_cookie)

In [3]:
import os

# Credentials must never be stored in the notebook. They are read from the
# environment; when either one is missing, login() prompts for both.
# NOTE: any password that was previously committed here should be rotated.
email = os.environ.get("LINKEDIN_EMAIL")
password = os.environ.get("LINKEDIN_PASSWORD")

login(driver, email, password, cookie = None, timeout=10)

# Job Search

In [30]:
import os
from typing import List
from time import sleep
import urllib.parse

from objects import Scraper
import constants as c
from jobs import Job

from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys


class JobSearch(Scraper):
    """Collect LinkedIn job listings using an already signed-in driver.

    Helper methods such as ``is_signed_in``, ``focus``, ``scroll_to_bottom``,
    ``wait_for_element_to_load`` and the ``WAIT_FOR_ELEMENT_TIMEOUT`` constant
    are not defined here; they are presumably inherited from ``Scraper``.

    After ``scrape_logged_in`` runs with ``scrape_recommended_jobs=True`` the
    instance gains one list attribute of ``Job`` objects per named entry in
    ``AREAS``.
    """

    # Attribute names assigned, by position, to the cards found on the jobs
    # landing page. A None entry marks a card that is skipped.
    AREAS = ["recommended_jobs", None, "still_hiring", "more_jobs"]

    def __init__(self, driver, base_url="https://www.linkedin.com/jobs/", close_on_complete=False, scrape=True, scrape_recommended_jobs=True):
        """Store the driver and base URL and optionally scrape right away.

        Args:
            driver: Selenium WebDriver with an open session.
            base_url: Landing page of the jobs section.
            close_on_complete: Forwarded to ``scrape``; not acted on in this class.
            scrape: When true, ``scrape`` is called from the constructor.
            scrape_recommended_jobs: Forwarded to ``scrape``.
        """
        # NOTE(review): Scraper.__init__ is deliberately not called here
        # (the call was commented out) - confirm that is still intended.
        # super().__init__()
        self.driver = driver
        self.base_url = base_url

        if scrape:
            self.scrape(close_on_complete, scrape_recommended_jobs)


    def scrape(self, close_on_complete=True, scrape_recommended_jobs=True):
        """Scrape the jobs landing page; only the signed-in flow exists.

        Raises:
            NotImplementedError: when the session is not signed in.
        """
        if not self.is_signed_in():
            # NotImplemented is a sentinel value, not an exception class;
            # NotImplementedError is the exception that was intended.
            raise NotImplementedError("This part is not implemented yet")
        self.scrape_logged_in(close_on_complete=close_on_complete, scrape_recommended_jobs=scrape_recommended_jobs)


    def scrape_job_card(self, base_element) -> Job:
        """Build an unscraped ``Job`` from a single job card element.

        Args:
            base_element: WebElement of the card to read.

        Returns:
            Job: holds the card's title, link, company and location and is
            created with ``scrape=False``.
        """
        job_div = self.wait_for_element_to_load(name="job-card-list__title", base=base_element)
        job_title = job_div.text.strip()
        linkedin_url = job_div.get_attribute("href")
        company = base_element.find_element(By.CLASS_NAME, "artdeco-entity-lockup__subtitle").text
        location = base_element.find_element(By.CLASS_NAME, "job-card-container__metadata-wrapper").text
        return Job(linkedin_url=linkedin_url, job_title=job_title, company=company, location=location, scrape=False, driver=self.driver)


    def scrape_logged_in(self, close_on_complete=True, scrape_recommended_jobs=True):
        """Open ``base_url`` and, if requested, collect the job cards per area.

        Args:
            close_on_complete: Accepted for interface compatibility; unused.
            scrape_recommended_jobs: When true, each card on the page that has
                a name in ``AREAS`` is stored on the instance as a list of
                ``Job`` objects under that name.
        """
        driver = self.driver
        driver.get(self.base_url)
        print(self.base_url)
        if not scrape_recommended_jobs:
            return

        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)
        job_area = self.wait_for_element_to_load(name="scaffold-finite-scroll__content")
        areas = self.wait_for_all_elements_to_load(name="artdeco-card", base=job_area)
        for i, area in enumerate(areas):
            # The page may show more cards than AREAS has names for; treat
            # the extras as unnamed instead of failing with IndexError.
            area_name = self.AREAS[i] if i < len(self.AREAS) else None
            print(area.get_attribute('class'))
            if not area_name:
                continue
            # find_elements (plural) returns a list; find_element returns a
            # single WebElement, which cannot be iterated over.
            job_postings = area.find_elements(By.CLASS_NAME, "jobs-search-results__list-item")
            area_results = [self.scrape_job_card(job_posting) for job_posting in job_postings]
            setattr(self, area_name, area_results)
        return


    def search(self, search_term: str) -> List[Job]:
        """Run a keyword search and return the job cards that were loaded.

        Args:
            search_term: Free-text keywords; URL-quoted before use.

        Returns:
            List[Job]: one unscraped ``Job`` per card found in the results.
        """
        # Build the URL with plain string handling: os.path.join uses the
        # platform path separator and is not meant for URLs.
        search_url = f"{self.base_url.rstrip('/')}/search"
        url = search_url + f"?keywords={urllib.parse.quote(search_term)}&refresh=true"
        self.driver.get(url)
        self.scroll_to_bottom()
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        # NOTE(review): an earlier value, "jobs-search-results-list", was
        # immediately overwritten by this one, so only this class name was
        # ever used - confirm it is the intended container.
        job_listing_class_name = "jobs-search-results__list-item"

        job_listing = self.wait_for_element_to_load(name=job_listing_class_name)

        # Scroll the listing in steps, pausing after each one before the
        # cards are read.
        for page_fraction in (0.3, 0.6, 1):
            self.scroll_class_name_element_to_page_percent(job_listing_class_name, page_fraction)
            self.focus()
            sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_cards = self.wait_for_all_elements_to_load(name="job-card-list", base=job_listing)
        return [self.scrape_job_card(job_card) for job_card in job_cards]

In [32]:
# scrape=True makes the constructor call scrape(), which opens and prints the
# base URL; scrape_recommended_jobs=False skips collecting the job cards.
job_search = JobSearch(driver, close_on_complete=True, scrape=True, scrape_recommended_jobs=False)


https://www.linkedin.com/jobs/


In [35]:
# search() returns a list of Job objects built from the result cards; they
# are created with scrape=False, so details are not loaded yet.
jobs = job_search.search('Data Scientist')

In [40]:
# search() can return an empty list; guard so this cell does not fail with
# IndexError.
# NOTE(review): the recorded run raised TimeoutException inside this call -
# presumably an element it waits for never appeared; confirm the selectors
# used by Job.scrape_logged_in.
if jobs:
    jobs[0].scrape_logged_in()
else:
    print("No jobs found - nothing to scrape.")

TimeoutException: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6B832B7F2+63538]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6B82AF672+238306]
	(No symbol) [0x00007FF6B80E3407]
	(No symbol) [0x00007FF6B81297ED]
	(No symbol) [0x00007FF6B81298B0]
	(No symbol) [0x00007FF6B8164387]
	(No symbol) [0x00007FF6B8148E5F]
	(No symbol) [0x00007FF6B811F037]
	(No symbol) [0x00007FF6B8161F49]
	(No symbol) [0x00007FF6B8148A83]
	(No symbol) [0x00007FF6B811E55E]
	(No symbol) [0x00007FF6B811D71C]
	(No symbol) [0x00007FF6B811E111]
	Microsoft::Applications::Events::EventProperty::to_string [0x00007FF6B8501CDC+1102764]
	(No symbol) [0x00007FF6B81925F6]
	(No symbol) [0x00007FF6B8223DAC]
	(No symbol) [0x00007FF6B821BDA8]
	Microsoft::Applications::Events::EventProperty::to_string [0x00007FF6B8500C25+1098485]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6B82B9E31+281249]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6B82B45B4+258596]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6B82B46EF+258911]
	Microsoft::Applications::Events::ILogConfiguration::operator* [0x00007FF6B82A8EC1+211761]
	BaseThreadInitThunk [0x00007FFB9824257D+29]
	RtlUserThreadStart [0x00007FFB99A6AA58+40]
