# Scrape Linkedin Data

In [2]:
# Make sure we have installed the dependency
! pip freeze | grep linkedin

linkedin-scraper==2.11.2


In [1]:
! google-chrome-stable --version

Google Chrome 114.0.5735.90 


In [5]:
from linkedin_scraper import JobSearch, Job, actions
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

class _JobSearch(JobSearch):
    def __init__(self, final_url=None, **kwargs):
        self.final_url = final_url
        self.current_url = None
        super().__init__(**kwargs)
    
    def search(self, search_term: str, page_n) -> List[Job]:
        if self.final_url is None:
            self.current_url = os.path.join(self.base_url, "search") + f"?keywords={urllib.parse.quote(search_term)}&refresh=true"
            self.driver.get(self.current_url)

            # Get redirection URL
            self.final_url = self.driver.current_url
        else:
            self.current_url = os.path.join(self.final_url, f"&start={25*(page_n-1)}")
            self.driver.get(self.current_url)
        
        self.scroll_to_bottom()
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_listing_class_name = "jobs-search-results-list"
        job_listing = self.wait_for_element_to_load(name=job_listing_class_name)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.3)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.6)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 1)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_results = []
        for job_card in self.wait_for_all_elements_to_load(name="job-card-list", base=job_listing):
            job = self.scrape_job_card(job_card)
            job_results.append(job)
        return job_results

def are_same(job1: Job, job2: Job):
    if job1.job_title == job2.job_title and job1.company == job2.company:
        return True
    return False

## 1. Scrape Job Search

Scrape the first 20 pages of the search result.

In [6]:
# Set up the lower-level services for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) # if email and password isnt given, it'll prompt in terminal
print("... Logged in.")
job_search = _JobSearch(driver=driver, close_on_complete=False, scrape=False)

... Logged in.


In [7]:
%%time
from selenium.common.exceptions import TimeoutException

N_PAGES = 100
SEARCH_KEYWORD = "data"

jobs = []
for page_n in range(1, N_PAGES+1):
    pprint(f"Searching jobs... Keyword: {SEARCH_KEYWORD}; Page {page_n}/{N_PAGES}")
    try:
        new_batch = job_search.search(SEARCH_KEYWORD, page_n)
    except TimeoutException:
        pprint(f"SKIPPED PAGE: {page_n}")
        continue

    # Check if the new batch of jobs are duplicates, 
    # which means we have gone through all the pages and should quit scraping.
    if jobs and are_same(new_batch[0], jobs[0]):
        pprint("Found duplicate results! All the pages have been scraped. Quiting...")
        break
        
    jobs.extend(new_batch)
    pprint(f"FINISHED PAGE: {page_n}")

'Searching jobs... Keyword: data; Page 1/50'
'FINISHED PAGE: 1'
'Searching jobs... Keyword: data; Page 2/50'
'FINISHED PAGE: 2'
'Searching jobs... Keyword: data; Page 3/50'
'FINISHED PAGE: 3'
'Searching jobs... Keyword: data; Page 4/50'
'FINISHED PAGE: 4'
'Searching jobs... Keyword: data; Page 5/50'
'FINISHED PAGE: 5'
'Searching jobs... Keyword: data; Page 6/50'
'FINISHED PAGE: 6'
'Searching jobs... Keyword: data; Page 7/50'
'FINISHED PAGE: 7'
'Searching jobs... Keyword: data; Page 8/50'
'FINISHED PAGE: 8'
'Searching jobs... Keyword: data; Page 9/50'
'FINISHED PAGE: 9'
'Searching jobs... Keyword: data; Page 10/50'
'FINISHED PAGE: 10'
'Searching jobs... Keyword: data; Page 11/50'
'FINISHED PAGE: 11'
'Searching jobs... Keyword: data; Page 12/50'
'FINISHED PAGE: 12'
'Searching jobs... Keyword: data; Page 13/50'
'FINISHED PAGE: 13'
'Searching jobs... Keyword: data; Page 14/50'
'FINISHED PAGE: 14'
'Searching jobs... Keyword: data; Page 15/50'
'FINISHED PAGE: 15'
'Searching jobs... Keyword: 

In [8]:
len(jobs)

384

In [9]:
# Save today's crawl temporarily
import pickle
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"helsinki_data_jobs_{current_date}.pkl"
with open(f"../data/tmp/{fname}", "wb") as f:
    dicted_jobs = [job.to_dict() for job in jobs]
    pickle.dump(dicted_jobs,f)

## 2. Scrape job postings

In [10]:
import logging
from linkedin_scraper import Job, actions

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class _Job(Job):
    def __init__(self, **kwargs):
       self.job_title = ""
       self.required_skills = ""
       self.job_type_1 = ""
       self.job_type_2 = ""
 
       super().__init__(**kwargs)
    
    def scrape_logged_in(self, close_on_complete=True):
        driver = self.driver
        
        driver.get(self.linkedin_url)
        self.focus()
        self.job_title = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-unified-top-card__job-title')]").text.strip()
        self.company = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a[1]").text.strip()
        self.company_linkedin_url = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a").get_attribute("href")
        self.location = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//*").text.strip()
        self.posted_date = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//span[3]").text.strip()
        self.job_type_1 = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')]/span").text.strip()
        self.job_description = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-description')]").text.strip()
        
        try:
            self.required_skills = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][1]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.required_skills += self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][2]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.job_type_2 = self.wait_for_element_to_load(by=By.XPATH, name="(//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')])[2]/span").text.strip()
        except TimeoutException:
            self.job_type_2 = ""
            
        try:
            self.applicant_count = self.wait_for_element_to_load(by=By.XPATH, name="jobs-unified-top-card__applicant-count").text.strip()
        except TimeoutException:
            self.applicant_count = 0
        
        try:
            self.benefits = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'salary-main-rail-card')]").text.strip()
        except TimeoutException:
            self.benefits = ""

        if close_on_complete:
            driver.close()

In [11]:
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

In [28]:
# Set up low-level servies for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) 
print("... Logged in.")

... Logged in.


Ignore the error logs!

In [13]:
import pickle
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"helsinki_data_jobs_{current_date}.pkl"

with open(f"../data/tmp/{fname}", "rb") as f:
    jobs = pickle.load(f)

print(len(jobs))

384


In [14]:
%%time
from selenium.common.exceptions import StaleElementReferenceException
from time import sleep

N_JOBS = len(jobs)

crawled_jobs = []
for i, job in enumerate(jobs):
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    try:
        _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
        crawled_jobs.append(_crawled_job)
        sleep(1)
    except StaleElementReferenceException:
        print(f"... Skipped Job {i+1}/{N_JOBS}.")
        sleep(1)
        continue

Crawling... Jobs 5/384
Crawling... Jobs 6/384
Crawling... Jobs 7/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 8/384
Crawling... Jobs 9/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 10/384
Crawling... Jobs 11/384
Crawling... Jobs 12/384
Crawling... Jobs 13/384
Crawling... Jobs 14/384
Crawling... Jobs 15/384
Crawling... Jobs 16/384
Crawling... Jobs 17/384
Crawling... Jobs 18/384
Crawling... Jobs 19/384
Crawling... Jobs 20/384
Crawling... Jobs 21/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 22/384
Crawling... Jobs 23/384
Crawling... Jobs 24/384
Crawling... Jobs 25/384
Crawling... Jobs 26/384
Crawling... Jobs 27/384
Crawling... Jobs 28/384
Crawling... Jobs 29/384
Crawling... Jobs 30/384
Crawling... Jobs 31/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 32/384
Crawling... Jobs 33/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 34/384
Crawling... Jobs 35/384
Crawling... Jobs 36/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 37/384
Crawling... Jobs 38/384
Crawling... Jobs 39/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 40/384
Crawling... Jobs 41/384
Crawling... Jobs 42/384
Crawling... Jobs 43/384
Crawling... Jobs 44/384
Crawling... Jobs 45/384
Crawling... Jobs 46/384
Crawling... Jobs 47/384
Crawling... Jobs 48/384
Crawling... Jobs 49/384
Crawling... Jobs 50/384
Crawling... Jobs 51/384
Crawling... Jobs 52/384
Crawling... Jobs 53/384
Crawling... Jobs 54/384
Crawling... Jobs 55/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 56/384
Crawling... Jobs 57/384
Crawling... Jobs 58/384
Crawling... Jobs 59/384
Crawling... Jobs 60/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 61/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 62/384
Crawling... Jobs 63/384
Crawling... Jobs 64/384
Crawling... Jobs 65/384
Crawling... Jobs 66/384
Crawling... Jobs 67/384
Crawling... Jobs 68/384
Crawling... Jobs 69/384
Crawling... Jobs 70/384
Crawling... Jobs 71/384
Crawling... Jobs 72/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 73/384
Crawling... Jobs 74/384
Crawling... Jobs 75/384
Crawling... Jobs 76/384
Crawling... Jobs 77/384
Crawling... Jobs 78/384
Crawling... Jobs 79/384
Crawling... Jobs 80/384
Crawling... Jobs 81/384
Crawling... Jobs 82/384
Crawling... Jobs 83/384
Crawling... Jobs 84/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 85/384
Crawling... Jobs 86/384
Crawling... Jobs 87/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 88/384
Crawling... Jobs 89/384
Crawling... Jobs 90/384
Crawling... Jobs 91/384
Crawling... Jobs 92/384
Crawling... Jobs 93/384
Crawling... Jobs 94/384
Crawling... Jobs 95/384
Crawling... Jobs 96/384
Crawling... Jobs 97/384
Crawling... Jobs 98/384
Crawling... Jobs 99/384
Crawling... Jobs 100/384
Crawling... Jobs 101/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 102/384
Crawling... Jobs 103/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 104/384
Crawling... Jobs 105/384
Crawling... Jobs 106/384
Crawling... Jobs 107/384
Crawling... Jobs 108/384
Crawling... Jobs 109/384
Crawling... Jobs 110/384
Crawling... Jobs 111/384
Crawling... Jobs 112/384
Crawling... Jobs 113/384
Crawling... Jobs 114/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 115/384
Crawling... Jobs 116/384
Crawling... Jobs 117/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 118/384
Crawling... Jobs 119/384
Crawling... Jobs 120/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 121/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 122/384
Crawling... Jobs 123/384
Crawling... Jobs 124/384
Crawling... Jobs 125/384
Crawling... Jobs 126/384
Crawling... Jobs 127/384
Crawling... Jobs 128/384
Crawling... Jobs 129/384
Crawling... Jobs 130/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 131/384
Crawling... Jobs 132/384
Crawling... Jobs 133/384
Crawling... Jobs 134/384
Crawling... Jobs 135/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 136/384
Crawling... Jobs 137/384
Crawling... Jobs 138/384
Crawling... Jobs 139/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 140/384
Crawling... Jobs 141/384
Crawling... Jobs 142/384
Crawling... Jobs 143/384
Crawling... Jobs 144/384
Crawling... Jobs 145/384
Crawling... Jobs 146/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x56084636dc96 <unknown>
#3 0x56084636ddc1 <unknown>
#4 0x5608463a77f4 <unknown>
#5 0x56084638d03d <unknown>
#6 0x5608463a530e <unknown>
#7 0x56084638cde3 <unknown>
#8 0x5608463622dd <unknown>
#9 0x56084636334e <unknown>
#10 0x5608465c23e4 <unknown>
#11 0x5608465c63d7 <unknown>
#12 0x5608465d0b20 <unknown>
#13 0x5608465c7023 <unknown>
#14 0x5608465951aa <unknown>
#15 0x5608465eb6b8 <unknown>
#16 0x5608465eb847 <unknown>
#17 0x5608465fb243 <unknown>
#18 0x7fd7bbc94ac3 <unknown>



Crawling... Jobs 147/384
Crawling... Jobs 148/384
Crawling... Jobs 149/384
Crawling... Jobs 150/384
Crawling... Jobs 151/384


WebDriverException: Message: unknown error: net::ERR_NAME_NOT_RESOLVED
  (Session info: headless chrome=114.0.5735.90)
Stacktrace:
#0 0x5608466024e3 <unknown>
#1 0x560846331c76 <unknown>
#2 0x560846329c7f <unknown>
#3 0x56084631bca2 <unknown>
#4 0x56084631d412 <unknown>
#5 0x56084631c0ca <unknown>
#6 0x56084631b168 <unknown>
#7 0x56084631afa0 <unknown>
#8 0x5608463199bf <unknown>
#9 0x560846319fed <unknown>
#10 0x560846333b06 <unknown>
#11 0x5608463a59e5 <unknown>
#12 0x56084638d012 <unknown>
#13 0x5608463a530e <unknown>
#14 0x56084638cde3 <unknown>
#15 0x5608463622dd <unknown>
#16 0x56084636334e <unknown>
#17 0x5608465c23e4 <unknown>
#18 0x5608465c63d7 <unknown>
#19 0x5608465d0b20 <unknown>
#20 0x5608465c7023 <unknown>
#21 0x5608465951aa <unknown>
#22 0x5608465eb6b8 <unknown>
#23 0x5608465eb847 <unknown>
#24 0x5608465fb243 <unknown>
#25 0x7fd7bbc94ac3 <unknown>


In [15]:
import pandas as pd

In [16]:
df_crawled_jobs = pd.DataFrame([vars(job) for job in crawled_jobs]
                              ).drop(columns=["driver"]
                              ).drop_duplicates("linkedin_url")

In [17]:
df_crawled_jobs

Unnamed: 0,job_title,required_skills,job_type_1,job_type_2,linkedin_url,company,company_linkedin_url,location,posted_date,applicant_count,job_description,benefits
0,Information Security Manager / Datasäkerhetschef,"English, Finnish, and SwedishBusiness Administ...",On-site,Full-time,https://www.linkedin.com/jobs/view/3746294030/...,Hanken School of Economics,https://www.linkedin.com/company/hanken-svensk...,"Hanken School of Economics · Helsinki, Uusimaa...",2 days ago,0,About the job\nIT-Services at Hanken School of...,
1,Qlik Sense Administrator,Analytical Skills and Cloud ComputingCross-fun...,Hybrid,Full-time,https://www.linkedin.com/jobs/view/3744707657/...,Sievo,https://www.linkedin.com/company/sievo-oy/life,"Sievo · Helsinki, Uusimaa, Finland 1 week ago...",1 week ago,0,About the job\nAre you our next Qlik Sense / Q...,
2,Revenue Analyst,"Campaign Analytics, Problem Solving, Revenue M...",On-site,Full-time,https://www.linkedin.com/jobs/view/3707060022/...,Warner Bros. Discovery,https://www.linkedin.com/company/warner-bros-d...,Warner Bros. Discovery · Finland Reposted 2 w...,Reposted 2 weeks ago,0,About the job\nEvery great story has a new beg...,
3,Data Scientist,"Data Analysis, Data Science, Machine Learning,...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3735986015/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 2 week...",2 weeks ago,0,About the job\nMedEngine is a digitally minded...,
4,Data Engineer,"Data Analytics, Data Engineering, Data Science...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3750477070/...,The Hub,https://www.linkedin.com/company/thehubio/life,"The Hub · Helsinki, Uusimaa, Finland 17 hours...",17 hours ago,0,About the job\nAbout Huuva\n\nHuuva Kitchens t...,
...,...,...,...,...,...,...,...,...,...,...,...,...
145,Staff Security Engineer,"Communication, Data Privacy, Ethical Hacking, ...",Remote,Full-time,https://www.linkedin.com/jobs/view/3711096063/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland Reposted 1 ...",Reposted 1 week ago,0,About the job\nJob Description\n\nWolt is look...,
146,"Senior Backend Engineer, Shelf Product Recogni...","Back-End Web Development, Databases, and Pytho...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3615509468/...,Scandit,https://www.linkedin.com/company/scandit/life,Scandit · Finland Reposted 2 weeks ago · Ove...,Reposted 2 weeks ago,0,About the job\n**This role could also be based...,
147,(Senior) Data Architect - Tietoevry Tech Servi...,"Big Data, Data Integration, and Data Warehousi...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3737890407/...,Tietoevry,https://www.linkedin.com/company/tietoevry/life,"Tietoevry · Tampere, Pirkanmaa, Finland 2 wee...",2 weeks ago,0,About the job\nYou may apply to Tietoevry by s...,
148,(Senior) Data Architect - Tietoevry Tech Servi...,"Big Data, Data Integration, and Data Warehousi...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3737889673/...,Tietoevry,https://www.linkedin.com/company/tietoevry/life,"Tietoevry · Turku, Southwest Finland, Finland ...",2 weeks ago,0,About the job\nYou may apply to Tietoevry by s...,


In [15]:
# df_crawled_jobs.to_csv(f"../data/crawled_jobs_1-{len(crawled_jobs}_checkpoint.csv", index=False)

### 2.1 Continue from the failed point

In [29]:
# Continue

CONTINUE_FROM = 360

for i, job in enumerate(jobs):
    if i+1<CONTINUE_FROM:
        continue
        
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
    crawled_jobs.append(_crawled_job)
    sleep(1)

Crawling... Jobs 360/384
Crawling... Jobs 361/384
Crawling... Jobs 362/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x55ae0b27f4e3 <unknown>
#1 0x55ae0afaec76 <unknown>
#2 0x55ae0afeac96 <unknown>
#3 0x55ae0afeadc1 <unknown>
#4 0x55ae0b0247f4 <unknown>
#5 0x55ae0b00a03d <unknown>
#6 0x55ae0b02230e <unknown>
#7 0x55ae0b009de3 <unknown>
#8 0x55ae0afdf2dd <unknown>
#9 0x55ae0afe034e <unknown>
#10 0x55ae0b23f3e4 <unknown>
#11 0x55ae0b2433d7 <unknown>
#12 0x55ae0b24db20 <unknown>
#13 0x55ae0b244023 <unknown>
#14 0x55ae0b2121aa <unknown>
#15 0x55ae0b2686b8 <unknown>
#16 0x55ae0b268847 <unknown>
#17 0x55ae0b278243 <unknown>
#18 0x7f22a1094ac3 <unknown>



Crawling... Jobs 363/384
Crawling... Jobs 364/384
Crawling... Jobs 365/384
Crawling... Jobs 366/384
Crawling... Jobs 367/384
Crawling... Jobs 368/384
Crawling... Jobs 369/384
Crawling... Jobs 370/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x55ae0b27f4e3 <unknown>
#1 0x55ae0afaec76 <unknown>
#2 0x55ae0afeac96 <unknown>
#3 0x55ae0afeadc1 <unknown>
#4 0x55ae0b0247f4 <unknown>
#5 0x55ae0b00a03d <unknown>
#6 0x55ae0b02230e <unknown>
#7 0x55ae0b009de3 <unknown>
#8 0x55ae0afdf2dd <unknown>
#9 0x55ae0afe034e <unknown>
#10 0x55ae0b23f3e4 <unknown>
#11 0x55ae0b2433d7 <unknown>
#12 0x55ae0b24db20 <unknown>
#13 0x55ae0b244023 <unknown>
#14 0x55ae0b2121aa <unknown>
#15 0x55ae0b2686b8 <unknown>
#16 0x55ae0b268847 <unknown>
#17 0x55ae0b278243 <unknown>
#18 0x7f22a1094ac3 <unknown>



Crawling... Jobs 371/384
Crawling... Jobs 372/384
Crawling... Jobs 373/384
Crawling... Jobs 374/384
Crawling... Jobs 375/384
Crawling... Jobs 376/384
Crawling... Jobs 377/384
Crawling... Jobs 378/384


ERROR:__main__:Message: 
Stacktrace:
#0 0x55ae0b27f4e3 <unknown>
#1 0x55ae0afaec76 <unknown>
#2 0x55ae0afeac96 <unknown>
#3 0x55ae0afeadc1 <unknown>
#4 0x55ae0b0247f4 <unknown>
#5 0x55ae0b00a03d <unknown>
#6 0x55ae0b02230e <unknown>
#7 0x55ae0b009de3 <unknown>
#8 0x55ae0afdf2dd <unknown>
#9 0x55ae0afe034e <unknown>
#10 0x55ae0b23f3e4 <unknown>
#11 0x55ae0b2433d7 <unknown>
#12 0x55ae0b24db20 <unknown>
#13 0x55ae0b244023 <unknown>
#14 0x55ae0b2121aa <unknown>
#15 0x55ae0b2686b8 <unknown>
#16 0x55ae0b268847 <unknown>
#17 0x55ae0b278243 <unknown>
#18 0x7f22a1094ac3 <unknown>



Crawling... Jobs 379/384
Crawling... Jobs 380/384
Crawling... Jobs 381/384
Crawling... Jobs 382/384
Crawling... Jobs 383/384
Crawling... Jobs 384/384


In [30]:
df_crawled_jobs = pd.DataFrame([vars(job) for job in crawled_jobs]).drop(columns=["driver"]).drop_duplicates("linkedin_url")
df_crawled_jobs

Unnamed: 0,job_title,required_skills,job_type_1,job_type_2,linkedin_url,company,company_linkedin_url,location,posted_date,applicant_count,job_description,benefits
0,Information Security Manager / Datasäkerhetschef,"English, Finnish, and SwedishBusiness Administ...",On-site,Full-time,https://www.linkedin.com/jobs/view/3746294030/...,Hanken School of Economics,https://www.linkedin.com/company/hanken-svensk...,"Hanken School of Economics · Helsinki, Uusimaa...",2 days ago,0,About the job\nIT-Services at Hanken School of...,
1,Qlik Sense Administrator,Analytical Skills and Cloud ComputingCross-fun...,Hybrid,Full-time,https://www.linkedin.com/jobs/view/3744707657/...,Sievo,https://www.linkedin.com/company/sievo-oy/life,"Sievo · Helsinki, Uusimaa, Finland 1 week ago...",1 week ago,0,About the job\nAre you our next Qlik Sense / Q...,
2,Revenue Analyst,"Campaign Analytics, Problem Solving, Revenue M...",On-site,Full-time,https://www.linkedin.com/jobs/view/3707060022/...,Warner Bros. Discovery,https://www.linkedin.com/company/warner-bros-d...,Warner Bros. Discovery · Finland Reposted 2 w...,Reposted 2 weeks ago,0,About the job\nEvery great story has a new beg...,
3,Data Scientist,"Data Analysis, Data Science, Machine Learning,...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3735986015/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 2 week...",2 weeks ago,0,About the job\nMedEngine is a digitally minded...,
4,Data Engineer,"Data Analytics, Data Engineering, Data Science...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3750477070/...,The Hub,https://www.linkedin.com/company/thehubio/life,"The Hub · Helsinki, Uusimaa, Finland 17 hours...",17 hours ago,0,About the job\nAbout Huuva\n\nHuuva Kitchens t...,
...,...,...,...,...,...,...,...,...,...,...,...,...
378,Data Engineer,"Data Engineering, Data Warehousing, and Extrac...",Remote,Full-time,https://www.linkedin.com/jobs/view/3741252798/...,Trimble Inc.,https://www.linkedin.com/company/trimble/life,"Trimble Inc. · Espoo, Uusimaa, Finland Reposte...",Reposted 1 week ago,0,About the job\nYour Title: Data Engineer\n\nJo...,
379,Databricks data engineer,"Data Analytics, Data Engineering, Data Science...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3729429877/...,Accenture Nordics,https://www.linkedin.com/company/accenture-nor...,"Accenture Nordics · Helsinki, Uusimaa, Finland...",Reposted 2 weeks ago,0,About the job\nHaemme Databricks data engineer...,
380,Expert IT Developer to join Quality Research a...,"Databases, Programming, Python (Programming La...",On-site,Full-time,https://www.linkedin.com/jobs/view/3733082364/...,Nordea,https://www.linkedin.com/company/nordea/life,"Nordea · Helsinki, Uusimaa, Finland 2 weeks a...",2 weeks ago,0,About the job\nJob ID: 19431\n We are now look...,
381,Machine Learning Engineer - MLOps,"Artificial Intelligence (AI), Data Mining, Dat...",Remote,Full-time,https://www.linkedin.com/jobs/view/3750358338/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland 1 day ago ...",1 day ago,0,About the job\nJob Description\n\nTeam purpose...,


In [None]:
# Save today's crawl
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"../data/crawled_jobs_{current_date}.csv"

# df_crawled_jobs.to_csv(fname, index=False)