# Scrape Linkedin Data

In [1]:
# Make sure we have installed the dependency
! pip freeze | grep linkedin

linkedin-scraper==2.11.2


In [2]:
! google-chrome-stable --version

Google Chrome 114.0.5735.90 


In [3]:
from linkedin_scraper import JobSearch, Job, actions
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

class _JobSearch(JobSearch):
    def __init__(self, final_url=None, **kwargs):
        self.final_url = final_url
        self.current_url = None
        super().__init__(**kwargs)
    
    def search(self, search_term: str, page_n) -> List[Job]:
        if self.final_url is None:
            self.current_url = os.path.join(self.base_url, "search") + f"?keywords={urllib.parse.quote(search_term)}&refresh=true"
            self.driver.get(self.current_url)

            # Get redirection URL
            self.final_url = self.driver.current_url
        else:
            self.current_url = os.path.join(self.final_url, f"&start={25*(page_n-1)}")
            self.driver.get(self.current_url)
        
        self.scroll_to_bottom()
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_listing_class_name = "jobs-search-results-list"
        job_listing = self.wait_for_element_to_load(name=job_listing_class_name)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.3)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.6)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 1)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_results = []
        for job_card in self.wait_for_all_elements_to_load(name="job-card-list", base=job_listing):
            job = self.scrape_job_card(job_card)
            job_results.append(job)
        return job_results

def are_same(job1: Job, job2: Job):
    if job1.job_title == job2.job_title and job1.company == job2.company:
        return True
    return False

## 1. Scrape Job Search

Scrape the first 20 pages of the search result.

In [4]:
# Set up the lower-level services for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) # if email and password isnt given, it'll prompt in terminal
print("... Logged in.")
job_search = _JobSearch(driver=driver, close_on_complete=False, scrape=False)

... Logged in.


In [5]:
%%time
from selenium.common.exceptions import TimeoutException

N_PAGES = 50
SEARCH_KEYWORD = "data"

jobs = []
for page_n in range(1, N_PAGES+1):
    pprint(f"Searching jobs... Keyword: {SEARCH_KEYWORD}; Page {page_n}/{N_PAGES}")
    try:
        new_batch = job_search.search(SEARCH_KEYWORD, page_n)
    except TimeoutException:
        pprint(f"SKIPPED PAGE: {page_n}")
        continue

    # Check if the new batch of jobs are duplicates, 
    # which means we have gone through all the pages and should quit scraping.
    if jobs and are_same(new_batch[0], jobs[0]):
        pprint("Found duplicate results! All the pages have been scraped. Quiting...")
        break
        
    jobs.extend(new_batch)
    pprint(f"FINISHED PAGE: {page_n}")

'Searching jobs... Keyword: data; Page 1/50'
'FINISHED PAGE: 1'
'Searching jobs... Keyword: data; Page 2/50'
'FINISHED PAGE: 2'
'Searching jobs... Keyword: data; Page 3/50'
'FINISHED PAGE: 3'
'Searching jobs... Keyword: data; Page 4/50'
'FINISHED PAGE: 4'
'Searching jobs... Keyword: data; Page 5/50'
'FINISHED PAGE: 5'
'Searching jobs... Keyword: data; Page 6/50'
'FINISHED PAGE: 6'
'Searching jobs... Keyword: data; Page 7/50'
'FINISHED PAGE: 7'
'Searching jobs... Keyword: data; Page 8/50'
'FINISHED PAGE: 8'
'Searching jobs... Keyword: data; Page 9/50'
'FINISHED PAGE: 9'
'Searching jobs... Keyword: data; Page 10/50'
'FINISHED PAGE: 10'
'Searching jobs... Keyword: data; Page 11/50'
'FINISHED PAGE: 11'
'Searching jobs... Keyword: data; Page 12/50'
'FINISHED PAGE: 12'
'Searching jobs... Keyword: data; Page 13/50'
'FINISHED PAGE: 13'
'Searching jobs... Keyword: data; Page 14/50'
'FINISHED PAGE: 14'
'Searching jobs... Keyword: data; Page 15/50'
'FINISHED PAGE: 15'
'Searching jobs... Keyword: 

In [6]:
len(jobs)

373

In [7]:
# Save today's crawl temporarily
import pickle
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"helsinki_data_jobs_{current_date}.pkl"
with open(f"../data/tmp/{fname}", "wb") as f:
    dicted_jobs = [job.to_dict() for job in jobs]
    pickle.dump(dicted_jobs,f)

## 2. Scrape job postings

In [8]:
import logging
from linkedin_scraper import Job, actions

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class _Job(Job):
    def __init__(self, **kwargs):
       self.job_title = ""
       self.required_skills = ""
       self.job_type_1 = ""
       self.job_type_2 = ""
 
       super().__init__(**kwargs)
    
    def scrape_logged_in(self, close_on_complete=True):
        driver = self.driver
        
        driver.get(self.linkedin_url)
        self.focus()
        self.job_title = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-unified-top-card__job-title')]").text.strip()
        self.company = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a[1]").text.strip()
        self.company_linkedin_url = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a").get_attribute("href")
        self.location = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//*").text.strip()
        self.posted_date = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//span[3]").text.strip()
        self.job_type_1 = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')]/span").text.strip()
        self.job_description = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-description')]").text.strip()
        
        try:
            self.required_skills = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][1]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.required_skills += self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][2]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.job_type_2 = self.wait_for_element_to_load(by=By.XPATH, name="(//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')])[2]/span").text.strip()
        except TimeoutException:
            self.job_type_2 = ""
            
        try:
            self.applicant_count = self.wait_for_element_to_load(by=By.XPATH, name="jobs-unified-top-card__applicant-count").text.strip()
        except TimeoutException:
            self.applicant_count = 0
        
        try:
            self.benefits = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'salary-main-rail-card')]").text.strip()
        except TimeoutException:
            self.benefits = ""

        if close_on_complete:
            driver.close()

In [9]:
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

In [None]:
# Set up low-level servies for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) 
print("... Logged in.")

Ignore the error logs!

In [11]:
import pickle
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"helsinki_data_jobs_{current_date}.pkl"

with open(f"../data/tmp/{fname}", "rb") as f:
    jobs = pickle.load(f)

print(len(jobs))

373


In [12]:
%%time
from selenium.common.exceptions import StaleElementReferenceException
from time import sleep

N_JOBS = len(jobs)

crawled_jobs = []
for i, job in enumerate(jobs):
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    try:
        _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
        crawled_jobs.append(_crawled_job)
        sleep(1)
    except StaleElementReferenceException:
        print(f"... Skipped Job {i+1}/{N_JOBS}.")
        sleep(1)
        continue

Crawling... Jobs 188/373


ERROR:__main__:Message: 
Stacktrace:
#0 0x55ee883fe4e3 <unknown>
#1 0x55ee8812dc76 <unknown>
#2 0x55ee88169c96 <unknown>
#3 0x55ee88169dc1 <unknown>
#4 0x55ee881a37f4 <unknown>
#5 0x55ee8818903d <unknown>
#6 0x55ee881a130e <unknown>
#7 0x55ee88188de3 <unknown>
#8 0x55ee8815e2dd <unknown>
#9 0x55ee8815f34e <unknown>
#10 0x55ee883be3e4 <unknown>
#11 0x55ee883c23d7 <unknown>
#12 0x55ee883ccb20 <unknown>
#13 0x55ee883c3023 <unknown>
#14 0x55ee883911aa <unknown>
#15 0x55ee883e76b8 <unknown>
#16 0x55ee883e7847 <unknown>
#17 0x55ee883f7243 <unknown>
#18 0x7f55f0094ac3 <unknown>



Crawling... Jobs 189/373
Crawling... Jobs 190/373
Crawling... Jobs 191/373


ERROR:__main__:Message: 
Stacktrace:
#0 0x55ee883fe4e3 <unknown>
#1 0x55ee8812dc76 <unknown>
#2 0x55ee88169c96 <unknown>
#3 0x55ee88169dc1 <unknown>
#4 0x55ee881a37f4 <unknown>
#5 0x55ee8818903d <unknown>
#6 0x55ee881a130e <unknown>
#7 0x55ee88188de3 <unknown>
#8 0x55ee8815e2dd <unknown>
#9 0x55ee8815f34e <unknown>
#10 0x55ee883be3e4 <unknown>
#11 0x55ee883c23d7 <unknown>
#12 0x55ee883ccb20 <unknown>
#13 0x55ee883c3023 <unknown>
#14 0x55ee883911aa <unknown>
#15 0x55ee883e76b8 <unknown>
#16 0x55ee883e7847 <unknown>
#17 0x55ee883f7243 <unknown>
#18 0x7f55f0094ac3 <unknown>



Crawling... Jobs 192/373


WebDriverException: Message: unknown error: net::ERR_CONNECTION_REFUSED
  (Session info: headless chrome=114.0.5735.90)
Stacktrace:
#0 0x55ee883fe4e3 <unknown>
#1 0x55ee8812dc76 <unknown>
#2 0x55ee88125c7f <unknown>
#3 0x55ee88117ca2 <unknown>
#4 0x55ee88119412 <unknown>
#5 0x55ee881180ca <unknown>
#6 0x55ee88117168 <unknown>
#7 0x55ee88116fa0 <unknown>
#8 0x55ee881159bf <unknown>
#9 0x55ee88115fed <unknown>
#10 0x55ee8812fb06 <unknown>
#11 0x55ee881a19e5 <unknown>
#12 0x55ee88189012 <unknown>
#13 0x55ee881a130e <unknown>
#14 0x55ee88188de3 <unknown>
#15 0x55ee8815e2dd <unknown>
#16 0x55ee8815f34e <unknown>
#17 0x55ee883be3e4 <unknown>
#18 0x55ee883c23d7 <unknown>
#19 0x55ee883ccb20 <unknown>
#20 0x55ee883c3023 <unknown>
#21 0x55ee883911aa <unknown>
#22 0x55ee883e76b8 <unknown>
#23 0x55ee883e7847 <unknown>
#24 0x55ee883f7243 <unknown>
#25 0x7f55f0094ac3 <unknown>


In [13]:
import pandas as pd

In [14]:
df_crawled_jobs = pd.DataFrame([vars(job) for job in crawled_jobs]
                              ).drop(columns=["driver"]
                              ).drop_duplicates("linkedin_url")

In [15]:
df_crawled_jobs

Unnamed: 0,job_title,required_skills,job_type_1,job_type_2,linkedin_url,company,company_linkedin_url,location,posted_date,applicant_count,job_description,benefits
0,Data Scientist,"Data Analysis, Data Science, Machine Learning,...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3735986015/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 2 week...",2 weeks ago,0,About the job\nMedEngine is a digitally minded...,
1,"Senior Data Analyst, Ads","Data Analysis, Python (Programming Language), ...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3702242885/...,Rovio Entertainment Corporation,https://www.linkedin.com/company/rovio/life,"Rovio Entertainment Corporation · Helsinki, Uu...",Reposted 1 week ago,0,About the job\nAt Rovio you will get to work w...,
2,JVM Performance and Tuning Engineer,"Business Logic, Garbage Collection, Honeycomb,...",Remote,Full-time,https://www.linkedin.com/jobs/view/3734708994/...,RELEX Solutions,https://www.linkedin.com/company/relexsolution...,RELEX Solutions · Finland 2 weeks ago · 10 a...,2 weeks ago,0,About the job\nRELEX Solutions create cutting-...,
3,Data Engineer (Level Up),"Data Warehousing, Finnish, and SQLData Visuali...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3744740320/...,Loihde Advance,https://www.linkedin.com/company/loihdeadvance...,"Loihde Advance · Uusimaa, Finland 1 week ago ...",1 week ago,0,About the job\nOnko sinulle jo kertynyt jo väh...,
4,Data Science - Machine Learning Engineer,"Artificial Intelligence (AI), Computer Science...",Remote,Full-time,https://www.linkedin.com/jobs/view/3629670334/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland Reposted 6 ...",Reposted 6 hours ago,0,About the job\nJob Description\n\nTeam purpose...,
...,...,...,...,...,...,...,...,...,...,...,...,...
186,Financial Crime Prevention Senior/Master Exper...,"Analytical SkillsBudgeting, Communication, Cor...",On-site,Full-time,https://www.linkedin.com/jobs/view/3720314180/...,Nordea,https://www.linkedin.com/company/nordea/life,"Nordea · Helsinki, Uusimaa, Finland Reposted ...",Reposted 1 week ago,0,About the job\nJob ID: 19410 \n As a Senior Ex...,
187,"Flutter Engineer, Merchant Group",Mobile Telephony,Remote,Full-time,https://www.linkedin.com/jobs/view/3737949780/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland 2 weeks ago...",2 weeks ago,0,About the job\nJob Description\n\nWolt is look...,
188,"Senior Financial Analyst, New Build","Analytical SkillsCapital Allocation, Finance, ...",On-site,Full-time,https://www.linkedin.com/jobs/view/3726702476/...,Royal Caribbean Group,https://www.linkedin.com/company/royal-caribbe...,"Royal Caribbean Group · Turku, Southwest Finla...",Reposted 3 days ago,0,About the job\nPosition Summary\n\nThe purpose...,
189,Lead Software Engineer (AI Product Lab),"Back-End Web Development, Data Science, Python...",Remote,Full-time,https://www.linkedin.com/jobs/view/3628841523/...,Smartly.io,https://www.linkedin.com/company/smartly-io/life,"Smartly.io · Helsinki, Uusimaa, Finland Repost...",Reposted 2 weeks ago,0,About the job\nJoin our dynamic Media Intellig...,


In [15]:
# df_crawled_jobs.to_csv(f"../data/crawled_jobs_1-{len(crawled_jobs}_checkpoint.csv", index=False)

### 2.1 Continue from the failed point

In [29]:
# In case session expiration
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) 
print("... Logged in.")

... Logged in.


In [30]:
# Continue

CONTINUE_FROM = 226

for i, job in enumerate(jobs):
    if i+1<CONTINUE_FROM:
        continue
        
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
    crawled_jobs.append(_crawled_job)
    sleep(1)

Crawling... Jobs 358/373
Crawling... Jobs 359/373
Crawling... Jobs 360/373
Crawling... Jobs 361/373
Crawling... Jobs 362/373
Crawling... Jobs 363/373


ERROR:__main__:Message: 
Stacktrace:
#0 0x556d8beff4e3 <unknown>
#1 0x556d8bc2ec76 <unknown>
#2 0x556d8bc6ac96 <unknown>
#3 0x556d8bc6adc1 <unknown>
#4 0x556d8bca47f4 <unknown>
#5 0x556d8bc8a03d <unknown>
#6 0x556d8bca230e <unknown>
#7 0x556d8bc89de3 <unknown>
#8 0x556d8bc5f2dd <unknown>
#9 0x556d8bc6034e <unknown>
#10 0x556d8bebf3e4 <unknown>
#11 0x556d8bec33d7 <unknown>
#12 0x556d8becdb20 <unknown>
#13 0x556d8bec4023 <unknown>
#14 0x556d8be921aa <unknown>
#15 0x556d8bee86b8 <unknown>
#16 0x556d8bee8847 <unknown>
#17 0x556d8bef8243 <unknown>
#18 0x7fe1c6494ac3 <unknown>



Crawling... Jobs 364/373
Crawling... Jobs 365/373
Crawling... Jobs 366/373
Crawling... Jobs 367/373
Crawling... Jobs 368/373
Crawling... Jobs 369/373
Crawling... Jobs 370/373
Crawling... Jobs 371/373


ERROR:__main__:Message: 
Stacktrace:
#0 0x556d8beff4e3 <unknown>
#1 0x556d8bc2ec76 <unknown>
#2 0x556d8bc6ac96 <unknown>
#3 0x556d8bc6adc1 <unknown>
#4 0x556d8bca47f4 <unknown>
#5 0x556d8bc8a03d <unknown>
#6 0x556d8bca230e <unknown>
#7 0x556d8bc89de3 <unknown>
#8 0x556d8bc5f2dd <unknown>
#9 0x556d8bc6034e <unknown>
#10 0x556d8bebf3e4 <unknown>
#11 0x556d8bec33d7 <unknown>
#12 0x556d8becdb20 <unknown>
#13 0x556d8bec4023 <unknown>
#14 0x556d8be921aa <unknown>
#15 0x556d8bee86b8 <unknown>
#16 0x556d8bee8847 <unknown>
#17 0x556d8bef8243 <unknown>
#18 0x7fe1c6494ac3 <unknown>



Crawling... Jobs 372/373
Crawling... Jobs 373/373


In [31]:
df_crawled_jobs = pd.DataFrame([vars(job) for job in crawled_jobs]).drop(columns=["driver"]).drop_duplicates("linkedin_url")
df_crawled_jobs

Unnamed: 0,job_title,required_skills,job_type_1,job_type_2,linkedin_url,company,company_linkedin_url,location,posted_date,applicant_count,job_description,benefits
0,Data Scientist,"Data Analysis, Data Science, Machine Learning,...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3735986015/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 2 week...",2 weeks ago,0,About the job\nMedEngine is a digitally minded...,
1,"Senior Data Analyst, Ads","Data Analysis, Python (Programming Language), ...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3702242885/...,Rovio Entertainment Corporation,https://www.linkedin.com/company/rovio/life,"Rovio Entertainment Corporation · Helsinki, Uu...",Reposted 1 week ago,0,About the job\nAt Rovio you will get to work w...,
2,JVM Performance and Tuning Engineer,"Business Logic, Garbage Collection, Honeycomb,...",Remote,Full-time,https://www.linkedin.com/jobs/view/3734708994/...,RELEX Solutions,https://www.linkedin.com/company/relexsolution...,RELEX Solutions · Finland 2 weeks ago · 10 a...,2 weeks ago,0,About the job\nRELEX Solutions create cutting-...,
3,Data Engineer (Level Up),"Data Warehousing, Finnish, and SQLData Visuali...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3744740320/...,Loihde Advance,https://www.linkedin.com/company/loihdeadvance...,"Loihde Advance · Uusimaa, Finland 1 week ago ...",1 week ago,0,About the job\nOnko sinulle jo kertynyt jo väh...,
4,Data Science - Machine Learning Engineer,"Artificial Intelligence (AI), Computer Science...",Remote,Full-time,https://www.linkedin.com/jobs/view/3629670334/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland Reposted 6 ...",Reposted 6 hours ago,0,About the job\nJob Description\n\nTeam purpose...,
...,...,...,...,...,...,...,...,...,...,...,...,...
368,Machine Learning Engineer - MLOps,"Artificial Intelligence (AI), Data Mining, Dat...",Remote,Full-time,https://www.linkedin.com/jobs/view/3750358338/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland 2 days ago ...",2 days ago,0,About the job\nJob Description\n\nTeam purpose...,
369,Databricks data engineer,"Data Analytics, Data Engineering, Data Science...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3729429877/...,Accenture Nordics,https://www.linkedin.com/company/accenture-nor...,"Accenture Nordics · Helsinki, Uusimaa, Finland...",Reposted 2 weeks ago,0,About the job\nHaemme Databricks data engineer...,
370,Lead Quantitative Risk Analyst (Data Analytics...,"Business Requirements, Data Reconciliation, Fu...",On-site,Full-time,https://www.linkedin.com/jobs/view/3733079663/...,Nordea,https://www.linkedin.com/company/nordea/life,"Nordea · Helsinki, Uusimaa, Finland 2 weeks a...",2 weeks ago,0,About the job\nJob ID: 18802 \nWe are looking ...,
371,Cloud Ops Engineer,Cloud Computing and Microsoft AzureAzure Cosmo...,On-site,Full-time,https://www.linkedin.com/jobs/view/3694191393/...,Cloud1 Oy,https://www.linkedin.com/company/cloud1-oy/life,"Cloud1 Oy · Helsinki, Uusimaa, Finland Reposte...",Reposted 3 weeks ago,0,About the job\nCloud1 hakee kokenutta teknistä...,


In [32]:
# Save today's crawl
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"../data/crawled_jobs_{current_date}.csv"

df_crawled_jobs.to_csv(fname, index=False)