In [1]:
pip install linkedin-jobs-scraper

Collecting linkedin-jobs-scraper
  Downloading linkedin_jobs_scraper-2.0.6-py3-none-any.whl (34 kB)
Collecting selenium<4.0.0,>=3.141.0
  Downloading selenium-3.141.0-py2.py3-none-any.whl (904 kB)
     -------------------------------------- 904.6/904.6 kB 3.4 MB/s eta 0:00:00
Collecting websocket-client<1.0.0,>=0.58.0
  Downloading websocket_client-0.59.0-py2.py3-none-any.whl (67 kB)
     -------------------------------------- 67.2/67.2 kB 730.5 kB/s eta 0:00:00
Installing collected packages: websocket-client, selenium, linkedin-jobs-scraper
  Attempting uninstall: selenium
    Found existing installation: selenium 4.2.0
    Uninstalling selenium-4.2.0:
      Successfully uninstalled selenium-4.2.0
Successfully installed linkedin-jobs-scraper-2.0.6 selenium-3.141.0 websocket-client-0.59.0
Note: you may need to restart the kernel to use updated packages.


In [25]:
import logging
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.filterwarnings("ignore", category=DeprecationWarning)
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters, OnSiteOrRemoteFilters

In [8]:
logging.basicConfig(level=logging.INFO)

In [11]:

# Fired once for each successfully processed job
software_developer = []
def on_data(data: EventData):
    software_developer.append([data.job_id, data.title, data.company, data.description, ['Software Developer']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Software Developer',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=100,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Software Developer options=QueryOptions(limit=100 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52356')
INFO:li:scraper:('[Software Developer][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Software+Developer&location=Worldwide')
INFO:li:scraper:('[Software Developer][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Software Developer][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Software Developer][Worldwide]', 'OK')
INFO:li:scraper:('[Software Developer][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Software Developer][Worldwide]', 'Found 25 jobs')

[ON_END]


In [12]:
# Fired once for each successfully processed job
business_analyst = []
def on_data(data: EventData):
    business_analyst.append([data.job_id, data.title, data.company, data.description, ['Business Analyst']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Business Analyst',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=100,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Business Analyst options=QueryOptions(limit=100 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52400')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Business+Analyst&location=Worldwide')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'OK')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Business Analyst][Worldwide]', 'Found 25 jobs')
INFO:li:scraper

[ON_END]


In [16]:
# Fired once for each successfully processed job
marketing_analyst = []
def on_data(data: EventData):
    marketing_analyst.append([data.job_id, data.title, data.company, data.description, ['Marketing Analyst']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Marketing Analyst',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=150,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Marketing Analyst options=QueryOptions(limit=150 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52459')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Marketing+Analyst&location=Worldwide')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'OK')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Marketing Analyst][Worldwide]', 'Found 25 jobs')
INFO:li

INFO:li:scraper:('[Marketing Analyst][Worldwide][103]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][104]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][105]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][106]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][107]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][108]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][109]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][110]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][111]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][112]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][113]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][114]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][115]', 'Processed')
INFO:li:scraper:('[Marketing Analyst][Worldwide][116]', 'Processed')
INFO:li:scraper:('[Marketing Analy

[ON_END]


In [17]:
# Fired once for each successfully processed job
sales_analyst = []
def on_data(data: EventData):
    sales_analyst.append([data.job_id, data.title, data.company, data.description, ['Sales Analyst']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Sales Analyst',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=110,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Sales Analyst options=QueryOptions(limit=110 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52560')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Sales+Analyst&location=Worldwide')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'OK')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Sales Analyst][Worldwide]', 'Found 25 jobs')
INFO:li:scraper:('[Sales Analyst][World

INFO:li:scraper:('[Sales Analyst][Worldwide][110]', 'Processed')


[ON_END]


In [18]:
# Fired once for each successfully processed job
ux_designer = []
def on_data(data: EventData):
    ux_designer.append([data.job_id, data.title, data.company, data.description, ['UX Designer']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='UX Designer',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=120,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=UX Designer options=QueryOptions(limit=120 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52603')
INFO:li:scraper:('[UX Designer][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=UX+Designer&location=Worldwide')
INFO:li:scraper:('[UX Designer][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[UX Designer][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[UX Designer][Worldwide]', 'OK')
INFO:li:scraper:('[UX Designer][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[UX Designer][Worldwide]', 'Found 25 jobs')
INFO:li:scraper:('[UX Designer][Worldwide][1]', 'Proces

INFO:li:scraper:('[UX Designer][Worldwide][114]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][115]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][116]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][117]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][118]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][119]', 'Processed')
INFO:li:scraper:('[UX Designer][Worldwide][120]', 'Processed')


[ON_END]


In [19]:
# Fired once for each successfully processed job
product_manager = []
def on_data(data: EventData):
    product_manager.append([data.job_id, data.title, data.company, data.description, ['Product Manager']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Product Manager',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=150,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Product Manager options=QueryOptions(limit=150 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52639')
INFO:li:scraper:('[Product Manager][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Product+Manager&location=Worldwide')
INFO:li:scraper:('[Product Manager][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Product Manager][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Product Manager][Worldwide]', 'OK')
INFO:li:scraper:('[Product Manager][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Product Manager][Worldwide]', 'Found 25 jobs')
INFO:li:scraper:('[Prod

INFO:li:scraper:('[Product Manager][Worldwide][106]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][107]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][108]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][109]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][110]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][111]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][112]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][113]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][114]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][115]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][116]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][117]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][118]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][119]', 'Processed')
INFO:li:scraper:('[Product Manager][Worldwide][120]', 'Process

[ON_END]


In [20]:
# Fired once for each successfully processed job
account_manager = []
def on_data(data: EventData):
    account_manager.append([data.job_id, data.title, data.company, data.description, ['Account Manager']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Account Manager',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=100,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Account Manager options=QueryOptions(limit=100 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52727')
INFO:li:scraper:('[Account Manager][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Account+Manager&location=Worldwide')
INFO:li:scraper:('[Account Manager][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Account Manager][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Account Manager][Worldwide]', 'OK')
INFO:li:scraper:('[Account Manager][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[Account Manager][Worldwide]', 'Found 25 jobs')
INFO:li:scraper:('[Acco

[ON_END]


In [21]:
# Fired once for each successfully processed job
machine_learning = []
def on_data(data: EventData):
    machine_learning.append([data.job_id, data.title, data.company, data.description, ['Machine Learning Engineer']])


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=ChromeDriverManager().install(),  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=7,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=70  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Machine Learning Engineer',
        options=QueryOptions(
            apply_link=False,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page mus be navigated. Default to False.
            skip_promoted_jobs=False,  # Skip promoted jobs. Default to False.
            limit=160,
        )
    ),
]

scraper.run(queries)

INFO:WDM:Get LATEST chromedriver version for google-chrome 110.0.5481
INFO:WDM:Driver [C:\Users\pc\.wdm\drivers\chromedriver\win32\110.0.5481\chromedriver.exe] found in cache
INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Machine Learning Engineer options=QueryOptions(limit=160 locations=['Worldwide'] optimize=False apply_link=False skip_promoted_jobs=False))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:52763')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?keywords=Machine+Learning+Engineer&location=Worldwide')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide]', 'OK')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[

INFO:li:scraper:('[Machine Learning Engineer][Worldwide][92]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][93]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][94]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][95]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][96]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][97]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][98]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][99]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][100]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][101]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][102]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][103]', 'Processed')
INFO:li:scraper:('[Machine Learning Engineer][Worldwide][104]', 'Processed')
INFO:li

[ON_END]


In [23]:
job_posting_matrix = software_developer + ux_designer + business_analyst + sales_analyst + product_manager + account_manager + machine_learning + marketing_analyst
job_posting_matrix

[['3503619612',
  'Desenvolvedor de Software Jr - Low Code / No Code - Home Office',
  'Unimed Grande Florianópolis',
  'Sobre a UGF:\n\n\n\n\nSomos a maior cooperativa de saúde de Santa Catarina, há mais de 50 anos nos dedicamos a cuidar da saúde e do bem-estar das pessoas. Faça parte da equipe Unimed Grande Florianópolis e contribua para a qualidade e a eficiência de nossos serviços. Venha ser um UGFer!\n\n\n\n\nO que você fará:\n\n\n\n\nAtuar em todo o processo de desenvolvimento de produto, participando de equipes multidisciplinares. Codificar e automatizar testes, bem como contribuir para evolução de sistemas já existentes. Sugerir mudanças nos processos executados pela equipe, considerando aspectos relacionados a escalabilidade, performance, qualidade de código e arquitetura de software. Deverá manter-se atualizado quanto às tendências de mercado, em relação a novas tecnologias e metodologias utilizadas no desenvolvimento de produtos.\n\n\n\n\nPrecisamos que você tenha:\n\n\n\n\n

In [59]:
data = pd.DataFrame(data = job_posting_matrix, columns=['Job ID', 'Job Title', 'Company', 'Job Description', 'Job Profile'])

In [60]:
data['Job ID'] = data['Job ID'].astype(str)
data['Job Profile'] = data['Job Profile'].str[0]

In [61]:
data.rename(columns = {'Job Description':'Job_Description'}, inplace = True)

In [63]:
data.to_csv('Job_Posting_dataset.csv', index=False)