# Scrape Linkedin Data

In [1]:
# Make sure we have installed the dependency
! pip freeze | grep linkedin

linkedin-scraper==2.11.2


In [3]:
from linkedin_scraper import JobSearch, Job, actions
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

class _JobSearch(JobSearch):
    def __init__(self, final_url=None, **kwargs):
        self.final_url = final_url
        self.current_url = None
        super().__init__(**kwargs)
    
    def search(self, search_term: str, page_n) -> List[Job]:
        if self.final_url is None:
            self.current_url = os.path.join(self.base_url, "search") + f"?keywords={urllib.parse.quote(search_term)}&refresh=true"
            self.driver.get(self.current_url)

            # Get redirection URL
            self.final_url = self.driver.current_url
        else:
            self.current_url = os.path.join(self.final_url, f"&start={25*(page_n-1)}")
            self.driver.get(self.current_url)
        
        self.scroll_to_bottom()
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_listing_class_name = "jobs-search-results-list"
        job_listing = self.wait_for_element_to_load(name=job_listing_class_name)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.3)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.6)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        self.scroll_class_name_element_to_page_percent(job_listing_class_name, 1)
        self.focus()
        sleep(self.WAIT_FOR_ELEMENT_TIMEOUT)

        job_results = []
        for job_card in self.wait_for_all_elements_to_load(name="job-card-list", base=job_listing):
            job = self.scrape_job_card(job_card)
            job_results.append(job)
        return job_results

def are_same(job1: Job, job2: Job):
    if job1.job_title == job2.job_title and job1.company == job2.company:
        return True
    return False

## 1. Scrape Job Search

Scrape the first 20 pages of the search result.

In [4]:
# Set up the lower-level services for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) # if email and password isnt given, it'll prompt in terminal
print("... Logged in.")
job_search = _JobSearch(driver=driver, close_on_complete=False, scrape=False)

... Logged in.


In [5]:
%%time
N_PAGES = 22
SEARCH_KEYWORD = "data"

jobs = []
for page_n in range(1, N_PAGES+1):
    pprint(f"Searching jobs... Keyword: {SEARCH_KEYWORD}; Page {page_n}/{N_PAGES}")
    new_batch = job_search.search(SEARCH_KEYWORD, page_n)

    # Check if the new batch of jobs are duplicates, 
    # which means we have gone through all the pages and should quit scraping.
    if jobs and are_same(new_batch[0], jobs[0]):
        pprint("Found duplicate results! All the pages have been scraped. Quiting...")
        break
        
    jobs.extend(new_batch)
    pprint(f"FINISHED PAGE: {page_n}")

'Searching jobs... Keyword: data; Page 1/22'
'FINISHED PAGE: 1'
'Searching jobs... Keyword: data; Page 2/22'
'FINISHED PAGE: 2'
'Searching jobs... Keyword: data; Page 3/22'
'FINISHED PAGE: 3'
'Searching jobs... Keyword: data; Page 4/22'
'FINISHED PAGE: 4'
'Searching jobs... Keyword: data; Page 5/22'
'FINISHED PAGE: 5'
'Searching jobs... Keyword: data; Page 6/22'
'FINISHED PAGE: 6'
'Searching jobs... Keyword: data; Page 7/22'
'FINISHED PAGE: 7'
'Searching jobs... Keyword: data; Page 8/22'
'FINISHED PAGE: 8'
'Searching jobs... Keyword: data; Page 9/22'
'FINISHED PAGE: 9'
'Searching jobs... Keyword: data; Page 10/22'
'FINISHED PAGE: 10'
'Searching jobs... Keyword: data; Page 11/22'
'FINISHED PAGE: 11'
'Searching jobs... Keyword: data; Page 12/22'
'FINISHED PAGE: 12'
'Searching jobs... Keyword: data; Page 13/22'
'FINISHED PAGE: 13'
'Searching jobs... Keyword: data; Page 14/22'
'FINISHED PAGE: 14'
'Searching jobs... Keyword: data; Page 15/22'
'FINISHED PAGE: 15'
'Searching jobs... Keyword: 

TimeoutException: Message: 


In [7]:
len(jobs)

313

In [24]:
# Save today's crawl temporarily
import pickle
import datetime

current_date = datetime.datetime.now().strftime("%Y-%m-%d")
fname = f"helsinki_data_jobs_{current_date}.pkl"
with open(f"../data/{fname}", "wb") as f:
    dicted_jobs = [job.to_dict() for job in jobs]
    pickle.dump(dicted_jobs,f)

## 2. Scrape job postings

In [5]:
import logging
from linkedin_scraper import Job, actions

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class _Job(Job):
    def __init__(self, **kwargs):
       self.job_title = ""
       self.required_skills = ""
       self.job_type_1 = ""
       self.job_type_2 = ""
 
       super().__init__(**kwargs)
    
    def scrape_logged_in(self, close_on_complete=True):
        driver = self.driver
        
        driver.get(self.linkedin_url)
        self.focus()
        self.job_title = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-unified-top-card__job-title')]").text.strip()
        self.company = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a[1]").text.strip()
        self.company_linkedin_url = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//a").get_attribute("href")
        self.location = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//*").text.strip()
        self.posted_date = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-jobs-unified-top-card__primary-description')]//span[3]").text.strip()
        self.job_type_1 = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')]/span").text.strip()
        self.job_description = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'jobs-description')]").text.strip()
        
        try:
            self.required_skills = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][1]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.required_skills += self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'job-details-how-you-match__skills-item')][2]//a").text.strip()
        except TimeoutException as e:
            logger.error(str(e))

        try:
            self.job_type_2 = self.wait_for_element_to_load(by=By.XPATH, name="(//*[contains(@class, 'ui-label ui-label--accent-3 text-body-small')])[2]/span").text.strip()
        except TimeoutException:
            self.job_type_2 = ""
            
        try:
            self.applicant_count = self.wait_for_element_to_load(by=By.XPATH, name="jobs-unified-top-card__applicant-count").text.strip()
        except TimeoutException:
            self.applicant_count = 0
        
        try:
            self.benefits = self.wait_for_element_to_load(by=By.XPATH, name="//*[contains(@class, 'salary-main-rail-card')]").text.strip()
        except TimeoutException:
            self.benefits = ""

        if close_on_complete:
            driver.close()

In [6]:
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import os
from pprint import pprint
import urllib
from time import sleep

def set_chrome_options() -> Options:
    """Sets chrome options for Selenium.
    Chrome options for headless browser is enabled.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_prefs = {}
    chrome_options.experimental_options["prefs"] = chrome_prefs
    chrome_prefs["profile.default_content_settings"] = {"images": 2}
    return chrome_options

In [7]:
# Set up low-level servies for scraping
driver = webdriver.Chrome(options=set_chrome_options())
actions.login(driver, os.environ["EMAIL"], os.environ["PWORD"]) 
print("... Logged in.")

... Logged in.


Ignore the error logs!

In [13]:
import pickle

with open("../data/helsinki_data_jobs_2023-10-22.pkl", "rb") as f:
    jobs = pickle.load(f)

In [15]:
%%time
from time import sleep

N_JOBS = len(jobs)

crawled_jobs = []
for i, job in enumerate(jobs):
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
    crawled_jobs.append(_crawled_job)
    sleep(3)

Crawling... Jobs 160/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>

ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c02

StaleElementReferenceException: Message: stale element reference: stale element not found
  (Session info: headless chrome=114.0.5735.90); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c1fb53c <unknown>
#3 0x55fd0c1fc97e <unknown>
#4 0x55fd0c1fca4c <unknown>
#5 0x55fd0c22d922 <unknown>
#6 0x55fd0c252012 <unknown>
#7 0x55fd0c228b03 <unknown>
#8 0x55fd0c2521de <unknown>
#9 0x55fd0c26a30e <unknown>
#10 0x55fd0c251de3 <unknown>
#11 0x55fd0c2272dd <unknown>
#12 0x55fd0c22834e <unknown>
#13 0x55fd0c4873e4 <unknown>
#14 0x55fd0c48b3d7 <unknown>
#15 0x55fd0c495b20 <unknown>
#16 0x55fd0c48c023 <unknown>
#17 0x55fd0c45a1aa <unknown>
#18 0x55fd0c4b06b8 <unknown>
#19 0x55fd0c4b0847 <unknown>
#20 0x55fd0c4c0243 <unknown>
#21 0x7fd44c094ac3 <unknown>


In [18]:
import pandas as pd

In [32]:
df_crawled_jobs = pd.DataFrame([vars(job) for job in crawled_jobs]).drop(columns=["driver"])

In [33]:
df_crawled_jobs

Unnamed: 0,job_title,required_skills,job_type_1,job_type_2,linkedin_url,company,company_linkedin_url,location,posted_date,applicant_count,job_description,benefits
0,RWE Scientist / Epidemiologist,"Customer Relationship Management (CRM), Epidem...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3737909849/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 1 week...",1 week ago,0,About the job\nMedEngine is a digitally minded...,
1,Data Engineer,"Data Engineering, Git, Python (Programming Lan...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3736532279/...,Suomen Palloliitto - Football Association of F...,https://www.linkedin.com/company/football-asso...,Suomen Palloliitto - Football Association of F...,1 week ago,0,About the job\nDATA ENGINEER\n\nSUOMEN PALLOLI...,
2,Senior Game Analyst,"Analytical Skills, Data Analysis, Economics, M...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3717037977/...,"Next Games, A Netflix Game Studio",https://www.linkedin.com/company/next-games/life,"Next Games, A Netflix Game Studio · Helsinki, ...",Reposted 6 days ago,0,About the job\nNext Games is a Netflix Game St...,
3,Data Scientist,"Data Analysis, Data Science, Machine Learning,...",Hybrid,Full-time,https://www.linkedin.com/jobs/view/3735986015/...,MedEngine,https://www.linkedin.com/company/medengine/life,"MedEngine · Helsinki, Uusimaa, Finland 1 week...",1 week ago,0,About the job\nMedEngine is a digitally minded...,
4,Data Science - Machine Learning Engineer,"Artificial Intelligence (AI), Computer Science...",Remote,Full-time,https://www.linkedin.com/jobs/view/3629670334/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland Reposted 2 ...",Reposted 2 weeks ago,0,About the job\nJob Description\n\nTeam purpose...,
...,...,...,...,...,...,...,...,...,...,...,...,...
229,Senior Privacy Counsel,"Communication, Data Privacy, General Data Prot...",On-site,Full-time,https://www.linkedin.com/jobs/view/3706022536/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland Reposted 1 ...",Reposted 1 week ago,0,About the job\nJob Description\n\nAre you an e...,
230,"Kotlin Backend Engineer, Merchant Group",Back-End Web Development and Software Developm...,Hybrid,Full-time,https://www.linkedin.com/jobs/view/3742731278/...,Wolt,https://www.linkedin.com/company/wolt-oy/life,"Wolt · Helsinki, Uusimaa, Finland 3 days ago ...",3 days ago,0,About the job\nJob Description\n\nWe’re lookin...,
231,Doctoral Researcher in Bacterial Temperature A...,"Research ProjectsAdaptation, Bioinformatics, B...",Full-time,,https://www.linkedin.com/jobs/view/3741784865/...,University of Helsinki,https://www.linkedin.com/company/university-of...,"University of Helsinki · Helsinki, Uusimaa, Fi...",5 days ago,0,About the job\nThe University of Helsinki is o...,Benefits found in job post\nMedical insurance
232,Cloud Service Manager,Cloud Computing and EnglishFostering inclusivi...,Hybrid,Full-time,https://www.linkedin.com/jobs/view/3727734994/...,OpenText,https://www.linkedin.com/company/opentext/life,"OpenText · Tampere, Pirkanmaa, Finland 2 week...",2 weeks ago,0,About the job\nOpenText - The Information Comp...,


In [30]:
df_crawled_jobs.to_csv("../data/crawled_jobs_1-236_checkpoint.csv", index=False)

### 2.1 Continue from the failed point

In [31]:
# Continue

CONTINUE_FROM = 161

for i, job in enumerate(jobs):
    if i+1<CONTINUE_FROM:
        continue
        
    print(f"Crawling... Jobs {i+1}/{N_JOBS}")
    _crawled_job = _Job(linkedin_url=job.get("linkedin_url"), driver=driver, close_on_complete=False, scrape=True)
    crawled_jobs.append(_crawled_job)
    sleep(1)

Crawling... Jobs 194/313
Crawling... Jobs 195/313
Crawling... Jobs 196/313
Crawling... Jobs 197/313
Crawling... Jobs 198/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 199/313
Crawling... Jobs 200/313
Crawling... Jobs 201/313
Crawling... Jobs 202/313
Crawling... Jobs 203/313
Crawling... Jobs 204/313
Crawling... Jobs 205/313
Crawling... Jobs 206/313
Crawling... Jobs 207/313
Crawling... Jobs 208/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 209/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 210/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 211/313
Crawling... Jobs 212/313
Crawling... Jobs 213/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 214/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 215/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>

ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c02

Crawling... Jobs 216/313
Crawling... Jobs 217/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 218/313
Crawling... Jobs 219/313
Crawling... Jobs 220/313
Crawling... Jobs 221/313
Crawling... Jobs 222/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 223/313
Crawling... Jobs 224/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 225/313
Crawling... Jobs 226/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 227/313
Crawling... Jobs 228/313
Crawling... Jobs 229/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 230/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 231/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 232/313
Crawling... Jobs 233/313
Crawling... Jobs 234/313
Crawling... Jobs 235/313


ERROR:__main__:Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>



Crawling... Jobs 236/313


TimeoutException: Message: 
Stacktrace:
#0 0x55fd0c4c74e3 <unknown>
#1 0x55fd0c1f6c76 <unknown>
#2 0x55fd0c232c96 <unknown>
#3 0x55fd0c232dc1 <unknown>
#4 0x55fd0c26c7f4 <unknown>
#5 0x55fd0c25203d <unknown>
#6 0x55fd0c26a30e <unknown>
#7 0x55fd0c251de3 <unknown>
#8 0x55fd0c2272dd <unknown>
#9 0x55fd0c22834e <unknown>
#10 0x55fd0c4873e4 <unknown>
#11 0x55fd0c48b3d7 <unknown>
#12 0x55fd0c495b20 <unknown>
#13 0x55fd0c48c023 <unknown>
#14 0x55fd0c45a1aa <unknown>
#15 0x55fd0c4b06b8 <unknown>
#16 0x55fd0c4b0847 <unknown>
#17 0x55fd0c4c0243 <unknown>
#18 0x7fd44c094ac3 <unknown>
