In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import time
import re
from tqdm import tqdm

from requests.adapters import HTTPAdapter
# Shared HTTP session: requests sent through `s` to https://www.jobs.ch go
# through an adapter configured with max_retries=10.
retry_adapter = HTTPAdapter(max_retries=10)
s = requests.Session()
s.mount('https://www.jobs.ch', retry_adapter)

In [3]:
# Fragments of the search URL. The cells below concatenate them as
# url_static + url_vac + url_page + <page number> + url_term + url_search.
url_static = 'https://www.jobs.ch'  # also prefixed to the job hrefs found on result pages
url_vac = '/en/vacancies/'
url_page, url_page_nb = '?page=', '1'
url_term = '&term='

# The two values to edit for a different search.
url_search = 'Tax'  # REPLACE
csv_name = 'tax_jobs.csv'  # REPLACE

# Get URLs of search pages

In [4]:
# Assemble the URL of the first results page from the configured fragments.
url = "".join([url_static, url_vac, url_page, url_page_nb, url_term, url_search])
url

'https://www.jobs.ch/en/vacancies/?page=1&term=Tax'

In [5]:
# Fetch the first results page through the shared session `s`, so the retry
# adapter mounted for https://www.jobs.ch is actually used (a bare
# requests.get() creates its own session and bypasses it).
response = s.get(url, timeout=30)  # seconds; do not hang forever on a stalled connection
response.raise_for_status()  # stop here instead of parsing an HTTP error page
parsed_html = BeautifulSoup(response.content, "html.parser")

In [6]:
# The last whitespace-separated token of the pagination counter's text is used
# as the number of result pages.
parsed_nb_page = parsed_html.find(class_="count hidden-sm hidden-md hidden-lg")
counter_tokens = parsed_nb_page.get_text().split() if parsed_nb_page is not None else []
try:
    nb_page = int(counter_tokens[-1])
except (IndexError, ValueError):
    # Counter element missing, empty or not numeric. Fall back to the page that
    # was just fetched instead of failing with an opaque AttributeError.
    # NOTE(review): assumes a missing counter means a single page of results;
    # it could also mean the site's markup changed. Confirm if this message shows.
    print("Pagination counter not found or unreadable; assuming a single result page")
    nb_page = 1
nb_page

42

In [7]:
# One search URL per result page, numbered 1..nb_page. The comprehension keeps
# its loop variable local, so the module-level `url` built for the first page
# is not overwritten and the earlier request cell stays safe to re-run.
pages = [
    url_static + url_vac + url_page + str(page_nb) + url_term + url_search
    for page_nb in range(1, nb_page + 1)
]
pages[:2]

['https://www.jobs.ch/en/vacancies/?page=1&term=Tax',
 'https://www.jobs.ch/en/vacancies/?page=2&term=Tax']

# Get URLs of vacancies

In [8]:
links = []

for page in tqdm(pages):

    # Request the result page through the shared session `s` so the retry
    # adapter mounted for https://www.jobs.ch applies. The timeout (seconds)
    # keeps one stalled connection from blocking the whole loop.
    try:
        response = s.get(page, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failed page and continue with the others.
        # tqdm.write prints without breaking the progress bar.
        tqdm.write("Skipping result page {}: {}".format(page, exc))
        continue
    parsed_page = BeautifulSoup(response.content, "html.parser")

    # Find all job links
    parsed_links = parsed_page.find_all(class_="x--job-link t--job-link")
    for link in parsed_links:
        href = link.get("href")
        if href:  # an element without an href cannot be followed
            links.append(url_static + href)

100%|██████████| 42/42 [00:29<00:00,  1.53it/s]


In [9]:
# Display the first two collected job URLs as a quick sanity check.
links[:2]

['https://www.jobs.ch/en/vacancies/detail/8774245/?source=vacancy_search_promo',
 'https://www.jobs.ch/en/vacancies/detail/8858043/?source=vacancy_search_promo']

# Iterate over vacancies to scrape job details

In [10]:
jobs = []


def _find_node(parsed, css_class, child_tag=None):
    """Return the first element with class `css_class`, or None.

    When `child_tag` is given, return that element's first `child_tag`
    descendant instead (None if either lookup finds nothing).
    """
    node = parsed.find(class_=css_class)
    if node is not None and child_tag is not None:
        node = node.find(child_tag)
    return node


def _find_text(parsed, css_class, child_tag=None):
    """Return the text of the element located by `_find_node`, or the string "None"."""
    node = _find_node(parsed, css_class, child_tag)
    return node.get_text() if node is not None else "None"


for link in tqdm(links):

    # Request the job page through the shared session `s` so the retry adapter
    # mounted for https://www.jobs.ch applies. The timeout (seconds) keeps one
    # stalled connection from blocking the whole loop.
    try:
        response = s.get(link, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failed job page and continue, so a single
        # error does not abort a scrape that takes several minutes.
        tqdm.write("Skipping job page {}: {}".format(link, exc))
        continue
    parsed_job = BeautifulSoup(response.content, "html.parser")

    # Find items. Each selector is looked up once; a missing element or a
    # missing child yields the placeholder string "None" instead of raising.
    title = _find_text(parsed_job, "e-heading vacancy-ad-title h-base x--vacancy-title")
    company = _find_text(parsed_job, "vacancy-ad-company x--vacancy-ad-company", "span")
    location = _find_text(parsed_job, "vacancy-ad-company-location")
    description = _find_text(parsed_job, "container vacancy-detail-content")
    date = _find_text(parsed_job, "vacancy-ad-date small", "span")

    original_anchor = _find_node(
        parsed_job,
        "text-center margin-top-2 margin-top-sm-4 vacancy-detail-original-btn visible-xs",
        "a",
    )
    original_link = original_anchor.get("href", "None") if original_anchor is not None else "None"

    row = {
        'title': title,
        'company': company,
        'location': location,
        'description': description,
        'link': link,
        'original_link': original_link,
        'date': date
    }

    jobs.append(row)
    time.sleep(0.01)  # short pause between successive job-page requests

100%|██████████| 834/834 [11:07<00:00,  1.53it/s]


# Create a dataframe

In [11]:
# Build the frame in one step. `columns=` fixes the column order and still
# yields an empty frame with these columns when no job was scraped; selecting
# the columns after construction raises KeyError on an empty frame.
jobs_df = pd.DataFrame(
    jobs,
    columns=['date', 'title', 'company', 'location', 'description', 'link', 'original_link'],
)
jobs_df.head()

Unnamed: 0,date,title,company,location,description,link,original_link
0,"Friday, April 5, 2019","Steuerexpertin / Steuerexperte 60-100%, Bern City",Credit Suisse AG,Bern,"Steuerexpertin / Steuerexperte 60-100%, Bern C...",https://www.jobs.ch/en/vacancies/detail/877424...,https://tas-creditsuisse.taleo.net/careersecti...
1,"Monday, April 8, 2019",Treuhänder (m/w) 80 - 100%,BDO AG,Aarau,Werden Sie Teil unseres erfolgreichen Unterneh...,https://www.jobs.ch/en/vacancies/detail/885804...,https://recruitingapp-2789.umantis.com/Vacanci...
2,"Friday, April 5, 2019",Junior Expert - Tax Reporting,Vontobel,Zürich,Junior Expert - Tax ReportingWir bei Vontobel ...,https://www.jobs.ch/en/vacancies/detail/885461...,
3,"Monday, April 22, 2019",Corporate Tax Manager/in,IVP Ivan Vaccari Personalberatung,Zürich,Unsere Kundin ist eine expandierende Treuhand...,https://www.jobs.ch/en/vacancies/detail/888751...,
4,dimanche 21 avril 2019,Operational Senior Tax Expert,SIX,Olten,SIX betreibt die schweizerische Finanzplatzinf...,https://www.jobs.ch/en/vacancies/detail/867074...,https://ohws.prospective.ch/public/v1/jobs/5e2...


# Save to CSV

In [12]:
# Write the scraped jobs to `csv_name` as UTF-8, leaving out the DataFrame index.
jobs_df.to_csv(csv_name, index=False, encoding='utf-8')