In [1]:
import requests
from bs4 import BeautifulSoup as bs
from time import sleep
import pandas as pd

In [2]:
headers = {
    'User-Agent': 
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

In [3]:
def parse_hh(url, headers, pages=[], index_page=1):
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return pages

    soup = bs(response.content, 'html.parser')
    vacancies = soup.find_all('div', {'class': 'vacancy-serp-item__layout'})
    for vacancy in vacancies:
        pages.append(parse_vacancy_hh(vacancy))

    link_next_page = soup.find('a', {'data-qa': 'pager-next'})
    if link_next_page:
        link_next_page =  'https://spb.hh.ru' + link_next_page['href']
    else:
        return pages

    pages = parse_hh(link_next_page, headers, pages, index_page + 1)   
    return pages
     
def parse_vacancy_hh(vacancy):
    sleep(.1)
    name = vacancy.find('a').text

    salary = vacancy.find('span', {'class', 'bloko-header-section-3'})
    if salary:
        salary = salary.text
        min_salary, max_salary, currency_salary = clean_salary(salary)
    else:
        min_salary, max_salary, currency_salary = None, None, None

    link = vacancy.find('a')['href']
  
    return {
        'name': name,
        'salary': salary,
        'min_salary': min_salary,
        'max_salary': max_salary,
        'currency_salary': currency_salary,
        'link': link,
        'source': 'https://hh.ru/',
  }

def clean_salary(vacancy_salary_text, min_salary=None, max_salary=None, currency_salary=None):
    list_salary = vacancy_salary_text.replace('\u202f', '').split()
    for i in range(len(list_salary) - 1):
        if list_salary[i] == 'от':
            min_salary = int(list_salary[i + 1])
        elif list_salary[i] == 'до':
            max_salary = int(list_salary[i + 1])
        elif list_salary[i] == '-':
            min_salary = int(list_salary[i - 1])
            max_salary = int(list_salary[i + 1])
        currency_salary = list_salary[-1]

    return min_salary, max_salary, currency_salary

In [4]:
job = parse_hh('https://spb.hh.ru/search/vacancy?area=88&search_field=name&search_field=company_name&search_field=description&text=python&no_magic=true&L_save_area=true&items_on_page=20', headers)

In [5]:
len(job)

187

In [6]:
for vacancy in job:
    print(f'{vacancy["name"]}')
    print(f'minimum salary {vacancy["min_salary"]}')
    print(f'maximum salary {vacancy["max_salary"]}')
    print(f'currency_salary {vacancy["currency_salary"]}')
    print(f'link {vacancy["link"]}')
    print(f'source {vacancy["source"]}')
    print()

Программист Python
minimum salary None
maximum salary None
currency_salary руб.
link https://spb.hh.ru/vacancy/77774266?from=vacancy_search_list&query=python
source https://hh.ru/

Team Lead Python
minimum salary 400000
maximum salary None
currency_salary руб.
link https://spb.hh.ru/vacancy/77732509?from=vacancy_search_list&query=python
source https://hh.ru/

Python-разработчик
minimum salary None
maximum salary None
currency_salary None
link https://spb.hh.ru/vacancy/77705925?from=vacancy_search_list&query=python
source https://hh.ru/

Middle Python developer (Казань)
minimum salary 160000
maximum salary None
currency_salary руб.
link https://spb.hh.ru/vacancy/76115939?from=vacancy_search_list&query=python
source https://hh.ru/

Junior/Middle Backend-разработчик на Ruby
minimum salary None
maximum salary None
currency_salary руб.
link https://spb.hh.ru/vacancy/76964796?from=vacancy_search_list&query=python
source https://hh.ru/

Lead Python developer
minimum salary None
maximum salary

In [7]:
job_df = pd.DataFrame(job)
job_df

Unnamed: 0,name,salary,min_salary,max_salary,currency_salary,link,source
0,Программист Python,110 000 – 130 000 руб.,,,руб.,https://spb.hh.ru/vacancy/77774266?from=vacanc...,https://hh.ru/
1,Team Lead Python,от 400 000 руб.,400000.0,,руб.,https://spb.hh.ru/vacancy/77732509?from=vacanc...,https://hh.ru/
2,Python-разработчик,,,,,https://spb.hh.ru/vacancy/77705925?from=vacanc...,https://hh.ru/
3,Middle Python developer (Казань),от 160 000 руб.,160000.0,,руб.,https://spb.hh.ru/vacancy/76115939?from=vacanc...,https://hh.ru/
4,Junior/Middle Backend-разработчик на Ruby,110 000 – 150 000 руб.,,,руб.,https://spb.hh.ru/vacancy/76964796?from=vacanc...,https://hh.ru/
...,...,...,...,...,...,...,...
182,Системный администратор (инженер по внедрению),,,,,https://spb.hh.ru/vacancy/76720819?from=vacanc...,https://hh.ru/
183,Системный администратор Linux,,,,,https://spb.hh.ru/vacancy/74406735?from=vacanc...,https://hh.ru/
184,DevOps инженер (Брокерский бизнес),,,,,https://spb.hh.ru/vacancy/76642389?from=vacanc...,https://hh.ru/
185,Системный аналитик,100 000 – 200 000 руб.,,,руб.,https://spb.hh.ru/vacancy/76652169?from=vacanc...,https://hh.ru/


In [8]:
job_df.to_csv('job.csv', index=False)