In [None]:
import requests
from bs4 import BeautifulSoup as bs
from time import sleep
from pprint import pprint

In [None]:
# HTTP headers sent with every request; the User-Agent is a desktop Chrome
# string, split across adjacent literals only to keep the lines short.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/104.0.0.0 Safari/537.36'
    ),
}

In [None]:
def parse_hh(url, headers, pages=None, index_page=1):
    """Collect vacancies from an hh.ru search, following the pager to the end.

    Args:
        url: URL of the first search-results page to fetch.
        headers: HTTP headers passed to ``requests.get``.
        pages: Optional list to append results to. A new list is created when
            omitted, so separate calls never share accumulated results.
        index_page: Unused; kept only so existing callers that pass it still work.

    Returns:
        The list of vacancy dicts produced by ``parse_vacancy_hh``, one per
        vacancy card found. If a page answers with a non-200 status, crawling
        stops and whatever has been collected so far is returned.
    """
    from urllib.parse import urljoin

    # A literal [] default would be created once and shared by every call.
    if pages is None:
        pages = []

    # Iterate instead of recursing: one stack frame per result page is not
    # needed and would hit the recursion limit on very long result sets.
    while url:
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            break

        soup = bs(response.content, 'html.parser')
        for vacancy in soup.find_all('div', {'class': 'vacancy-serp-item__layout'}):
            pages.append(parse_vacancy_hh(vacancy))

        link_next_page = soup.find('a', {'data-qa': 'pager-next'})
        # Resolve the pager href against the page that was actually fetched,
        # rather than assuming a fixed regional host.
        url = urljoin(response.url, link_next_page['href']) if link_next_page else None

    return pages
     
def parse_vacancy_hh(vacancy):
    """Extract the fields of one vacancy card into a flat dict.

    Args:
        vacancy: The parsed element of a single vacancy card; only its
            ``find`` method is used here.

    Returns:
        A dict with keys ``name``, ``salary`` (raw text or ``None``),
        ``min_salary``, ``max_salary``, ``currency_salary``, ``link`` and
        ``source``. The three parsed salary fields are ``None`` when the card
        has no salary element.
    """
    # No request is made in this function; the pause only slows the overall
    # crawl down between page fetches.
    sleep(.1)

    # The first anchor in the card supplies both the title and the URL,
    # so look it up once.
    title_link = vacancy.find('a')
    name = title_link.text
    link = title_link['href']

    # attrs must be a dict; the original set literal {'class', '...'} was a typo.
    salary_tag = vacancy.find('span', {'class': 'bloko-header-section-3'})
    if salary_tag:
        salary = salary_tag.text
        min_salary, max_salary, currency_salary = clean_salary(salary)
    else:
        salary = None
        min_salary, max_salary, currency_salary = None, None, None

    return {
        'name': name,
        'salary': salary,
        'min_salary': min_salary,
        'max_salary': max_salary,
        'currency_salary': currency_salary,
        'link': link,
        'source': 'https://hh.ru/',
    }

def clean_salary(vacancy_salary_text, min_salary=None, max_salary=None, currency_salary=None):
    """Split a salary string into lower bound, upper bound and currency.

    Recognised shapes (after U+202F thousands separators are removed):
    ``'от N CUR'``, ``'до N CUR'``, ``'от N до M CUR'`` and ``'N – M CUR'``,
    where the range separator may be a hyphen, an en dash or an em dash.

    Args:
        vacancy_salary_text: Raw salary text taken from a vacancy card.
        min_salary: Value returned for the lower bound when none is found.
        max_salary: Value returned for the upper bound when none is found.
        currency_salary: Value returned for the currency when the text has
            fewer than two tokens.

    Returns:
        A ``(min_salary, max_salary, currency_salary)`` tuple. Bounds are
        ``int``; a bound whose token is not a valid integer keeps its
        incoming value instead of raising.
    """
    # The site renders ranges with an en dash, so matching only '-' misses them.
    range_separators = ('-', '–', '—')

    def to_int(token, fallback):
        try:
            return int(token)
        except ValueError:
            return fallback

    tokens = vacancy_salary_text.replace('\u202f', '').split()

    for i in range(len(tokens) - 1):
        token, following = tokens[i], tokens[i + 1]
        if token == 'от':
            min_salary = to_int(following, min_salary)
        elif token == 'до':
            max_salary = to_int(following, max_salary)
        elif token in range_separators and i > 0:
            # i > 0 guard: a leading separator must not read tokens[-1].
            min_salary = to_int(tokens[i - 1], min_salary)
            max_salary = to_int(following, max_salary)

    # The currency is the trailing token; a single-token text carries no
    # separate currency, so the incoming default is kept in that case.
    if len(tokens) > 1:
        currency_salary = tokens[-1]

    return min_salary, max_salary, currency_salary

In [None]:
# Scrape every result page of the hh.ru "python" search; the URL is split
# into adjacent literals purely for readability.
job = parse_hh(
    'https://spb.hh.ru/search/vacancy'
    '?area=88'
    '&search_field=name&search_field=company_name&search_field=description'
    '&text=python&no_magic=true&L_save_area=true&items_on_page=20',
    headers=headers,
)

In [None]:
# Number of vacancy records returned by the scrape.
len(job)

177

In [None]:
# Install the MongoDB server package; apt's stdout is redirected to the file `log`.
!apt install mongodb > log





In [None]:
# Start the MongoDB service so that the client created later can connect to it.
!service mongodb start

 * Starting database mongodb
   ...done.


In [None]:
from pymongo import MongoClient

In [None]:
# No host/port is given, so pymongo's default connection settings are used.
client = MongoClient()

In [None]:
# Handle to the `jobs` database, obtained by attribute access on the client.
db = client.jobs

In [None]:
# Handle to the `hh_ru` collection; the scraped vacancies are stored here.
collection_jobs = db.hh_ru

In [None]:
def check_new_job(vacancies, collection=None):
    """Store the vacancies that are not in the collection yet.

    A vacancy counts as already stored when a document with the same
    ``link`` exists. Each vacancy is written with a single upsert, so the
    existence check and the insert happen in one server-side operation
    instead of a find followed by a separate insert.

    Args:
        vacancies: Iterable of vacancy dicts; each must contain a ``'link'`` key.
        collection: Collection to write to. Defaults to the notebook-level
            ``collection_jobs`` when omitted.

    Returns:
        The number of vacancies that were newly inserted.

    Raises:
        KeyError: If a vacancy has no ``'link'`` key.
    """
    if collection is None:
        collection = collection_jobs

    inserted = 0
    for vacancy in vacancies:
        result = collection.update_one(
            {'link': vacancy['link']},
            # $setOnInsert writes the fields only when the upsert creates a
            # document; an existing document is left unchanged.
            {'$setOnInsert': vacancy},
            upsert=True,
        )
        if result.upserted_id is not None:
            inserted += 1
    return inserted

In [None]:
# Insert the scraped vacancies whose link is not already in the collection.
check_new_job(job)

In [None]:
# Count on the server instead of downloading every document just to call len().
collection_jobs.count_documents({})

177

In [None]:
# Use a dedicated loop variable: naming it `job` would overwrite the scraped
# list held in `job`, so re-running an earlier cell that uses it would break.
for stored_vacancy in collection_jobs.find():
    pprint(stored_vacancy, sort_dicts=False)

{'_id': ObjectId('640b5a5b61508961a1cf5ab3'),
 'name': 'Программист/Python-разработчик',
 'salary': '50\u202f000 – 70\u202f000 руб.',
 'min_salary': None,
 'max_salary': None,
 'currency_salary': 'руб.',
 'link': 'https://spb.hh.ru/vacancy/77901810?from=vacancy_search_list&query=python',
 'source': 'https://hh.ru/'}
{'_id': ObjectId('640b5a5b61508961a1cf5ab4'),
 'name': 'Team Lead Python',
 'salary': 'от 400\u202f000 руб.',
 'min_salary': 400000,
 'max_salary': None,
 'currency_salary': 'руб.',
 'link': 'https://spb.hh.ru/vacancy/77732509?from=vacancy_search_list&query=python',
 'source': 'https://hh.ru/'}
{'_id': ObjectId('640b5a5b61508961a1cf5ab5'),
 'name': 'Python-разработчик',
 'salary': None,
 'min_salary': None,
 'max_salary': None,
 'currency_salary': None,
 'link': 'https://spb.hh.ru/vacancy/77705925?from=vacancy_search_list&query=python',
 'source': 'https://hh.ru/'}
{'_id': ObjectId('640b5a5b61508961a1cf5ab6'),
 'name': 'Ведущий Python-developer',
 'salary': '240\u202f000 – 

In [None]:
def salary_enough_for_my_greed(collection, salary, currency):
    """Return stored vacancies that pay more than ``salary`` in ``currency``.

    A vacancy qualifies when its ``currency_salary`` equals ``currency`` and
    either of its salary bounds is strictly greater than ``salary``. Missing
    (``None``) bounds are skipped explicitly, so a vacancy that only has an
    upper bound is still considered; relying on a swallowed ``TypeError``
    dropped those.

    Args:
        collection: Object whose ``find()`` yields vacancy documents (dicts).
        salary: Threshold that at least one bound must exceed.
        currency: Currency string to match exactly, e.g. ``'руб.'``.

    Returns:
        A list of the matching documents, in the order ``find()`` yields them.
    """
    def exceeds(bound):
        return bound is not None and bound > salary

    return [
        element
        for element in collection.find()
        if element.get('currency_salary') == currency
        and (exceeds(element.get('min_salary')) or exceeds(element.get('max_salary')))
    ]

In [None]:
# Query the stored vacancies priced in 'руб.' against a 250000 threshold.
salary_enough_for_my_greed(collection_jobs, 250000, 'руб.')

[{'_id': ObjectId('640b5a5b61508961a1cf5ab4'),
  'name': 'Team Lead Python',
  'salary': 'от 400\u202f000 руб.',
  'min_salary': 400000,
  'max_salary': None,
  'currency_salary': 'руб.',
  'link': 'https://spb.hh.ru/vacancy/77732509?from=vacancy_search_list&query=python',
  'source': 'https://hh.ru/'}]