In [35]:
import pandas as pd
import csv
import requests
from typing import Any
from datetime import datetime
from bs4 import BeautifulSoup

In [27]:
def get_vacancies(params, programming_language: str="Python") -> None:
    url = "https://api.hh.ru/vacancies"
    headers = {
        "User-Agent": "Your User Agent",
    }

    response = requests.get(url, params=params, headers=headers)
    result = []
    if response.status_code == 200:
        data = response.json()
        vacancies = data.get("items", [])
        for vacancy in vacancies:
            vacancy_id = vacancy.get("id")
            vacancy_title = vacancy.get("name")
            vacancy_url = vacancy.get("alternate_url")
            company_name = vacancy.get("employer", {}).get("name")
            city = vacancy.get("area").get("name")
            published_datetime = vacancy.get("published_at")
            published_date = datetime.fromisoformat(published_datetime).date()
            salary_field: None | dict[str, Any ] = vacancy["salary"]
            
            if salary_field is not None:
                salary_from, salary_to = salary_field["from"], salary_field["to"]
                salary = salary_to or salary_from
            else:
                salary = None
            
            result.append(
                [vacancy_id, vacancy_title, company_name, salary, city, published_date, programming_language, vacancy_url]
            )
        return result
    else:
        raise 


In [33]:
def get_count_viwers(vacancy_id: int) -> int:
    headers = {
        "User-Agent": "Your User Agent",
    }
    url = f"https://hh.ru/vacancy/{vacancy_id}"
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        html_content = response.text
        soup = BeautifulSoup(html_content, "html.parser")
        v = soup.findAll('span')
        return v
    else:
        raise ValueError
        

In [34]:
print(get_count_viwers(93341729))

[<span><!--noindex--></span>, <span><!--/noindex--></span>, <span><!--noindex--></span>, <span class="supernova-navi-item_area-switcher-button">Дзержинский</span>, <span><!--/noindex--></span>, <span class="supernova-icon-services-dynamic"><button class="supernova-icon-link-switch HH-Supernova-Overlay-Toggle" data-overlay-name="overlay" data-qa="mainmenu_applicantServices"><span class="supernova-link-switch supernova-link-switch_dimmed">Все сервисы</span><span> </span><span class="supernova-icon-services"></span></button></span>, <span class="supernova-link-switch supernova-link-switch_dimmed">Все сервисы</span>, <span> </span>, <span class="supernova-icon-services"></span>, <span class="supernova-logo supernova-logo_inversed supernova-logo_hh-ru" tabindex="-1"></span>, <span class="supernova-navi-user-type-switcher-text">Ищу работу</span>, <span class="supernova-icon-small supernova-icon_chevron-down supernova-navi-user-type-switcher-icon"></span>, <span class="supernova-icon-dynamic"

In [26]:
def init_csv_file_with_fields(file_name: str, fileds: list[str]) -> None:
    with open(f"{file_name}.csv", "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fileds)

In [29]:
def add_csv_row(file_name: str, row: list[str]) -> None:
    with open(f"{file_name}.csv", "a", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)

In [30]:
params_python = {
    "text": "python junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [31]:
params_go = {
    "text": "go junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [32]:
params_java = {
    "text": "java junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [33]:
params_cpp = {
    "text": "c++ junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [34]:
params_js = {
    "text": "js junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [35]:
params_c_sharp = {
    "text": "c# junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [36]:
params_r = {
    "text": "R junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [37]:
params_rust = {
    "text": "rust junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [38]:
params_php = {
    "text": "php junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [39]:
params_1c = {
    "text": "1c junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [40]:
res = []

In [41]:
params_prog_lang = (
    [params_python, "Python"],
    [params_go, "GO"],
    [params_java, "Java"],
    [params_cpp, "C++"],
    [params_js, "JS"],
    [params_c_sharp, "C#"],
    [params_r, "R"],
    [params_rust, "Rust"],
    [params_php, "PHP"],
    [params_1c, "1C"],
)

In [42]:
for params, prog_lang in params_prog_lang:
    res += get_vacancies(params, prog_lang)

In [43]:
init_csv_file_with_fields("dataset", ["ID", "VACANCY_TITLE", "COMPANY_NAME", "SALARY", "CITY", "DATE", "PROGRAMMING_LANGUAGE", "VACANCY_URL"])

In [44]:
for i in res:
    add_csv_row("dataset", i)