In [57]:
import pandas as pd
import csv
import requests
from typing import Any
from datetime import datetime

In [74]:
def _parse_published_date(value):
    """Return the calendar date of an ISO-8601 timestamp string, or None if it is missing."""
    if not value:
        return None
    try:
        return datetime.fromisoformat(value).date()
    except ValueError:
        # Before Python 3.11 ``fromisoformat`` rejects UTC offsets written
        # without a colon (e.g. "+0300"); ``%z`` accepts both spellings.
        return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S%z").date()


def get_vacancies(params, programming_language: str="Python", timeout: float=30) -> list[list[Any]]:
    """Fetch vacancies from the hh.ru API and flatten them into CSV-ready rows.

    A single GET request is issued, so only the one page of results returned
    for ``params`` is processed.

    Args:
        params: Query-string parameters forwarded to ``requests.get``.
        programming_language: Label copied unchanged into every returned row.
        timeout: Passed to ``requests.get`` as its ``timeout`` argument so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        One list per vacancy, ordered as ``[id, title, company name, salary,
        city, published date, programming_language, url]``. ``salary`` is the
        "to" bound when it is set, otherwise the "from" bound, otherwise
        ``None``. Fields absent from the payload are returned as ``None``.

    Raises:
        RuntimeError: If the API responds with a status code other than 200.
    """
    url = "https://api.hh.ru/vacancies"
    headers = {
        "User-Agent": "Your User Agent",
    }

    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    if response.status_code != 200:
        # The original bare ``raise`` surfaced as an uninformative RuntimeError;
        # keep the exception type but say what actually went wrong.
        raise RuntimeError(
            f"Request to {url} failed with HTTP status {response.status_code}"
        )

    result = []
    for vacancy in response.json().get("items", []):
        # Nested objects may be missing or explicitly null; fall back to an
        # empty dict so the lookups below yield None instead of raising.
        employer = vacancy.get("employer") or {}
        area = vacancy.get("area") or {}
        salary_field = vacancy.get("salary") or {}

        result.append(
            [
                vacancy.get("id"),
                vacancy.get("name"),
                employer.get("name"),
                salary_field.get("to") or salary_field.get("from"),
                area.get("name"),
                _parse_published_date(vacancy.get("published_at")),
                programming_language,
                vacancy.get("alternate_url"),
            ]
        )
    return result


In [75]:
def init_csv_file_with_fields(file_name: str, fileds: list[str], encoding: str = "utf-8") -> None:
    """Create (or truncate) ``<file_name>.csv`` and write one header row to it.

    Args:
        file_name: Target path without extension; ".csv" is appended.
        fileds: Column names written as the header row. The misspelled
            parameter name is kept so existing keyword callers keep working.
        encoding: Text encoding used for the file. Defaults to UTF-8 instead of
            the platform locale so the output does not depend on the machine
            that runs the notebook.
    """
    # newline='' lets the csv module control line endings itself.
    with open(f"{file_name}.csv", "w", newline='', encoding=encoding) as f:
        csv.writer(f).writerow(fileds)

In [76]:
def add_csv_row(file_name: str, row: list[Any], encoding: str = "utf-8") -> None:
    """Append a single row to ``<file_name>.csv``, creating the file if needed.

    Args:
        file_name: Target path without extension; ".csv" is appended.
        row: Cell values for the new row. Values are formatted by
            ``csv.writer``, so non-string items (numbers, dates, ``None``)
            are accepted.
        encoding: Text encoding used for the file. Defaults to UTF-8 instead of
            the platform locale so non-ASCII text is written the same way on
            every machine.
    """
    # newline='' lets the csv module control line endings itself.
    with open(f"{file_name}.csv", "a", newline='', encoding=encoding) as f:
        csv.writer(f).writerow(row)

In [92]:
def build_search_params(text: str) -> dict[str, Any]:
    """Return a fresh vacancy-search parameter dict for the free-text query ``text``.

    Every per-language query below uses the same filters and differs only in
    its "text" value, so the shared part is written once here. A new dict (and
    a new "search_field" list) is built on each call, so the results never
    share mutable state.
    """
    return {
        "text": text,
        "search_field": ["name", "description"],
        "area": 113,  # presumably the hh.ru area id for Russia - TODO confirm
        "per_page": 100,
        "experience": "noExperience",
        "currency": "RUR",
        "date_from": "2024-03-01",
    }


# One parameter set per programming language; only the search text varies.
params_python = build_search_params("python junior")
params_go = build_search_params("go junior")
params_java = build_search_params("java junior")
params_cpp = build_search_params("c++ junior")
params_js = build_search_params("js junior")
params_c_sharp = build_search_params("c# junior")
params_r = build_search_params("R junior")
params_rust = build_search_params("rust junior")
params_php = build_search_params("php junior")
params_1c = build_search_params("1c junior")

In [102]:
# Each entry pairs a search-parameter dict with the label that get_vacancies
# copies into every row it returns for that query.
params_prog_lang = (
    [params_python, "Python"],
    [params_go, "GO"],
    [params_java, "Java"],
    [params_cpp, "C++"],
    [params_js, "JS"],
    [params_c_sharp, "C#"],
    [params_r, "R"],
    [params_rust, "Rust"],
    [params_php, "PHP"],
    [params_1c, "1C"],
)

# Reset the accumulator in the same cell that fills it: re-running this cell
# then rebuilds `res` from scratch instead of appending duplicate rows to
# whatever an earlier run left behind.
res = []
for params, prog_lang in params_prog_lang:
    res.extend(get_vacancies(params, prog_lang))

In [105]:
# Write the header and the rows in one cell. init_csv_file_with_fields opens
# the file in "w" mode, so re-running this cell recreates dataset.csv rather
# than appending a second copy of every row to the existing file.
init_csv_file_with_fields(
    "dataset",
    [
        "ID",
        "VACANCY_TITLE",
        "COMPANY_NAME",
        "SALARY",
        "CITY",
        "DATE",
        "PROGRAMMING_LANGUAGE",
        "VACANCY_URL",
    ],
)
for vacancy_row in res:
    add_csv_row("dataset", vacancy_row)