In [93]:
import pandas as pd
import csv
import requests
from typing import Any
from datetime import datetime
from bs4 import BeautifulSoup
import dataclasses

In [99]:
@dataclasses.dataclass
class Vacancy:
    vacancy_id: int
    vacancy_title: str
    company_name: str
    salary: float
    city: str
    published_date: datetime
    programming_language: str
    vacancy_url: str

In [101]:
def get_vacancies(params: dict[str, Any], programming_language: str="Python") -> list[Vacancy]:
    url = "https://api.hh.ru/vacancies"
    headers = {
        "User-Agent": "Your User Agent",
    }

    response = requests.get(url, params=params, headers=headers)
    result = []
    if response.status_code == 200:
        data = response.json()
        vacancies = data.get("items", [])
        for vacancy in vacancies:
            vacancy_id = vacancy.get("id")
            vacancy_title = vacancy.get("name")
            vacancy_url = vacancy.get("alternate_url")
            company_name = vacancy.get("employer", {}).get("name")
            city = vacancy.get("area").get("name")
            published_datetime = vacancy.get("published_at")
            published_date = datetime.fromisoformat(published_datetime).date()
            salary_field: None | dict[str, Any ] = vacancy["salary"]
            
            if salary_field is not None:
                salary_from, salary_to = salary_field["from"], salary_field["to"]
                salary = salary_to or salary_from
            else:
                salary = None
            
            result.append(
                Vacancy(
                    vacancy_id = vacancy_id,
                    vacancy_title= vacancy_title,
                    company_name = company_name,
                    salary = salary,
                    city = city,
                    published_date = published_date,
                    programming_language = programming_language,
                    vacancy_url = vacancy_url,
                )
            )
        return result
    else:
        raise 


In [102]:
def init_csv_file_with_fields(file_name: str, fileds: list[str]) -> None:
    with open(f"{file_name}.csv", "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fileds)

In [103]:
def add_csv_row(file_name: str, row: list[str]) -> None:
    with open(f"{file_name}.csv", "a", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)

In [104]:
params_python = {
    "text": "python junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [105]:
params_go = {
    "text": "go junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [106]:
params_java = {
    "text": "java junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [107]:
params_cpp = {
    "text": "c++ junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [108]:
params_js = {
    "text": "js junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [109]:
params_c_sharp = {
    "text": "c# junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [110]:
params_r = {
    "text": "R junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [111]:
params_rust = {
    "text": "rust junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [112]:
params_php = {
    "text": "php junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [113]:
params_1c = {
    "text": "1c junior",
    "search_field": ["name", "description"],
    "area": 113,
    "per_page": 100, 
    "experience": "noExperience",
    "currency": "RUR",
    "date_from": "2024-02-15",
}

In [114]:
res = []

In [115]:
params_prog_lang = (
    [params_python, "Python"],
    [params_go, "GO"],
    [params_java, "Java"],
    [params_cpp, "C++"],
    [params_js, "JS"],
    [params_c_sharp, "C#"],
    [params_r, "R"],
    [params_rust, "Rust"],
    [params_php, "PHP"],
    [params_1c, "1C"],
)

In [116]:
for params, prog_lang in params_prog_lang:
    res += get_vacancies(params, prog_lang)

In [135]:
print(len(res))

534


In [117]:
init_csv_file_with_fields("dataset", ["ID", "VACANCY_TITLE", "COMPANY_NAME", "SALARY", "CITY", "DATE", "PROGRAMMING_LANGUAGE", "VACANCY_URL"])

In [126]:
v = Vacancy(123, 'qeqwe', 'qwe', 123, 'qwedasdasd', '2200', 'qwe', "https")

In [119]:
for vacancy in res:
    add_csv_row("dataset", [vacancy.__dict__[field] for field in dataclasses.fields(Vacancy)])

KeyError: Field(name='vacancy_id',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD)

In [131]:
for i in dataclasses.fields(Vacancy):
    print(i, '\n')

Field(name='vacancy_id',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD) 

Field(name='vacancy_title',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD) 

Field(name='company_name',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD) 

Field(name='salary',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x7f87b3379c50>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f87

In [128]:
v.__dict__["vacancy_id"]

123