In [1]:
!pip install requests




In [2]:
import requests
import json
import time

BASE_URL = 'https://api.hh.ru/vacancies'
REQUEST_TIMEOUT = 10    # Seconds to wait on the server before giving up on a request
params = {
    'text': 'sales consultant',
    'area': 88,    # Region code (Kazan)
    'per_page': 100,    # Maximum 100 vacancies at a time
    'page': 0    # Page number
}

all_vacancies = []

# Walk the result pages one at a time. Any failure stops the loop but keeps the
# vacancies collected so far, so they are still written to disk below.
while True:
    print(f"Fetching page {params['page']}...")
    try:
        # Without a timeout, requests may wait forever on an unresponsive server.
        response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        break

    if response.status_code != 200:
        print(f"Request error: {response.status_code}")
        break

    try:
        data = response.json()
    except ValueError as exc:
        # Status was 200 but the body could not be decoded as JSON.
        print(f"Invalid JSON in response: {exc}")
        break

    items = data.get('items', [])

    if not items:
        print("No more vacancies found")
        break

    all_vacancies.extend(items)

    # Check if we've reached the last page. A response without a 'pages' field
    # is treated as the last page instead of raising KeyError.
    if params['page'] >= data.get('pages', 0) - 1:
        break

    params['page'] += 1
    time.sleep(0.5)  # Pause to avoid overloading the API

# Save complete vacancies
with open('vacancies.json', 'w', encoding='utf-8') as f:
    json.dump(all_vacancies, f, ensure_ascii=False, indent=2)

print(f"Saved {len(all_vacancies)} vacancies to vacancies.json")

Fetching page 0...
Saved 3 vacancies to vacancies.json


In [3]:
import json

# Load original file
with open('vacancies.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


def _entry_names(entries):
    """Return the 'name' of every entry; a missing or null list yields []."""
    # dict.get's default only covers a missing key, so a JSON null arrives
    # here as None and must be replaced by an empty list before iterating.
    return [entry.get('name') for entry in entries or []]


def _clean_vacancy(vac):
    """Reduce one raw vacancy dict to the fields kept by this notebook.

    Nested fields that are absent or null produce None (salary, experience)
    or an empty list (working_hours, schedule) instead of raising.
    """
    salary = vac.get('salary')
    experience = vac.get('experience') or {}
    return {
        'title': vac.get('name'),
        'salary': {
            'from': salary.get('from'),
            'to': salary.get('to'),
            'currency': salary.get('currency')
        } if salary else None,
        'working_hours': _entry_names(vac.get('working_time_intervals')),
        'schedule': _entry_names(vac.get('work_schedule')),
        'experience': experience.get('name')
    }


# Extract necessary info
cleaned = [_clean_vacancy(vac) for vac in data]

# Save to new JSON
with open('vacancies_cleaned.json', 'w', encoding='utf-8') as f:
    json.dump(cleaned, f, ensure_ascii=False, indent=2)

print(f"Saved {len(cleaned)} vacancies to vacancies_cleaned.json")


Saved 3 vacancies to vacancies_cleaned.json


In [4]:
import json
import re

# Read the cleaned vacancies written by the previous step.
with open('vacancies_cleaned.json', 'r', encoding='utf-8') as src:
    data = json.load(src)

# Rewrite each vacancy in place: numeric working hours, English experience label.
for vacancy in data:
    # Keep the first run of digits found in each working-hours label; labels
    # without digits are dropped.
    found = (re.search(r'\d+', label) for label in vacancy.get('working_hours', []))
    vacancy['working_hours'] = [int(hit.group()) for hit in found if hit]

    experience = vacancy.get('experience')
    if not experience:
        continue

    if 'Нет опыта' in experience:
        vacancy['experience'] = 'no experience'
        continue

    # Two numbers form a range, one number an open-ended minimum; any other
    # count leaves the original label untouched.
    numbers = re.findall(r'\d+', experience)
    if len(numbers) == 2:
        vacancy['experience'] = f'{numbers[0]}-{numbers[1]} years'
    elif len(numbers) == 1:
        vacancy['experience'] = f'{numbers[0]}+ years'

# Write the normalised vacancies to the final file.
with open('vacancies_final.json', 'w', encoding='utf-8') as dst:
    json.dump(data, dst, ensure_ascii=False, indent=2)

print(f"{len(data)} vacancies saved to vacancies_final.json")

3 vacancies saved to vacancies_final.json
