In [6]:
import requests, time, pandas as pd

def fetch_api_tashkent(max_pages=20):
    url = "https://api.hh.ru/vacancies"
    headers = {"accept-language": "ru"}
    rows = []
    for page in range(max_pages):
        params = {"area": 2562, "per_page": 50, "page": page}
        r = requests.get(url, params=params, headers=headers)
        data = r.json()
        items = data.get("items", [])
        if not items:
            break
        for it in items:
            area = it.get("area") or {}
            if str(area.get("id")) != "2562" and area.get("name") != "Ташкент":
                continue
            rows.append({
                "h_id": it.get("id"),
                "title": it.get("name"),
                "company": (it.get("employer") or {}).get("name"),
                "city": area.get("name"),
                "salary_from": (it.get("salary") or {}).get("from"),
                "salary_to": (it.get("salary") or {}).get("to"),
                "currency": (it.get("salary") or {}).get("currency"),
                "published_at": it.get("published_at"),
                "link": it.get("alternate_url"),
                "source": "API"
            })
        print(f"API sahifa {page+1} → hozircha {len(rows)} ta vacancy")
        time.sleep(0.4)
    return pd.DataFrame(rows)

df_api = fetch_api_tashkent(max_pages=20)
print("API orqali jami vacancy:", len(df_api))


API sahifa 1 → hozircha 11 ta vacancy
API sahifa 2 → hozircha 18 ta vacancy
API sahifa 3 → hozircha 27 ta vacancy
API sahifa 4 → hozircha 32 ta vacancy
API sahifa 5 → hozircha 36 ta vacancy
API sahifa 6 → hozircha 39 ta vacancy
API sahifa 7 → hozircha 42 ta vacancy
API sahifa 8 → hozircha 45 ta vacancy
API sahifa 9 → hozircha 49 ta vacancy
API sahifa 10 → hozircha 51 ta vacancy
API sahifa 11 → hozircha 55 ta vacancy
API sahifa 12 → hozircha 58 ta vacancy
API sahifa 13 → hozircha 61 ta vacancy
API sahifa 14 → hozircha 65 ta vacancy
API sahifa 15 → hozircha 68 ta vacancy
API sahifa 16 → hozircha 70 ta vacancy
API sahifa 17 → hozircha 71 ta vacancy
API sahifa 18 → hozircha 75 ta vacancy
API sahifa 19 → hozircha 76 ta vacancy
API sahifa 20 → hozircha 79 ta vacancy
API orqali jami vacancy: 79


In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

def fetch_scraping_tashkent(pages=10, headless=True):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless")
    options.add_argument("--disable-blink-features=AutomationControlled")
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    rows = []

    for page in range(pages):
        url = f"https://hh.uz/search/vacancy?area=2562&page={page}"
        driver.get(url)

        # Sahifa to‘liq yuklanishini kutish
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a[data-qa='serp-item__title']"))
            )
        except:
            print(f"Sahifa {page+1} → vacancy yuklanmadi")
            continue

        vacancy_links = driver.find_elements(By.CSS_SELECTOR, "a[data-qa='serp-item__title']")
        for a in vacancy_links:
            rows.append({
                "h_id": None,
                "title": a.text.strip(),
                "company": None,
                "city": "Ташкент",  # default, area=2562 bo‘lganligi uchun
                "salary_from": None,
                "salary_to": None,
                "currency": None,
                "published_at": None,
                "link": a.get_attribute("href"),
                "source": "Scraping"
            })

        print(f"Sahifa {page+1} → {len(vacancy_links)} ta vacancy topildi")
        time.sleep(1)  # sahifalar orasida kichik pauza

    driver.quit()
    return pd.DataFrame(rows)

# Test
df_scraping = fetch_scraping_tashkent(pages=10, headless=True)
print("Scraping orqali jami vacancy:", len(df_scraping))


Sahifa 1 → 50 ta vacancy topildi
Sahifa 2 → 50 ta vacancy topildi
Sahifa 3 → 50 ta vacancy topildi
Sahifa 4 → 50 ta vacancy topildi
Sahifa 5 → 50 ta vacancy topildi
Sahifa 6 → 50 ta vacancy topildi
Sahifa 7 → 50 ta vacancy topildi
Sahifa 8 → 50 ta vacancy topildi
Sahifa 9 → 50 ta vacancy topildi
Sahifa 10 → 50 ta vacancy topildi
Scraping orqali jami vacancy: 500


In [8]:
import pandas as pd

df_all = pd.concat([df_api, df_scraping], ignore_index=True)

# published_at ustunini datetime formatga o'tkazish
df_all["published_at"] = pd.to_datetime(df_all["published_at"], errors="coerce")

# Noyob vacancy qoldirish (h_id mavjud bo'lsa h_id asosida, aks holda link)
df_all["uniq_key"] = df_all["h_id"].fillna(df_all["link"])
df_all = df_all.drop_duplicates(subset=["uniq_key"]).drop(columns=["uniq_key"])

print("Umumiy yig‘ilgan vacancy soni:", len(df_all))


Umumiy yig‘ilgan vacancy soni: 546


  df_all = pd.concat([df_api, df_scraping], ignore_index=True)


In [9]:
import pyodbc
import pandas as pd

# salary ustunini yaratish
df_all['salary'] = df_all['salary_from'].combine_first(df_all['salary_to'])

# SQL ulanish
conn = pyodbc.connect('Driver={SQL Server};Server=WIN-ENG5O096M48;Database=headhunter1;Trusted_Connection=yes')
cursor = conn.cursor()

# Jadvalni tozalash
cursor.execute("TRUNCATE TABLE Vacancyy;")
conn.commit()

# Ma’lumotlarni tayyorlash
rows_to_insert = [
    (
        r['title'],
        r['company'],
        r['city'],
        r['salary'] if pd.notnull(r['salary']) else None,
        r['currency'] if pd.notnull(r['currency']) else None,
        r['published_at'].to_pydatetime() if pd.notnull(r['published_at']) else None,
        r['link'],
        r['source']
    )
    for _, r in df_all.iterrows()
]

# Yozish
cursor.fast_executemany = True
cursor.executemany("""
    INSERT INTO Vacancyy (title, company, city, salary, currency, published_at, link, source)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", rows_to_insert)

conn.commit()
conn.close()

print("Toza Toshkent dataset bazaga yozildi. Count:", len(df_all))


✅ Toza Toshkent dataset bazaga yozildi. Count: 546
