In [3]:
import requests
from bs4 import BeautifulSoup
import time
import csv
import json

ITEM_URL_BASE = "https://bina.az"
BASE_URL = "https://bina.az/items/all"
ITEM_FILE = "data/item.html"
HOME_FILE = "data/test.html"
OUTPUT_FILE = "data/items_data.json"
headers = {'User-Agent': 'Mozilla/5.0'}

def get_links_from_base():
    # Send a request to the base URL
    response = requests.get(BASE_URL, headers=headers)
    
    if response.status_code != 200:
        print(f"Failed to fetch page: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    links = []
    # Select all ad links from the page
    for a in soup.select("a.item_link"):  # Use correct class name here
        href = a.get("href")
        if href:
            links.append(href)

    # Optional: Save raw HTML to file for debugging or future use
    with open(HOME_FILE, mode="w", newline="", encoding="utf-8") as f:
        f.write(response.text)

    return links

# Run the function and print results
links = get_links_from_base()
print(links)


['/items/4347196', '/items/4347196', '/items/5114087', '/items/5114087', '/items/5044379', '/items/5044379', '/items/5044379', '/items/5044379', '/items/5168196', '/items/5168196', '/items/5168196', '/items/5168196', '/items/5064125', '/items/5064125', '/items/5064125', '/items/5187268', '/items/5187268', '/items/4759000', '/items/4759000', '/items/4759000', '/items/5178543', '/items/5178543', '/items/5178543', '/items/5178543', '/items/5119747', '/items/5119747', '/items/5119747', '/items/5119747', '/items/5187267', '/items/5187267', '/items/5187267', '/items/5187267', '/items/5010698', '/items/5010698', '/items/5010698', '/items/5109891', '/items/5109891', '/items/5109891', '/items/5109891', '/items/4618218', '/items/4618218', '/items/4618218', '/items/5187266', '/items/5187266', '/items/5187266', '/items/5156030', '/items/5156030', '/items/5120898', '/items/5120898', '/items/5120898', '/items/5056395', '/items/5056395', '/items/5056395', '/items/5056395', '/items/4844105', '/items/5

In [4]:
headers = {
    'User-Agent': 'Mozilla/5.0'
}

# Assume links is defined from previous scraping step (e.g., from get_links_from_base())
# Example: links = ["/items/5160924", "/items/5110023", ...]

def extract_item_data(link):
    url = ITEM_URL_BASE + link
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to fetch {url}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract item ID
    item_id = link.split("/")[-1]

    # Extract amount and currency using meta tags
    amount_tag = soup.find('meta', {'property': 'product:price:amount'})
    currency_tag = soup.find('meta', {'property': 'product:price:currency'})

    amount = amount_tag['content'] if amount_tag else None
    currency = currency_tag['content'] if currency_tag else None

    # Extract property fields dynamically
    properties = {}
    prop_div = soup.find("div", class_="product-properties__column")
    if prop_div:
        for prop in prop_div.find_all("div", class_="product-properties__i"):
            name = prop.find("label", class_="product-properties__i-name").get_text(strip=True)
            value = prop.find("span", class_="product-properties__i-value").get_text(strip=True)
            properties[name] = value

    # Construct result dictionary
    item_data = {
        'item_id': item_id,
        'amount': amount,
        'currency': currency,
    }

    # Add all found properties dynamically
    item_data.update(properties)

    return item_data


def scrape_all_items():
    results = []

    for idx, link in enumerate(links):
        print(f"[{idx+1}/{len(links)}] Processing: {link}")
        data = extract_item_data(link)
        if data:
            results.append(data)

    # Save to JSON
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"Scraping completed. Data saved to {OUTPUT_FILE}")


# Run the scraper
scrape_all_items()

[1/81] Processing: /items/4347196
[2/81] Processing: /items/4347196
[3/81] Processing: /items/5114087
[4/81] Processing: /items/5114087
[5/81] Processing: /items/5044379
[6/81] Processing: /items/5044379
[7/81] Processing: /items/5044379
[8/81] Processing: /items/5044379
[9/81] Processing: /items/5168196
[10/81] Processing: /items/5168196
[11/81] Processing: /items/5168196
[12/81] Processing: /items/5168196
[13/81] Processing: /items/5064125
[14/81] Processing: /items/5064125
[15/81] Processing: /items/5064125
[16/81] Processing: /items/5187268
[17/81] Processing: /items/5187268
[18/81] Processing: /items/4759000
[19/81] Processing: /items/4759000
[20/81] Processing: /items/4759000
[21/81] Processing: /items/5178543
[22/81] Processing: /items/5178543
[23/81] Processing: /items/5178543
[24/81] Processing: /items/5178543
[25/81] Processing: /items/5119747
[26/81] Processing: /items/5119747
[27/81] Processing: /items/5119747
[28/81] Processing: /items/5119747
[29/81] Processing: /items/51