In [19]:
import requests
from bs4 import BeautifulSoup
import csv
import time

def get_property_data(url, timeout=30):
    """Download a single listing page and extract its attributes.

    Args:
        url: Absolute URL of the listing page.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a single stalled connection would block the crawl
            indefinitely.

    Returns:
        A dict with the keys ``grad`` (city), ``opstina`` (municipality),
        ``kvart`` (neighbourhood), ``kvadratura`` (area), ``broj_soba``
        (number of rooms), ``spratnost`` (floor), ``stanje`` (condition),
        ``grejanje`` (heating), ``cena`` (price), ``lift``, ``podrum``
        (basement) and ``terasa`` (terrace). Any field that could not be
        located on the page is left as ``None``. Returns ``None`` when the
        page could not be fetched or an unexpected error occurred while
        parsing; the error is printed in that case.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        data = {
            "grad": None,
            "opstina": None,
            "kvart": None,
            "kvadratura": None,
            "broj_soba": None,
            "spratnost": None,
            "stanje": None,
            "grejanje": None,
            "cena": None,
            "lift": None,
            "podrum": None,
            "terasa": None,
        }

        # City, municipality and neighbourhood: the first <span> holds a
        # comma-separated "city, municipality, neighbourhood" string.
        location_tag = soup.find('div', class_='flex flex-col items-start justify-center desk:flex-row desk:justify-start desk:gap-2')
        if location_tag:
            spans = location_tag.find_all('span')
            if spans:
                location_info = spans[0].text.strip().split(', ')
                if len(location_info) >= 2:
                    data["grad"] = location_info[0]
                    data["opstina"] = location_info[1]
                if len(location_info) >= 3:
                    data["kvart"] = location_info[2]

        # Price: strip the currency sign and the '.' separators.
        price_tag = soup.find('div', class_='w-3/8 flex-shrink-0 overflow-clip text-right')
        if price_tag:
            price_p = price_tag.find('p')
            if price_p:
                data["cena"] = price_p.text.strip().replace('€', '').replace('.', '').strip()

        # Area, number of rooms, floor: taken positionally from the first
        # three detail tiles. A tile without a <strong> child leaves only
        # its own field as None instead of discarding the whole record.
        details_tags = soup.find_all('div', class_='flex flex-1 items-center justify-center bg-white px-2 py-4')
        if len(details_tags) >= 3:
            detail_values = []
            for tile in details_tags[:3]:
                strong = tile.find('strong')
                detail_values.append(strong.text.strip() if strong else None)
            area, rooms, floor = detail_values
            if area is not None:
                area = area.replace('m²', '').strip()
            data["kvadratura"] = area
            data["broj_soba"] = rooms
            data["spratnost"] = floor

        # About the apartment: keyword matching on the first section.
        stan_tags = soup.select('section.flex.flex-col.gap-1')
        if len(stan_tags) > 0:
            for span in stan_tags[0].find_all('span'):
                text = span.text
                lowered = text.lower()
                if 'grejanje' in lowered:
                    data["grejanje"] = text.strip()
                if 'renovirano' in lowered:
                    data["stanje"] = text.strip()
                if 'terasa' in lowered:
                    data["terasa"] = 'Da'
                if 'podrum' in lowered:
                    data["podrum"] = 'Da'

        # About the building: the second section carries the lift entry.
        if len(stan_tags) > 1:
            for span in stan_tags[1].find_all('span'):
                text = span.text
                if 'lift' in text.lower():
                    # 'Da' only when the lift entry contains the digit 1.
                    data["lift"] = 'Da' if '1' in text else 'Ne'

        return data

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None
    except Exception as e:
        # Boundary handler: one malformed page must not stop the crawl.
        print(f"Error processing the URL: {url}, error: {e}")
        return None

def get_all_links(start_page, end_page, timeout=30):
    """Collect unique listing URLs from a range of search-result pages.

    Args:
        start_page: First result page number to fetch (inclusive).
        end_page: Last result page number to fetch (inclusive).
        timeout: Seconds to wait for each page before giving up.

    Returns:
        A list of absolute listing URLs in the order they were first
        seen, without duplicates. If fetching or parsing a page fails,
        the error is printed, crawling stops, and the links gathered so
        far are returned.
    """
    links = []
    seen = set()  # O(1) duplicate check; `links` keeps the discovery order

    for page in range(start_page, end_page + 1):
        # Built outside the try so the error message below can always use it.
        next_page = f'https://www.4zida.rs/prodaja-stanova?strana={page}'
        try:
            print(f"Fetching page: {next_page}")
            response = requests.get(next_page, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            property_divs = soup.find_all('div', class_='flex w-2/3 flex-col justify-between py-2')
            for div in property_divs:
                anchor = div.find('a', href=True)
                if anchor is None:
                    # A card without a link must not abort the whole crawl.
                    continue
                link = anchor['href']
                if 'prodaja-stanova' not in link:
                    continue
                full_link = 'https://www.4zida.rs' + link
                if full_link not in seen:  # Check for duplicates before adding
                    seen.add(full_link)
                    links.append(full_link)

            # Pause to avoid overloading the server
            time.sleep(1)
        except Exception as e:
            print(f"Error fetching links from {next_page}: {e}")
            break

    return links

# Fetch property links from page 1 to 200
property_links = get_all_links(1, 200)

# Column order shared by the CSV header and every data row.
fieldnames = [
    "grad", "opstina", "kvart", "kvadratura", "broj_soba", "spratnost", "stanje",
    "grejanje", "cena", "lift", "podrum", "terasa"
]

# Scrape data for each property and write to CSV
scraped_properties = set()
saved_count = 0  # rows actually written, as opposed to links visited

# The file is opened once for the whole run; flushing after each row keeps
# the data on disk if the run is interrupted part-way through.
with open('property_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    file.flush()

    for link in property_links:
        try:
            print(f"Fetching property data from: {link}")
            property_data = get_property_data(link)
            if property_data:
                data_tuple = tuple(property_data.items())
                if data_tuple not in scraped_properties:  # Check for duplicates before adding
                    scraped_properties.add(data_tuple)
                    print(property_data)  # Print data for verification
                    writer.writerow(property_data)
                    file.flush()
                    saved_count += 1

                    # Report progress every 100 saved records
                    if saved_count % 100 == 0:
                        print(f"Saved {saved_count} records.")

            # Pause to avoid overloading the server
            time.sleep(1)

        except Exception as e:
            print(f"Error fetching property data from {link}: {e}")
            continue


Fetching page: https://www.4zida.rs/prodaja-stanova?strana=1
Fetching page: https://www.4zida.rs/prodaja-stanova?strana=2
