# Unegui.mn Apartment Scraper

Collects apartment ads from Unegui.mn.
Unegui.mn is Mongolia's most widely used classified-advertisement site.

## How it works

1. Opens the website - Uses undetected Chrome to avoid bot detection
2. Collects ad links - Scrolls through search results and collects all apartment ad links
3. Visits each ad - Goes through each ad one by one and extracts data
4. Exports the data - Writes all collected data to a single CSV file

## Final collected features
- Floor type
- Balcony
- Garage
- Window type
- Door type
- Number of windows
- Construction progress
- Built year
- Total floor
- Size
- Located floor
- Elevator
- Payment term
- District
- Location
- View count
- Scraped date
- Posted date
- Ad link
- Price
- Number of rooms
- Ad title
- Ad description

Scraped data from 8 districts of Ulaanbaatar:
- Bayanzurkh 
- Sukhbaatar 
- Bayangol 
- Chingeltei 
- Khan Uul 
- Songinokhairkhan 
- Baganuur
- Nalaikh

All district datasets are combined into one final dataset for apartment price prediction modeling. Covering several districts adds geographic diversity and reduces location bias in the prediction model.

# Bayanzurkh district

In [3]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# CSV column names, in output order (English meaning noted beside each one)
header = [
    '–®–∞–ª:',  # floor type
    '–¢–∞–≥—Ç:',  # balcony
    '–ì–∞—Ä–∞–∂:',  # garage
    '–¶–æ–Ω—Ö:',  # window type
    '–•–∞–∞–ª–≥–∞:',  # door type
    '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:',  # number of windows
    '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü',  # construction progress
    '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:',  # built year
    '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:',  # total floors
    '–¢–∞–ª–±–∞–π:',  # size
    '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:',  # located floor
    '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:',  # elevator
    '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:',  # payment term
    '–î“Ø“Ø—Ä—ç–≥:',  # district
    '–ë–∞–π—Ä—à–∏–ª:',  # location
    '“Æ–∑—Å—ç–Ω:',  # view count
    'Scraped_date:',
    'Posted_date:',
    'Link:',
    '“Æ–Ω—ç:',  # price
    '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:',  # number of rooms
    '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:',  # ad title
    '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:',  # ad description
]

# one dict per scraped ad, keyed by the column names above
csv_list = list()

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Store the value of one ad characteristic in ``key_list[index]``.

    Every entry of ``span_list`` that contains ``key`` is read as
    ``label:value`` and the text after the first colon is stored; when
    several entries match, the last one wins. Entries without a colon are
    ignored instead of raising ``IndexError``, and a value that itself
    contains a colon is kept whole.

    Args:
        key: Label text to look for (converted with ``str``).
        index: Position in ``key_list`` to fill (converted with ``int``).
        span_list: Iterable of ``label:value`` strings.
        key_list: List that is updated in place.

    Returns:
        None. If the slot does not hold a string afterwards, it is set to
        the placeholder text.
    """
    index = int(index)
    key = str(key)
    for item in span_list:
        if key in item:
            _, separator, value = item.partition(':')
            if separator:
                key_list[index] = value
    if not isinstance(key_list[index], str):
        key_list[index] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of relative ad links found in a parsed page.

    A link is kept only when its whole ``href`` has the form
    ``/adv/<digits>_<slug>`` with an optional trailing slash, where the slug
    consists of lowercase letters, digits and hyphens. ``fullmatch`` is used
    so that an href ending in a newline is rejected (``$`` would accept it).
    Because the entire string must match, hrefs carrying a query string or
    fragment can never pass, so no separate exclusion list is needed.

    Args:
        soup: Parsed page; only ``soup.find_all('a', href=True)`` is called.

    Returns:
        A set of matching ``href`` strings (duplicates collapse).
    """
    pattern = re.compile(r'/adv/\d+_[a-z0-9-]+/?')
    return {
        link['href']
        for link in soup.find_all('a', href=True)
        if pattern.fullmatch(link['href'])
    }

# Chrome launch flags passed to the browser
chrome_flags = ('--no-sandbox', '--disable-dev-shm-usage')
options = uc.ChromeOptions()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# undetected Chrome driver (used to avoid bot detection) plus a shared
# explicit wait with a 15-second timeout
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# Imported here so this cell does not depend on an extra top-of-cell import.
from selenium.common.exceptions import TimeoutException


def _text_or(tag, default):
    """Return the stripped text of a parsed tag, or ``default`` when the tag is None."""
    return tag.text.strip() if tag is not None else default


try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-bayanzrh/'
    district_slug = 'bayanzurkh'  # used in the output file name
    missing = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"  # placeholder stored for any field not found
    listing_selector = (By.CSS_SELECTOR, "a[href^='/adv/']")
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    # navigate to base url and wait until at least one ad link is present
    driver.get(base_url)
    time.sleep(5)
    wait.until(EC.presence_of_element_located(listing_selector))
    driver.execute_script(scroll_to_bottom)
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop if no new ad on 2 pages
        if new_listings:
            no_new_count = 0
            all_listing_urls.update(new_listings)
        else:
            no_new_count += 1
            if no_new_count >= 2:
                break

        if current_page >= max_pages:
            break

        # to next page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)
        try:
            wait.until(EC.presence_of_element_located(listing_selector))
        except TimeoutException:
            # No ad links appeared on this page: stop paginating but keep
            # the links already collected instead of aborting the whole run.
            break
        driver.execute_script(scroll_to_bottom)
        time.sleep(2)
        current_page += 1

    # converting relative links to absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # The finally clause below closes the browser, so it is not quit here.
        print("No ad links found; nothing to scrape.")
        raise SystemExit(0)

    # going through each ad and collecting its data
    successful = 0
    for listing_url in listing_urls:
        try:
            driver.get(listing_url)
            time.sleep(1.5)
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parsing page html; every column starts as the placeholder
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing] * len(header)

            characteristics = soup.find('div', class_='announcement-characteristics clearfix')
            details = characteristics.find_all('li') if characteristics is not None else []
            span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]

            # the first 13 column names double as the labels searched for
            # in the characteristics list (floor, balcony, ..., payment term)
            for index, key in enumerate(header[:13]):
                key_finder(key, index, span_list, key_list)

            # address: first part -> column 13 (district), second -> column 14 (location)
            address_tag = soup.find('span', itemprop="address")
            if address_tag is not None:
                address = address_tag.text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()

            key_list[15] = _text_or(soup.find('span', class_='counter-views'), missing)  # view count
            key_list[16] = date.today().strftime("%Y/%m/%d")  # scraped date as today
            key_list[17] = _text_or(soup.find('span', class_='date-meta'), missing)  # posted date
            key_list[18] = listing_url  # ad link

            # price is read from the meta tag's content attribute
            price_tag = soup.find('meta', itemprop='price')
            if price_tag is not None:
                key_list[19] = price_tag.get('content', missing)

            # number of rooms: text of the last span inside the location wrapper
            location = soup.find('div', class_='wrap js-single-item__location')
            spans = location.find_all('span') if location is not None else []
            if spans:
                key_list[20] = spans[-1].text.strip()

            key_list[21] = _text_or(soup.find('h1', class_='title-announcement'), missing)  # ad title
            key_list[22] = _text_or(soup.find('div', class_='announcement-description'), missing)  # ad description

            # saving the row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # Best effort: report the failed ad and move on to the next one.
            print(f"Skipped {listing_url}: {e!r}")
            continue

    # save the data to csv (utf-8-sig writes a BOM; newline='' is what the csv module expects)
    filename = f"Unegui_{district_slug}_{date.today().strftime('%Y%m%d')}.csv"
    with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())
    print(f"Saved {successful} of {len(listing_urls)} ads to {filename}")

except Exception as e:
    # Report the failure instead of silently discarding it.
    print(f"Scraping aborted: {e!r}")

finally:
    driver.quit()

     –®–∞–ª:      –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0  –ü–∞—Ä–∫–µ—Ç   1—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º   –¢”©–º”©—Ä          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1  –ü–∞—Ä–∫–µ—Ç   2—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –ü–∞—Ä–∫–µ—Ç  3+—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º   –¢”©–º”©—Ä          8  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3  –ü–∞—Ä–∫–µ—Ç   1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º     –ú–æ–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
4  –ü–∞—Ä–∫–µ—Ç   2—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                2019              16  74.15–º¬≤  ...      –£–ë   
1                2005               6   49.7–º¬≤  ...      –£–ë   
2                2007              13 

# Sukhbaatar district

In [7]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# CSV column names, in output order (English meaning noted beside each one)
header = [
    '–®–∞–ª:',  # floor type
    '–¢–∞–≥—Ç:',  # balcony
    '–ì–∞—Ä–∞–∂:',  # garage
    '–¶–æ–Ω—Ö:',  # window type
    '–•–∞–∞–ª–≥–∞:',  # door type
    '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:',  # number of windows
    '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü',  # construction progress
    '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:',  # built year
    '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:',  # total floors
    '–¢–∞–ª–±–∞–π:',  # size
    '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:',  # located floor
    '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:',  # elevator
    '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:',  # payment term
    '–î“Ø“Ø—Ä—ç–≥:',  # district
    '–ë–∞–π—Ä—à–∏–ª:',  # location
    '“Æ–∑—Å—ç–Ω:',  # view count
    'Scraped_date:',
    'Posted_date:',
    'Link:',
    '“Æ–Ω—ç:',  # price
    '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:',  # number of rooms
    '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:',  # ad title
    '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:',  # ad description
]

# one dict per scraped ad, keyed by the column names above
csv_list = list()

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Store the value of one ad characteristic in ``key_list[index]``.

    Every entry of ``span_list`` that contains ``key`` is read as
    ``label:value`` and the text after the first colon is stored; when
    several entries match, the last one wins. Entries without a colon are
    ignored instead of raising ``IndexError``, and a value that itself
    contains a colon is kept whole.

    Args:
        key: Label text to look for (converted with ``str``).
        index: Position in ``key_list`` to fill (converted with ``int``).
        span_list: Iterable of ``label:value`` strings.
        key_list: List that is updated in place.

    Returns:
        None. If the slot does not hold a string afterwards, it is set to
        the placeholder text.
    """
    index = int(index)
    key = str(key)
    for item in span_list:
        if key in item:
            _, separator, value = item.partition(':')
            if separator:
                key_list[index] = value
    if not isinstance(key_list[index], str):
        key_list[index] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of relative ad links found in a parsed page.

    A link is kept only when its whole ``href`` has the form
    ``/adv/<digits>_<slug>`` with an optional trailing slash, where the slug
    consists of lowercase letters, digits and hyphens. ``fullmatch`` is used
    so that an href ending in a newline is rejected (``$`` would accept it).
    Because the entire string must match, hrefs carrying a query string or
    fragment can never pass, so no separate exclusion list is needed.

    Args:
        soup: Parsed page; only ``soup.find_all('a', href=True)`` is called.

    Returns:
        A set of matching ``href`` strings (duplicates collapse).
    """
    pattern = re.compile(r'/adv/\d+_[a-z0-9-]+/?')
    return {
        link['href']
        for link in soup.find_all('a', href=True)
        if pattern.fullmatch(link['href'])
    }

# Chrome launch flags passed to the browser
chrome_flags = ('--no-sandbox', '--disable-dev-shm-usage')
options = uc.ChromeOptions()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# undetected Chrome driver (used to avoid bot detection) plus a shared
# explicit wait with a 15-second timeout
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# Imported here so this cell does not depend on an extra top-of-cell import.
from selenium.common.exceptions import TimeoutException


def _text_or(tag, default):
    """Return the stripped text of a parsed tag, or ``default`` when the tag is None."""
    return tag.text.strip() if tag is not None else default


try:
    # NOTE(review): this path ends in 'ulan-bator/' rather than a
    # 'ub-<district>/' slug like the other district cells - confirm it
    # really restricts the results to Sukhbaatar district.
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ulan-bator/'
    # Used in the output file name. This cell previously wrote to
    # "Unegui_bayanzurkh_...", overwriting the Bayanzurkh export.
    district_slug = 'sukhbaatar'
    missing = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"  # placeholder stored for any field not found
    listing_selector = (By.CSS_SELECTOR, "a[href^='/adv/']")
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    # navigate to base url and wait until at least one ad link is present
    driver.get(base_url)
    time.sleep(5)
    wait.until(EC.presence_of_element_located(listing_selector))
    driver.execute_script(scroll_to_bottom)
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop if no new ad on 2 pages
        if new_listings:
            no_new_count = 0
            all_listing_urls.update(new_listings)
        else:
            no_new_count += 1
            if no_new_count >= 2:
                break

        if current_page >= max_pages:
            break

        # to next page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)
        try:
            wait.until(EC.presence_of_element_located(listing_selector))
        except TimeoutException:
            # No ad links appeared on this page: stop paginating but keep
            # the links already collected instead of aborting the whole run.
            break
        driver.execute_script(scroll_to_bottom)
        time.sleep(2)
        current_page += 1

    # converting relative links to absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # The finally clause below closes the browser, so it is not quit here.
        print("No ad links found; nothing to scrape.")
        raise SystemExit(0)

    # going through each ad and collecting its data
    successful = 0
    for listing_url in listing_urls:
        try:
            driver.get(listing_url)
            time.sleep(1.5)
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parsing page html; every column starts as the placeholder
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing] * len(header)

            characteristics = soup.find('div', class_='announcement-characteristics clearfix')
            details = characteristics.find_all('li') if characteristics is not None else []
            span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]

            # the first 13 column names double as the labels searched for
            # in the characteristics list (floor, balcony, ..., payment term)
            for index, key in enumerate(header[:13]):
                key_finder(key, index, span_list, key_list)

            # address: first part -> column 13 (district), second -> column 14 (location)
            address_tag = soup.find('span', itemprop="address")
            if address_tag is not None:
                address = address_tag.text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()

            key_list[15] = _text_or(soup.find('span', class_='counter-views'), missing)  # view count
            key_list[16] = date.today().strftime("%Y/%m/%d")  # scraped date as today
            key_list[17] = _text_or(soup.find('span', class_='date-meta'), missing)  # posted date
            key_list[18] = listing_url  # ad link

            # price is read from the meta tag's content attribute
            price_tag = soup.find('meta', itemprop='price')
            if price_tag is not None:
                key_list[19] = price_tag.get('content', missing)

            # number of rooms: text of the last span inside the location wrapper
            location = soup.find('div', class_='wrap js-single-item__location')
            spans = location.find_all('span') if location is not None else []
            if spans:
                key_list[20] = spans[-1].text.strip()

            key_list[21] = _text_or(soup.find('h1', class_='title-announcement'), missing)  # ad title
            key_list[22] = _text_or(soup.find('div', class_='announcement-description'), missing)  # ad description

            # saving the row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # Best effort: report the failed ad and move on to the next one.
            print(f"Skipped {listing_url}: {e!r}")
            continue

    # save the data to csv (utf-8-sig writes a BOM; newline='' is what the csv module expects)
    filename = f"Unegui_{district_slug}_{date.today().strftime('%Y%m%d')}.csv"
    with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())
    print(f"Saved {successful} of {len(listing_urls)} ads to {filename}")

except Exception as e:
    # Report the failure instead of silently discarding it.
    print(f"Scraping aborted: {e!r}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:       –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:  \
0  –ü–∞—Ä–∫–µ—Ç   –¢–∞–≥—Ç–≥“Ø–π  –ë–∞–π—Ö–≥“Ø–π  –¢”©–º”©—Ä–≤–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          8   
1  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          4   
2  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          2   
3  –ü–∞—Ä–∫–µ—Ç  2—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞       –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          6   
4  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º   –¢”©–º”©—Ä          3   

         –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:   –¢–∞–ª–±–∞–π:  ...  \
0  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä–æ–æ–≥“Ø–π                2025              10  316.97–º¬≤  ...   
1    –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω                2007               9    77.2–º¬≤  ...   
2    –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω                2008               9    40.4–º¬≤  ...   
3    –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—

# Bayangol district

In [8]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# CSV column names, in output order (English meaning noted beside each one)
header = [
    '–®–∞–ª:',  # floor type
    '–¢–∞–≥—Ç:',  # balcony
    '–ì–∞—Ä–∞–∂:',  # garage
    '–¶–æ–Ω—Ö:',  # window type
    '–•–∞–∞–ª–≥–∞:',  # door type
    '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:',  # number of windows
    '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü',  # construction progress
    '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:',  # built year
    '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:',  # total floors
    '–¢–∞–ª–±–∞–π:',  # size
    '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:',  # located floor
    '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:',  # elevator
    '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:',  # payment term
    '–î“Ø“Ø—Ä—ç–≥:',  # district
    '–ë–∞–π—Ä—à–∏–ª:',  # location
    '“Æ–∑—Å—ç–Ω:',  # view count
    'Scraped_date:',
    'Posted_date:',
    'Link:',
    '“Æ–Ω—ç:',  # price
    '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:',  # number of rooms
    '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:',  # ad title
    '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:',  # ad description
]

# one dict per scraped ad, keyed by the column names above
csv_list = list()

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Store the value of one ad characteristic in ``key_list[index]``.

    Every entry of ``span_list`` that contains ``key`` is read as
    ``label:value`` and the text after the first colon is stored; when
    several entries match, the last one wins. Entries without a colon are
    ignored instead of raising ``IndexError``, and a value that itself
    contains a colon is kept whole.

    Args:
        key: Label text to look for (converted with ``str``).
        index: Position in ``key_list`` to fill (converted with ``int``).
        span_list: Iterable of ``label:value`` strings.
        key_list: List that is updated in place.

    Returns:
        None. If the slot does not hold a string afterwards, it is set to
        the placeholder text.
    """
    index = int(index)
    key = str(key)
    for item in span_list:
        if key in item:
            _, separator, value = item.partition(':')
            if separator:
                key_list[index] = value
    if not isinstance(key_list[index], str):
        key_list[index] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of relative ad links found in a parsed page.

    A link is kept only when its whole ``href`` has the form
    ``/adv/<digits>_<slug>`` with an optional trailing slash, where the slug
    consists of lowercase letters, digits and hyphens. ``fullmatch`` is used
    so that an href ending in a newline is rejected (``$`` would accept it).
    Because the entire string must match, hrefs carrying a query string or
    fragment can never pass, so no separate exclusion list is needed.

    Args:
        soup: Parsed page; only ``soup.find_all('a', href=True)`` is called.

    Returns:
        A set of matching ``href`` strings (duplicates collapse).
    """
    pattern = re.compile(r'/adv/\d+_[a-z0-9-]+/?')
    return {
        link['href']
        for link in soup.find_all('a', href=True)
        if pattern.fullmatch(link['href'])
    }

# Chrome launch flags passed to the browser
chrome_flags = ('--no-sandbox', '--disable-dev-shm-usage')
options = uc.ChromeOptions()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# undetected Chrome driver (used to avoid bot detection) plus a shared
# explicit wait with a 15-second timeout
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# Imported here so this cell does not depend on an extra top-of-cell import.
from selenium.common.exceptions import TimeoutException


def _text_or(tag, default):
    """Return the stripped text of a parsed tag, or ``default`` when the tag is None."""
    return tag.text.strip() if tag is not None else default


try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-bayangol/'
    # Used in the output file name. This cell previously wrote to
    # "Unegui_bayanzurkh_...", overwriting the Bayanzurkh export.
    district_slug = 'bayangol'
    missing = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"  # placeholder stored for any field not found
    listing_selector = (By.CSS_SELECTOR, "a[href^='/adv/']")
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    # navigate to base url and wait until at least one ad link is present
    driver.get(base_url)
    time.sleep(5)
    wait.until(EC.presence_of_element_located(listing_selector))
    driver.execute_script(scroll_to_bottom)
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop if no new ad on 2 pages
        if new_listings:
            no_new_count = 0
            all_listing_urls.update(new_listings)
        else:
            no_new_count += 1
            if no_new_count >= 2:
                break

        if current_page >= max_pages:
            break

        # to next page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)
        try:
            wait.until(EC.presence_of_element_located(listing_selector))
        except TimeoutException:
            # No ad links appeared on this page: stop paginating but keep
            # the links already collected instead of aborting the whole run.
            break
        driver.execute_script(scroll_to_bottom)
        time.sleep(2)
        current_page += 1

    # converting relative links to absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # The finally clause below closes the browser, so it is not quit here.
        print("No ad links found; nothing to scrape.")
        raise SystemExit(0)

    # going through each ad and collecting its data
    successful = 0
    for listing_url in listing_urls:
        try:
            driver.get(listing_url)
            time.sleep(1.5)
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parsing page html; every column starts as the placeholder
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing] * len(header)

            characteristics = soup.find('div', class_='announcement-characteristics clearfix')
            details = characteristics.find_all('li') if characteristics is not None else []
            span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]

            # the first 13 column names double as the labels searched for
            # in the characteristics list (floor, balcony, ..., payment term)
            for index, key in enumerate(header[:13]):
                key_finder(key, index, span_list, key_list)

            # address: first part -> column 13 (district), second -> column 14 (location)
            address_tag = soup.find('span', itemprop="address")
            if address_tag is not None:
                address = address_tag.text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()

            key_list[15] = _text_or(soup.find('span', class_='counter-views'), missing)  # view count
            key_list[16] = date.today().strftime("%Y/%m/%d")  # scraped date as today
            key_list[17] = _text_or(soup.find('span', class_='date-meta'), missing)  # posted date
            key_list[18] = listing_url  # ad link

            # price is read from the meta tag's content attribute
            price_tag = soup.find('meta', itemprop='price')
            if price_tag is not None:
                key_list[19] = price_tag.get('content', missing)

            # number of rooms: text of the last span inside the location wrapper
            location = soup.find('div', class_='wrap js-single-item__location')
            spans = location.find_all('span') if location is not None else []
            if spans:
                key_list[20] = spans[-1].text.strip()

            key_list[21] = _text_or(soup.find('h1', class_='title-announcement'), missing)  # ad title
            key_list[22] = _text_or(soup.find('div', class_='announcement-description'), missing)  # ad description

            # saving the row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # Best effort: report the failed ad and move on to the next one.
            print(f"Skipped {listing_url}: {e!r}")
            continue

    # save the data to csv (utf-8-sig writes a BOM; newline='' is what the csv module expects)
    filename = f"Unegui_{district_slug}_{date.today().strftime('%Y%m%d')}.csv"
    with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())
    print(f"Saved {successful} of {len(listing_urls)} ads to {filename}")

except Exception as e:
    # Report the failure instead of silently discarding it.
    print(f"Scraping aborted: {e!r}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:       –¶–æ–Ω—Ö:     –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:  \
0  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          3   
1  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          2   
2  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –¢”©–º”©—Ä–≤–∞–∫—É–º  –¢”©–º”©—Ä–≤–∞–∫—É–º          7   
3  –ü–∞—Ä–∫–µ—Ç   –¢–∞–≥—Ç–≥“Ø–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          2   
4  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π       –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          2   

       –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω                2006               6   46.0–º¬≤  ...      –£–ë   
1  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω                2016              12  55.65–º¬≤  ...      –£–ë   
2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω                2024              16

# Chingeltei district

In [9]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# CSV column names, in output order (English meaning noted beside each one)
header = [
    '–®–∞–ª:',  # floor type
    '–¢–∞–≥—Ç:',  # balcony
    '–ì–∞—Ä–∞–∂:',  # garage
    '–¶–æ–Ω—Ö:',  # window type
    '–•–∞–∞–ª–≥–∞:',  # door type
    '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:',  # number of windows
    '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü',  # construction progress
    '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:',  # built year
    '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:',  # total floors
    '–¢–∞–ª–±–∞–π:',  # size
    '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:',  # located floor
    '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:',  # elevator
    '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:',  # payment term
    '–î“Ø“Ø—Ä—ç–≥:',  # district
    '–ë–∞–π—Ä—à–∏–ª:',  # location
    '“Æ–∑—Å—ç–Ω:',  # view count
    'Scraped_date:',
    'Posted_date:',
    'Link:',
    '“Æ–Ω—ç:',  # price
    '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:',  # number of rooms
    '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:',  # ad title
    '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:',  # ad description
]

# one dict per scraped ad, keyed by the column names above
csv_list = list()

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Store the value of one ad characteristic in ``key_list[index]``.

    Every entry of ``span_list`` that contains ``key`` is read as
    ``label:value`` and the text after the first colon is stored; when
    several entries match, the last one wins. Entries without a colon are
    ignored instead of raising ``IndexError``, and a value that itself
    contains a colon is kept whole.

    Args:
        key: Label text to look for (converted with ``str``).
        index: Position in ``key_list`` to fill (converted with ``int``).
        span_list: Iterable of ``label:value`` strings.
        key_list: List that is updated in place.

    Returns:
        None. If the slot does not hold a string afterwards, it is set to
        the placeholder text.
    """
    index = int(index)
    key = str(key)
    for item in span_list:
        if key in item:
            _, separator, value = item.partition(':')
            if separator:
                key_list[index] = value
    if not isinstance(key_list[index], str):
        key_list[index] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of unique ad paths found in the anchors of ``soup``.

    An href is kept when it looks like ``/adv/<digits>_<slug>`` (optional
    trailing slash) and contains none of the query/fragment markers below.
    """
    ad_path = re.compile(r'^/adv/\d+_[a-z0-9-]+/?$')
    unwanted = ('?', '#', 'page=', 'sort=', 'view=')

    return {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if ad_path.match(anchor['href'])
        and not any(token in anchor['href'] for token in unwanted)
    }

# configuring Chrome options
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# undetected Chrome driver to avoid bot detection
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# placeholder written to a column when the ad page does not provide the value
missing_value = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"
# errors raised by the BeautifulSoup lookups below when an element or attribute is absent
lookup_errors = (AttributeError, TypeError, KeyError, IndexError)

try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-chingeltej/'

    # open the first result page, wait for at least one ad link, then scroll to the bottom
    driver.get(base_url)
    time.sleep(5)

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through result pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop once two pages in a row bring no new ad
        if not new_listings:
            no_new_count += 1
            if no_new_count >= 2:
                break
        else:
            no_new_count = 0
            all_listing_urls.update(new_listings)

        if current_page >= max_pages:
            break

        # move on to the next result page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_page += 1

    # turn the relative ad paths into absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # Previously this called driver.quit() and exit(); exit() raises
        # SystemExit, which bypassed the handler below and made the finally
        # clause quit the driver a second time. Raising a regular error stops
        # the run without writing a CSV and leaves cleanup to finally.
        raise RuntimeError("no ad links were collected from the result pages")

    # scraping each ad
    start_time = time.time()
    successful = 0

    for listing_url in listing_urls:
        try:
            # open the ad page
            driver.get(listing_url)
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parse the page html; every column starts out as "missing"
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing_value] * len(header)

            # characteristics list: one "<label>:<value>" string per <li>,
            # with newlines and spaces removed
            try:
                details = soup.find('div', class_='announcement-characteristics clearfix').find_all('li')
                span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]
            except lookup_errors:
                span_list = []

            # Columns 0-12 (floor, balcony, garage, window, door, number of
            # windows, construction progress, built year, total floors, size,
            # located floor, elevator, payment term): the first 13 header
            # entries are exactly the labels to look up, in column order.
            for index, label in enumerate(header[:13]):
                key_finder(label, index, span_list, key_list)

            # address: text before the separator goes to column 13, text after it to column 14
            try:
                address = soup.find('span', itemprop="address").text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()
            except lookup_errors:
                key_list[13] = key_list[14] = missing_value

            # view count
            try:
                key_list[15] = soup.find('span', class_='counter-views').text.strip()
            except lookup_errors:
                key_list[15] = missing_value

            # scraped date is today
            key_list[16] = date.today().strftime("%Y/%m/%d")

            # posted date
            try:
                key_list[17] = soup.find('span', class_='date-meta').text.strip()
            except lookup_errors:
                key_list[17] = missing_value

            # ad link
            key_list[18] = listing_url

            # price
            try:
                key_list[19] = soup.find('meta', itemprop='price')['content']
            except lookup_errors:
                key_list[19] = missing_value

            # number of rooms: last <span> of the location block
            try:
                key_list[20] = soup.find('div', class_='wrap js-single-item__location').find_all('span')[-1].text.strip()
            except lookup_errors:
                key_list[20] = missing_value

            # ad title
            try:
                key_list[21] = soup.find('h1', class_='title-announcement').text.strip()
            except lookup_errors:
                key_list[21] = missing_value

            # ad description
            try:
                key_list[22] = soup.find('div', class_='announcement-description').text.strip()
            except lookup_errors:
                key_list[22] = missing_value

            # store the finished row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # best effort: report the failing ad and carry on with the next one
            print(f"Skipping {listing_url}: {type(e).__name__}: {e}")
            continue

    elapsed = time.time() - start_time
    print(f"Scraped {successful} of {len(listing_urls)} ads in {elapsed:.0f} s")

    # save the data to csv; the file name carries this cell's district so the
    # per-district runs no longer overwrite one another
    filename = f"Unegui_chingeltei_{date.today().strftime('%Y%m%d')}.csv"
    with codecs.open(filename, 'w', 'utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())

except Exception as e:
    # report the failure instead of discarding it silently
    print(f"Scraping stopped: {type(e).__name__}: {e}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0  –¶–µ–º–µ–Ω—Ç  2—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          4  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          5  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          5  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º   –¢”©–º”©—Ä          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
4  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                2015               3   23.1–º¬≤  ...      –£–ë   
1                2000               6  100.0–º¬≤  ...      –£–ë   
2                2008              12

# Khan Uul district

In [10]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# column names such as floor type, balcony, garage, window type etc
header = ['–®–∞–ª:', '–¢–∞–≥—Ç:', '–ì–∞—Ä–∞–∂:', '–¶–æ–Ω—Ö:', '–•–∞–∞–ª–≥–∞:', '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:', '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü', 
          '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:', '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:', '–¢–∞–ª–±–∞–π:', '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:', 
          '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:', '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:', '–î“Ø“Ø—Ä—ç–≥:', '–ë–∞–π—Ä—à–∏–ª:', '“Æ–∑—Å—ç–Ω:', 
          'Scraped_date:', 'Posted_date:', 'Link:', '“Æ–Ω—ç:', '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:', 
          '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:', '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:']

csv_list = []

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Copy the value of one ad attribute from ``span_list`` into ``key_list``.

    Args:
        key: Attribute label to look for; matched as a substring of each item.
        index: Position in ``key_list`` to fill (anything ``int()`` accepts).
        span_list: Strings of the form ``"<label>:<value>"``.
        key_list: Row being built; mutated in place.

    When several items contain the label, the last one wins. If the slot
    does not hold a string afterwards, it is set to the "no information"
    placeholder.
    """
    position = int(index)
    label = str(key)
    for item in span_list:
        if label in item:
            # Split on the first ':' only, so a value that itself contains
            # ':' is kept whole; an item without any ':' is ignored instead
            # of raising IndexError.
            _, colon, value = item.partition(':')
            if colon:
                key_list[position] = value
    if not isinstance(key_list[position], str):
        key_list[position] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of unique ad paths found in the anchors of ``soup``.

    An href is kept when it looks like ``/adv/<digits>_<slug>`` (optional
    trailing slash) and contains none of the query/fragment markers below.
    """
    ad_path = re.compile(r'^/adv/\d+_[a-z0-9-]+/?$')
    unwanted = ('?', '#', 'page=', 'sort=', 'view=')

    return {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if ad_path.match(anchor['href'])
        and not any(token in anchor['href'] for token in unwanted)
    }

# configuring Chrome options
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# undetected Chrome driver to avoid bot detection
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# placeholder written to a column when the ad page does not provide the value
missing_value = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"
# errors raised by the BeautifulSoup lookups below when an element or attribute is absent
lookup_errors = (AttributeError, TypeError, KeyError, IndexError)

try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-hanuul/'

    # open the first result page, wait for at least one ad link, then scroll to the bottom
    driver.get(base_url)
    time.sleep(5)

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through result pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop once two pages in a row bring no new ad
        if not new_listings:
            no_new_count += 1
            if no_new_count >= 2:
                break
        else:
            no_new_count = 0
            all_listing_urls.update(new_listings)

        if current_page >= max_pages:
            break

        # move on to the next result page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_page += 1

    # turn the relative ad paths into absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # Previously this called driver.quit() and exit(); exit() raises
        # SystemExit, which bypassed the handler below and made the finally
        # clause quit the driver a second time. Raising a regular error stops
        # the run without writing a CSV and leaves cleanup to finally.
        raise RuntimeError("no ad links were collected from the result pages")

    # scraping each ad
    start_time = time.time()
    successful = 0

    for listing_url in listing_urls:
        try:
            # open the ad page
            driver.get(listing_url)
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parse the page html; every column starts out as "missing"
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing_value] * len(header)

            # characteristics list: one "<label>:<value>" string per <li>,
            # with newlines and spaces removed
            try:
                details = soup.find('div', class_='announcement-characteristics clearfix').find_all('li')
                span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]
            except lookup_errors:
                span_list = []

            # Columns 0-12 (floor, balcony, garage, window, door, number of
            # windows, construction progress, built year, total floors, size,
            # located floor, elevator, payment term): the first 13 header
            # entries are exactly the labels to look up, in column order.
            for index, label in enumerate(header[:13]):
                key_finder(label, index, span_list, key_list)

            # address: text before the separator goes to column 13, text after it to column 14
            try:
                address = soup.find('span', itemprop="address").text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()
            except lookup_errors:
                key_list[13] = key_list[14] = missing_value

            # view count
            try:
                key_list[15] = soup.find('span', class_='counter-views').text.strip()
            except lookup_errors:
                key_list[15] = missing_value

            # scraped date is today
            key_list[16] = date.today().strftime("%Y/%m/%d")

            # posted date
            try:
                key_list[17] = soup.find('span', class_='date-meta').text.strip()
            except lookup_errors:
                key_list[17] = missing_value

            # ad link
            key_list[18] = listing_url

            # price
            try:
                key_list[19] = soup.find('meta', itemprop='price')['content']
            except lookup_errors:
                key_list[19] = missing_value

            # number of rooms: last <span> of the location block
            try:
                key_list[20] = soup.find('div', class_='wrap js-single-item__location').find_all('span')[-1].text.strip()
            except lookup_errors:
                key_list[20] = missing_value

            # ad title
            try:
                key_list[21] = soup.find('h1', class_='title-announcement').text.strip()
            except lookup_errors:
                key_list[21] = missing_value

            # ad description
            try:
                key_list[22] = soup.find('div', class_='announcement-description').text.strip()
            except lookup_errors:
                key_list[22] = missing_value

            # store the finished row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # best effort: report the failing ad and carry on with the next one
            print(f"Skipping {listing_url}: {type(e).__name__}: {e}")
            continue

    elapsed = time.time() - start_time
    print(f"Scraped {successful} of {len(listing_urls)} ads in {elapsed:.0f} s")

    # save the data to csv; the file name carries this cell's district so the
    # per-district runs no longer overwrite one another
    filename = f"Unegui_khanuul_{date.today().strftime('%Y%m%d')}.csv"
    with codecs.open(filename, 'w', 'utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())

except Exception as e:
    # report the failure instead of discarding it silently
    print(f"Scraping stopped: {type(e).__name__}: {e}")

finally:
    driver.quit()

      –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0   –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          4  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1   –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º   –¢”©–º”©—Ä          5  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –õ–∞–º–∏–Ω–∞—Ç  1—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º   –¢”©–º”©—Ä          4  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3   –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
4   –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          6  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                2024              23  109.0–º¬≤  ...      –£–ë   
1                2024              25  79.98–º¬≤  ...      –£–ë   
2                2024           

# Songinokhairkhan district

In [11]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# column names such as floor type, balcony, garage, window type etc
header = ['–®–∞–ª:', '–¢–∞–≥—Ç:', '–ì–∞—Ä–∞–∂:', '–¶–æ–Ω—Ö:', '–•–∞–∞–ª–≥–∞:', '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:', '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü', 
          '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:', '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:', '–¢–∞–ª–±–∞–π:', '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:', 
          '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:', '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:', '–î“Ø“Ø—Ä—ç–≥:', '–ë–∞–π—Ä—à–∏–ª:', '“Æ–∑—Å—ç–Ω:', 
          'Scraped_date:', 'Posted_date:', 'Link:', '“Æ–Ω—ç:', '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:', 
          '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:', '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:']

csv_list = []

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Copy the value of one ad attribute from ``span_list`` into ``key_list``.

    Args:
        key: Attribute label to look for; matched as a substring of each item.
        index: Position in ``key_list`` to fill (anything ``int()`` accepts).
        span_list: Strings of the form ``"<label>:<value>"``.
        key_list: Row being built; mutated in place.

    When several items contain the label, the last one wins. If the slot
    does not hold a string afterwards, it is set to the "no information"
    placeholder.
    """
    position = int(index)
    label = str(key)
    for item in span_list:
        if label in item:
            # Split on the first ':' only, so a value that itself contains
            # ':' is kept whole; an item without any ':' is ignored instead
            # of raising IndexError.
            _, colon, value = item.partition(':')
            if colon:
                key_list[position] = value
    if not isinstance(key_list[position], str):
        key_list[position] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of unique ad paths found in the anchors of ``soup``.

    An href is kept when it looks like ``/adv/<digits>_<slug>`` (optional
    trailing slash) and contains none of the query/fragment markers below.
    """
    ad_path = re.compile(r'^/adv/\d+_[a-z0-9-]+/?$')
    unwanted = ('?', '#', 'page=', 'sort=', 'view=')

    return {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if ad_path.match(anchor['href'])
        and not any(token in anchor['href'] for token in unwanted)
    }

# configuring Chrome options
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# undetected Chrome driver to avoid bot detection
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# placeholder written to a column when the ad page does not provide the value
missing_value = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"
# errors raised by the BeautifulSoup lookups below when an element or attribute is absent
lookup_errors = (AttributeError, TypeError, KeyError, IndexError)

try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-songinohajrhan/'

    # open the first result page, wait for at least one ad link, then scroll to the bottom
    driver.get(base_url)
    time.sleep(5)

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through result pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop once two pages in a row bring no new ad
        if not new_listings:
            no_new_count += 1
            if no_new_count >= 2:
                break
        else:
            no_new_count = 0
            all_listing_urls.update(new_listings)

        if current_page >= max_pages:
            break

        # move on to the next result page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_page += 1

    # turn the relative ad paths into absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # Previously this called driver.quit() and exit(); exit() raises
        # SystemExit, which bypassed the handler below and made the finally
        # clause quit the driver a second time. Raising a regular error stops
        # the run without writing a CSV and leaves cleanup to finally.
        raise RuntimeError("no ad links were collected from the result pages")

    # scraping each ad
    start_time = time.time()
    successful = 0

    for listing_url in listing_urls:
        try:
            # open the ad page
            driver.get(listing_url)
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parse the page html; every column starts out as "missing"
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing_value] * len(header)

            # characteristics list: one "<label>:<value>" string per <li>,
            # with newlines and spaces removed
            try:
                details = soup.find('div', class_='announcement-characteristics clearfix').find_all('li')
                span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]
            except lookup_errors:
                span_list = []

            # Columns 0-12 (floor, balcony, garage, window, door, number of
            # windows, construction progress, built year, total floors, size,
            # located floor, elevator, payment term): the first 13 header
            # entries are exactly the labels to look up, in column order.
            for index, label in enumerate(header[:13]):
                key_finder(label, index, span_list, key_list)

            # address: text before the separator goes to column 13, text after it to column 14
            try:
                address = soup.find('span', itemprop="address").text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()
            except lookup_errors:
                key_list[13] = key_list[14] = missing_value

            # view count
            try:
                key_list[15] = soup.find('span', class_='counter-views').text.strip()
            except lookup_errors:
                key_list[15] = missing_value

            # scraped date is today
            key_list[16] = date.today().strftime("%Y/%m/%d")

            # posted date
            try:
                key_list[17] = soup.find('span', class_='date-meta').text.strip()
            except lookup_errors:
                key_list[17] = missing_value

            # ad link
            key_list[18] = listing_url

            # price
            try:
                key_list[19] = soup.find('meta', itemprop='price')['content']
            except lookup_errors:
                key_list[19] = missing_value

            # number of rooms: last <span> of the location block
            try:
                key_list[20] = soup.find('div', class_='wrap js-single-item__location').find_all('span')[-1].text.strip()
            except lookup_errors:
                key_list[20] = missing_value

            # ad title
            try:
                key_list[21] = soup.find('h1', class_='title-announcement').text.strip()
            except lookup_errors:
                key_list[21] = missing_value

            # ad description
            try:
                key_list[22] = soup.find('div', class_='announcement-description').text.strip()
            except lookup_errors:
                key_list[22] = missing_value

            # store the finished row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # best effort: report the failing ad and carry on with the next one
            print(f"Skipping {listing_url}: {type(e).__name__}: {e}")
            continue

    elapsed = time.time() - start_time
    print(f"Scraped {successful} of {len(listing_urls)} ads in {elapsed:.0f} s")

    # save the data to csv; the file name carries this cell's district so the
    # per-district runs no longer overwrite one another
    filename = f"Unegui_songinokhairkhan_{date.today().strftime('%Y%m%d')}.csv"
    with codecs.open(filename, 'w', 'utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())

except Exception as e:
    # report the failure instead of discarding it silently
    print(f"Scraping stopped: {type(e).__name__}: {e}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö:     –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0  –ü–∞—Ä–∫–µ—Ç   –¢–∞–≥—Ç–≥“Ø–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º       –¢”©–º”©—Ä          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –¢”©–º”©—Ä–≤–∞–∫—É–º          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3  –ü–∞—Ä–∫–µ—Ç  2—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º      –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
4  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π   –ë–∞–π–≥–∞–∞  –í–∞–∫—É–º       –¢”©–º”©—Ä          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                2025               3   57.7–º¬≤  ...      –£–ë   
1                2025              15  78.49–º¬≤  ...      –£–ë   
2            

# Baganuur district

In [12]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# column names such as floor type, balcony, garage, window type etc
header = ['–®–∞–ª:', '–¢–∞–≥—Ç:', '–ì–∞—Ä–∞–∂:', '–¶–æ–Ω—Ö:', '–•–∞–∞–ª–≥–∞:', '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:', '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü', 
          '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:', '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:', '–¢–∞–ª–±–∞–π:', '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:', 
          '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:', '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:', '–î“Ø“Ø—Ä—ç–≥:', '–ë–∞–π—Ä—à–∏–ª:', '“Æ–∑—Å—ç–Ω:', 
          'Scraped_date:', 'Posted_date:', 'Link:', '“Æ–Ω—ç:', '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:', 
          '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:', '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:']

csv_list = []

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Copy the value of one ad attribute from ``span_list`` into ``key_list``.

    Args:
        key: Attribute label to look for; matched as a substring of each item.
        index: Position in ``key_list`` to fill (anything ``int()`` accepts).
        span_list: Strings of the form ``"<label>:<value>"``.
        key_list: Row being built; mutated in place.

    When several items contain the label, the last one wins. If the slot
    does not hold a string afterwards, it is set to the "no information"
    placeholder.
    """
    position = int(index)
    label = str(key)
    for item in span_list:
        if label in item:
            # Split on the first ':' only, so a value that itself contains
            # ':' is kept whole; an item without any ':' is ignored instead
            # of raising IndexError.
            _, colon, value = item.partition(':')
            if colon:
                key_list[position] = value
    if not isinstance(key_list[position], str):
        key_list[position] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of unique ad paths found in the anchors of ``soup``.

    An href is kept when it looks like ``/adv/<digits>_<slug>`` (optional
    trailing slash) and contains none of the query/fragment markers below.
    """
    ad_path = re.compile(r'^/adv/\d+_[a-z0-9-]+/?$')
    unwanted = ('?', '#', 'page=', 'sort=', 'view=')

    return {
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if ad_path.match(anchor['href'])
        and not any(token in anchor['href'] for token in unwanted)
    }

# configuring Chrome options
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# undetected Chrome driver to avoid bot detection
driver = uc.Chrome(options=options, version_main=None)
wait = WebDriverWait(driver, 15)

# placeholder written to a column when the ad page does not provide the value
missing_value = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"
# errors raised by the BeautifulSoup lookups below when an element or attribute is absent
lookup_errors = (AttributeError, TypeError, KeyError, IndexError)

try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-baganuur/'

    # open the first result page, wait for at least one ad link, then scroll to the bottom
    driver.get(base_url)
    time.sleep(5)

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through result pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop once two pages in a row bring no new ad
        if not new_listings:
            no_new_count += 1
            if no_new_count >= 2:
                break
        else:
            no_new_count = 0
            all_listing_urls.update(new_listings)

        if current_page >= max_pages:
            break

        # move on to the next result page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_page += 1

    # turn the relative ad paths into absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # Previously this called driver.quit() and exit(); exit() raises
        # SystemExit, which bypassed the handler below and made the finally
        # clause quit the driver a second time. Raising a regular error stops
        # the run without writing a CSV and leaves cleanup to finally.
        raise RuntimeError("no ad links were collected from the result pages")

    # scraping each ad
    start_time = time.time()
    successful = 0

    for listing_url in listing_urls:
        try:
            # open the ad page
            driver.get(listing_url)
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parse the page html; every column starts out as "missing"
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing_value] * len(header)

            # characteristics list: one "<label>:<value>" string per <li>,
            # with newlines and spaces removed
            try:
                details = soup.find('div', class_='announcement-characteristics clearfix').find_all('li')
                span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]
            except lookup_errors:
                span_list = []

            # Columns 0-12 (floor, balcony, garage, window, door, number of
            # windows, construction progress, built year, total floors, size,
            # located floor, elevator, payment term): the first 13 header
            # entries are exactly the labels to look up, in column order.
            for index, label in enumerate(header[:13]):
                key_finder(label, index, span_list, key_list)

            # address: text before the separator goes to column 13, text after it to column 14
            try:
                address = soup.find('span', itemprop="address").text.split('‚Äî')
                key_list[13] = address[0].strip()
                if len(address) > 1:
                    key_list[14] = address[1].strip()
            except lookup_errors:
                key_list[13] = key_list[14] = missing_value

            # view count
            try:
                key_list[15] = soup.find('span', class_='counter-views').text.strip()
            except lookup_errors:
                key_list[15] = missing_value

            # scraped date is today
            key_list[16] = date.today().strftime("%Y/%m/%d")

            # posted date
            try:
                key_list[17] = soup.find('span', class_='date-meta').text.strip()
            except lookup_errors:
                key_list[17] = missing_value

            # ad link
            key_list[18] = listing_url

            # price
            try:
                key_list[19] = soup.find('meta', itemprop='price')['content']
            except lookup_errors:
                key_list[19] = missing_value

            # number of rooms: last <span> of the location block
            try:
                key_list[20] = soup.find('div', class_='wrap js-single-item__location').find_all('span')[-1].text.strip()
            except lookup_errors:
                key_list[20] = missing_value

            # ad title
            try:
                key_list[21] = soup.find('h1', class_='title-announcement').text.strip()
            except lookup_errors:
                key_list[21] = missing_value

            # ad description
            try:
                key_list[22] = soup.find('div', class_='announcement-description').text.strip()
            except lookup_errors:
                key_list[22] = missing_value

            # store the finished row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # best effort: report the failing ad and carry on with the next one
            print(f"Skipping {listing_url}: {type(e).__name__}: {e}")
            continue

    elapsed = time.time() - start_time
    print(f"Scraped {successful} of {len(listing_urls)} ads in {elapsed:.0f} s")

    # save the data to csv; the file name carries this cell's district so the
    # per-district runs no longer overwrite one another
    filename = f"Unegui_baganuur_{date.today().strftime('%Y%m%d')}.csv"
    with codecs.open(filename, 'w', 'utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    df = pd.DataFrame(csv_list)
    print(df.head())

except Exception as e:
    # report the failure instead of discarding it silently
    print(f"Scraping stopped: {type(e).__name__}: {e}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º   –¢”©–º”©—Ä          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –ü–∞—Ä–∫–µ—Ç  2—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä: –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                1980               5  36.0–º¬≤  ...      –£–ë   
1                1986               5  47.0–º¬≤  ...      –£–ë   
2                2024               9  55.3–º¬≤  ...      –£–ë   
3                1985               5  34.0–º¬≤  ...      –£–ë   

                      –ë–∞–π

# Nalaikh district

In [13]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import codecs
import csv
import time
import pandas as pd
from datetime import date
import re

# CSV column names, in the exact order the scraper fills each row.
# Entries 0-12 double as the labels searched for in an ad's characteristics
# list; the remaining entries are filled from other parts of the ad page.
header = [
    '–®–∞–ª:',  # 0: floor type
    '–¢–∞–≥—Ç:',  # 1: balcony
    '–ì–∞—Ä–∞–∂:',  # 2: garage
    '–¶–æ–Ω—Ö:',  # 3: window type
    '–•–∞–∞–ª–≥–∞:',  # 4: door type
    '–¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:',  # 5: number of windows
    '–ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü',  # 6: construction progress (no trailing colon)
    '–ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω:',  # 7: built year
    '–ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:',  # 8: total floor
    '–¢–∞–ª–±–∞–π:',  # 9: size
    '–•—ç–¥—ç–Ω–¥–∞–≤—Ö–∞—Ä—Ç:',  # 10: located floor
    '–¶–∞—Ö–∏–ª–≥–∞–∞–Ω—à–∞—Ç—Ç–∞–π—ç—Å—ç—Ö:',  # 11: elevator
    '–¢”©–ª–±”©—Ä–∏–π–Ω–Ω”©—Ö—Ü”©–ª:',  # 12: payment term
    '–î“Ø“Ø—Ä—ç–≥:',  # 13: district
    '–ë–∞–π—Ä—à–∏–ª:',  # 14: location
    '“Æ–∑—Å—ç–Ω:',  # 15: view count
    'Scraped_date:',  # 16: date the ad was scraped
    'Posted_date:',  # 17: date the ad was posted
    'Link:',  # 18: ad link
    '“Æ–Ω—ç:',  # 19: price
    '”®—Ä”©”©–Ω–∏–π –¢–æ–æ:',  # 20: number of rooms
    '–ó–∞—Ä—ã–Ω –≥–∞—Ä—á–∏–≥:',  # 21: ad title
    '–ó–∞—Ä—ã–Ω –¢–∞–π–ª–±–∞—Ä:',  # 22: ad description
]

# One dict per scraped ad (keys are the header entries); written to CSV at the end.
csv_list = list()

# searches for specific property attribute in the list and extract the value
def key_finder(key, index, span_list, key_list):
    """Copy the value of one labelled characteristic into ``key_list``.

    Every entry of ``span_list`` is expected to look like ``"<label>:<value>"``.
    For each entry that contains ``key``, the text after the first colon is
    stored in ``key_list[index]``; when several entries match, the last one
    wins.  Entries that contain ``key`` but no colon are ignored instead of
    raising ``IndexError``, and a value that itself contains a colon is kept
    whole rather than cut at the second colon.

    Args:
        key: Label to look for (matched as a substring of each entry).
        index: Position in ``key_list`` to fill; anything ``int()`` accepts.
        span_list: Iterable of ``"label:value"`` strings from the ad page.
        key_list: Row being built; modified in place.

    Returns:
        None.  ``key_list`` is updated in place.
    """
    slot = int(index)
    label = str(key)

    for item in span_list:
        if label not in item:
            continue
        _, colon, value = item.partition(':')
        if colon:
            key_list[slot] = value

    # Guarantee the slot always ends up holding a string for the CSV writer.
    if not isinstance(key_list[slot], str):
        key_list[slot] = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

# extract all unique property ad links from the page
def extract_listing_links(soup):
    """Return the set of distinct ad hrefs found among a page's anchors.

    An href is kept when it has the form ``/adv/<digits>_<slug>`` (lowercase
    letters, digits and hyphens in the slug, optional trailing slash) and
    contains none of the query/fragment markers listed below.

    Args:
        soup: Parsed page exposing ``find_all('a', href=True)``.

    Returns:
        A set of site-relative href strings.
    """
    ad_href = re.compile(r'^/adv/\d+_[a-z0-9-]+/?$')
    unwanted_tokens = ['?', '#', 'page=', 'sort=', 'view=']

    candidate_hrefs = (anchor['href'] for anchor in soup.find_all('a', href=True))

    return {
        target
        for target in candidate_hrefs
        if ad_href.match(target)
        and all(token not in target for token in unwanted_tokens)
    }

# Chrome launch options: two extra command-line switches are passed to the browser.
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Start the undetected-chromedriver browser (used to avoid bot detection).
# NOTE(review): version_main=None presumably leaves Chrome version selection
# to the library - confirm against the undetected_chromedriver docs.
driver = uc.Chrome(version_main=None, options=options)

# Shared explicit-wait helper for this driver, created with a timeout of 15.
wait = WebDriverWait(driver, timeout=15)

# Scrape the Nalaikh district: collect ad links from the paginated search
# results, visit every ad, and export all collected rows to one CSV file.
# The browser is always closed in the finally clause.
try:
    base_url = 'https://www.unegui.mn/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/ub-nalajh/'

    # placeholder stored for every field that cannot be read from the ad page
    missing_value = "–ú—ç–¥—ç—ç–ª—ç–ª –±–∞–π—Ö–≥“Ø–π"

    # navigate to base url and wait until at least one ad link is present
    driver.get(base_url)
    time.sleep(5)

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    # pagination variables
    all_listing_urls = set()
    current_page = 1
    max_pages = 2
    no_new_count = 0

    # loop through pages and collect ad links
    while current_page <= max_pages:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        new_listings = extract_listing_links(soup) - all_listing_urls

        # stop after 2 consecutive pages that contribute no new ad
        if new_listings:
            no_new_count = 0
            all_listing_urls.update(new_listings)
        else:
            no_new_count += 1
            if no_new_count >= 2:
                break

        # the last allowed page has just been processed
        if current_page >= max_pages:
            break

        # to next page
        driver.get(f"{base_url}?page={current_page + 1}")
        time.sleep(4)

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[href^='/adv/']")))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        current_page += 1

    # converting site-relative links to absolute URLs
    listing_urls = [f"https://www.unegui.mn{link}" for link in all_listing_urls]

    if not listing_urls:
        # Nothing to scrape: stop here. The finally clause below closes the
        # browser, so it is not quit a second time at this point.
        print("No ad links were collected; nothing to scrape.")
        raise SystemExit

    # going through each ad and collecting its data
    successful = 0
    for listing_url in listing_urls:
        try:
            # go to each ad
            driver.get(listing_url)
            time.sleep(1.5)

            wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

            # parsing page html; every column starts out as "missing"
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            key_list = [missing_value] * len(header)

            # characteristics list, flattened to "label:value" strings
            try:
                details = soup.find('div', class_='announcement-characteristics clearfix').find_all('li')
                span_list = [item.text.replace('\n', '').replace(' ', '') for item in details]
            except Exception:
                span_list = []

            # The first 13 header entries are the characteristic labels
            # (floor, balcony, garage, window, door, number of windows,
            # construction progress, built year, total floor, size,
            # located floor, elevator, payment term), in column order.
            for column, label in enumerate(header[:13]):
                key_finder(label, column, span_list, key_list)

            # get address: district before the separator, location after it
            try:
                address = soup.find('span', itemprop="address").text.split('‚Äî')
                key_list[13] = address[0].strip()
                key_list[14] = address[1].strip() if len(address) > 1 else missing_value
            except Exception:
                key_list[13] = key_list[14] = missing_value

            # get view count
            try:
                key_list[15] = soup.find('span', class_='counter-views').text.strip()
            except Exception:
                key_list[15] = missing_value

            # scraped date as today
            key_list[16] = date.today().strftime("%Y/%m/%d")

            # posted date
            try:
                key_list[17] = soup.find('span', class_='date-meta').text.strip()
            except Exception:
                key_list[17] = missing_value

            # ad link
            key_list[18] = listing_url

            # price
            try:
                key_list[19] = soup.find('meta', itemprop='price')['content']
            except Exception:
                key_list[19] = missing_value

            # number of rooms: text of the last span in the location wrapper
            try:
                key_list[20] = soup.find('div', class_='wrap js-single-item__location').find_all('span')[-1].text.strip()
            except Exception:
                key_list[20] = missing_value

            # ad title
            try:
                key_list[21] = soup.find('h1', class_='title-announcement').text.strip()
            except Exception:
                key_list[21] = missing_value

            # ad description
            try:
                key_list[22] = soup.find('div', class_='announcement-description').text.strip()
            except Exception:
                key_list[22] = missing_value

            # saving the row
            csv_list.append(dict(zip(header, key_list)))
            successful += 1

        except Exception as e:
            # Best effort: one broken ad must not stop the run, but say so.
            print(f"Skipping {listing_url}: {e!r}")
            continue

    # save the data to csv (utf-8-sig writes a BOM; newline='' is what the
    # csv module expects from the file object)
    filename = f"Unegui_nalaikh_{date.today().strftime('%Y%m%d')}.csv"
    with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(csv_list)

    print(f"Scraped {successful} of {len(listing_urls)} ads into {filename}")

    df = pd.DataFrame(csv_list)
    print(df.head())

except Exception as e:
    # Report the failure instead of hiding it; cleanup still runs below.
    print(f"Nalaikh scrape aborted: {e!r}")

finally:
    driver.quit()

     –®–∞–ª:     –¢–∞–≥—Ç:   –ì–∞—Ä–∞–∂:  –¶–æ–Ω—Ö: –•–∞–∞–ª–≥–∞: –¶–æ–Ω—Ö–Ω—ã—Ç–æ–æ:      –ë–∞—Ä–∏–ª–≥—ã–Ω—è–≤—Ü  \
0  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          2  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
1  –ü–∞—Ä–∫–µ—Ç  2—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          5  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
2  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          4  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
3  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          4  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   
4  –ü–∞—Ä–∫–µ—Ç  1—Ç–∞–≥—Ç—Ç–∞–π  –ë–∞–π—Ö–≥“Ø–π  –í–∞–∫—É–º  –ë“Ø—Ä–≥—ç–¥          3  –ê—à–∏–≥–ª–∞–ª—Ç–∞–¥–æ—Ä—Å–æ–Ω   

  –ê—à–∏–≥–ª–∞–ª—Ç–∞–Ω–¥–æ—Ä—Å–æ–Ω–æ–Ω: –ë–∞—Ä–∏–ª–≥—ã–Ω–¥–∞–≤—Ö–∞—Ä:  –¢–∞–ª–±–∞–π:  ... –î“Ø“Ø—Ä—ç–≥:  \
0                2000               5   32.0–º¬≤  ...      –£–ë   
1                2014               5  77.56–º¬≤  ...      –£–ë   
2                2000               