In [1]:
import re
import csv
import time
from playwright.async_api import async_playwright
from parsel import Selector

In [2]:
playwright = await async_playwright().start()

browser = await playwright.chromium.launch(
    headless=False,
    channel="chrome",
    # slow_mo=500,
    args=["--disable-blink-features", "--disable-blink-features=AutomationControlled"],
    # timeout=0
)

context = await browser.new_context(
    user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
)

page = await context.new_page()
page.set_default_timeout(10000)


def extract_lat_long(url, min_lat=3.0, min_long=101.0):
    """Extract the ``@<lat>,<long>`` coordinate pair embedded in a URL.

    Args:
        url: URL text to scan (e.g. ``page.url`` after a Google Maps search).
            Anything that is not a string is treated as "no coordinates".
        min_lat: Smallest latitude accepted. The default is the original
            hard-coded cut-off (presumably a coarse filter for the
            Kuala Lumpur area; confirm before reusing elsewhere).
        min_long: Smallest longitude accepted; same caveat as ``min_lat``.

    Returns:
        ``(lat, long)`` as floats when a pair is found and both values reach
        their minimum, otherwise ``(None, None)``.
    """
    if not isinstance(url, str):
        return None, None

    # Both numbers must carry a decimal part; an optional leading minus is allowed.
    match = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', url)
    if match is None:
        return None, None

    lat, long = (float(part) for part in match.groups())
    if lat < min_lat or long < min_long:
        return None, None
    return lat, long

# Marker printed once every statement in this cell has run.
print("== Code executed ===")

== Code executed ===


In [3]:
# Project name reader: collect the `project_name` column of the input CSV.
# newline='' lets the csv module do its own line-ending handling, which it
# needs to parse quoted fields that contain line breaks correctly.
with open('kuala_lumpur_project_name.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    project_names = [row['project_name'] for row in reader]

# Only this slice of the names is fed to the search loop in the current run.
feed_project_name = project_names[0:5]
print(feed_project_name)

['The Connaught One', 'Radium Adesa Desa East Residences', 'Noora @ Desa ParkCity', 'Sunway Artessa', 'Sfera']


In [4]:
# Load Google Maps in the shared tab. As the cell's last expression, the
# Response object returned by goto() is what the cell displays.
await page.goto("https://www.google.com/maps")

<Response url='https://www.google.com/maps' request=<Request url='https://www.google.com/maps' method='GET'>>

In [5]:
# Accumulator for the per-project coordinate rows appended by the search loop.
# Re-running this cell discards any rows collected so far.
data = []

In [6]:
for project_name in feed_project_name:
    await page.get_by_role("textbox", name="Google Maps-এ খুঁজুন").fill(project_name)
    await page.get_by_role("button", name="খুঁজুন").click()
    await page.wait_for_timeout(3000)

    lat, long = extract_lat_long(page.url)

    meta_dic = {
        "_04_Project_Name": project_name,
        "_31_Latitude": lat,
        "_32_Longitude": long
    }
    data.append(meta_dic)
    
    
    await page.get_by_role("button", name="বন্ধ করুন").click()
    await page.wait_for_timeout(200)

print(f"======= loop finish ===========")

In [None]:
# Save data as CSV file (appending, so successive batches accumulate in one file)
csv_filename = '1.csv'
fieldnames = [
    "_04_Project_Name",
    "_31_Latitude",
    "_32_Longitude",
]

with open(csv_filename, 'a', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    # Append mode opens positioned at end-of-file, so offset 0 means the file
    # is new or empty. Only then write the header, otherwise every run would
    # insert another header row in the middle of the data.
    if csv_file.tell() == 0:
        writer.writeheader()
    writer.writerows(data)

print("====Script finish===")

In [None]:
from playwright.sync_api import Playwright, sync_playwright, expect


def run(playwright: Playwright) -> None:
    """Replay a recorded Google Maps session with the sync Playwright API.

    Launches a headed Chromium, opens Google Maps, and runs a fixed sequence
    of searches. The context and the browser are closed even when one of the
    steps raises, so a failed replay does not leave a browser window behind.

    Args:
        playwright: A started sync Playwright instance.

    Raises:
        Whatever the Playwright calls raise; nothing is swallowed here.
    """
    browser = playwright.chromium.launch(headless=False)
    try:
        context = browser.new_context()
        try:
            page = context.new_page()
            page.goto("https://www.google.com/maps")
            page.goto("https://www.google.com/maps/@25.6133918,88.6390693,13z?entry=ttu")

            # Locators are lazy descriptions of elements, so the repeated ones
            # are built once. The names are the Bengali Maps UI strings.
            search_box = page.get_by_role("textbox", name="Google Maps-এ খুঁজুন")
            search_button = page.get_by_role("button", name="খুঁজুন")
            close_button = page.get_by_role("button", name="বন্ধ করুন")

            # Search 1: submitted through the labelled search control.
            search_box.click()
            search_box.fill("D'Clover Residences")
            page.get_by_label("খুঁজুন", exact=True).click()
            close_button.click()

            # Search 2.
            search_box.click()
            search_box.fill("Amika Residences")
            search_button.click()
            close_button.click()

            # Search 3: the recording opened and closed the directions panel,
            # undid the text, then typed the query again before searching.
            search_box.click()
            search_box.fill("Jimbaran")
            page.get_by_label("দিকনির্দেশ", exact=True).click()
            page.get_by_label("দিকনির্দেশ বন্ধ করুন").click()
            search_box.press("Control+z")
            search_box.fill("Jimbaran")
            search_button.click()
            close_button.click()

            # Search 4.
            search_box.click()
            search_box.fill("Quinton Residences")
            search_button.click()
            close_button.click()
        finally:
            # ---------------------
            context.close()
    finally:
        browser.close()


# Bind the sync driver to its own name: reusing `playwright` as the target
# would overwrite the async Playwright instance this notebook started earlier.
# NOTE(review): Playwright's sync API raises when called from a thread that
# already runs an asyncio loop (as a Jupyter kernel does). Confirm this cell
# is meant to be run as a standalone script rather than inside the notebook.
with sync_playwright() as sync_pw:
    run(sync_pw)


In [None]:
# Reset the accumulator before scraping listings. This rebinds the same
# `data` name the coordinate cells use, so rows collected earlier are dropped.
data = []

In [None]:
next_page_number_count = 0

for i in range(28):
    count = 0
    
    for i in range(1,21):
        count +=1
        print(f"\rProcessing profile {count}...", end="")
        xpath = f"(//div[@class='listing-widget-new']/div)[{i}]"
        button = await page.wait_for_selector(xpath)
        await button.click()
        await page.wait_for_timeout(800)

        html = await page.content()
        response = Selector(text=html)


        # Xpath code
        _01_Building_URL = page.url
        condo_address_raw = response.xpath("//div[@class='listing-address']//span/text()").get()
        _02_State, _03_Area_V1 = get_state_and_area(condo_address_raw)

        _03_Area_V2 = response.xpath("normalize-space((//ol[@class='breadcrumb']/li)[4]/a/span/text())").get()

        _04_Project_Name = response.xpath("normalize-space(//h1/text())").get()
        _05_Project_Type = response.xpath("//h4[contains(text(), 'project type')]/../../td[2]/text()").get()
        _06_Developer =  response.xpath("//h4[contains(text(), 'Developer')]/../../td[2]/text()").get()
        _07_Tenure = response.xpath("//h4[contains(text(), 'Tenure')]/../../td[2]/text()").get()
        _08_Year = response.xpath("//h4[contains(text(), 'Completion Year')]/../../td[2]/text()").get()
        _09_Floors = response.xpath("//h4[contains(text(), '# of Floors')]/../../td[2]/text()").get()
        _10_Total_Units = response.xpath("//h4[contains(text(), 'Total Units')]/../../td[2]/text()").get()
        _11_Total_Sell = response.xpath("//div[@class='sale-rent-units-cta']//a[@data-listing-type='Sale']/@data-units").get()
        _12_Total_Rent = response.xpath("//div[@class='sale-rent-units-cta']//a[@data-listing-type='Rent']/@data-units").get()

        sale_range_high = response.xpath("(//div[@class='price-overview-widget clearfix']//span[@class='element-label price']/text())[2]").get()
        sale_range_low = response.xpath("(//div[@class='price-overview-widget clearfix']//span[@class='element-label price']/text())[1]").get()
        rent_range_low = response.xpath("(//div[@class='price-overview-widget clearfix']//span[@class='element-label price']/text())[3]").get()
        rent_range_high = response.xpath("(//div[@class='price-overview-widget clearfix']//span[@class='element-label price']/text())[4]").get()
        _13_Sale_Price_Range = f"{sale_range_low} - {sale_range_high}"
        _14_Rent_Price_Range = f"{rent_range_low} - {rent_range_high}"
        _15_PSF = response.xpath("//h4[contains(text(), 'PSF')]/../../td[2]/text()").get()
    

        total_sale_url_raw = response.xpath("//div[@class='sale-rent-units-cta']//a[@data-listing-type='Sale']/@href").get()
        total_rent_url_raw = response.xpath("//div[@class='sale-rent-units-cta']//a[@data-listing-type='Rent']/@href").get()

        if total_sale_url_raw is not None:
            total_sale_url = f"https://www.propertyguru.com.my{total_sale_url_raw}"
        else:
            total_sale_url = None

        if total_rent_url_raw is not None:    
            total_rent_url = f"https://www.propertyguru.com.my{total_rent_url_raw}"
        else:
            total_rent_url = None

        meta_dic = {
            "_01_Building_URL": _01_Building_URL,
            "_02_State": _02_State,
            "_03_Area_V1":_03_Area_V1,
            "_03_Area_V2": _03_Area_V2,
            "_04_Project_Name": _04_Project_Name,
            "_05_Project_Type": _05_Project_Type,
            "_06_Developer": _06_Developer,
            "_07_Tenure": _07_Tenure,
            "_08_Year": holding_only_int(_08_Year),
            "_09_Floors": holding_only_int(_09_Floors),
            "_10_Total_Units": holding_only_int(_10_Total_Units),
            "_11_Total_Sell": _11_Total_Sell,
            "_12_Total_Rent": _12_Total_Rent,
            "_13_Sale_Price_Range": psf_clean(_13_Sale_Price_Range),
            "_14_Rent_Price_Range": psf_clean(_14_Rent_Price_Range),
            "_15_PSF": psf_clean(_15_PSF),
            "total_sale_url_raw": total_sale_url,
            "total_rent_url_raw": total_rent_url,
            }
        data.append(meta_dic)



        await page.go_back()
        await page.wait_for_timeout(800)
    
    try:
        await page.get_by_text("»").click()
    except:
        break

In [None]:
# Save data as CSV file (appending, so successive runs accumulate in one file)
csv_filename = 'condo_se_v4.csv'
fieldnames = [
    "_01_Building_URL",
    "_02_State",
    "_03_Area_V1",
    "_03_Area_V2",
    "_04_Project_Name",
    "_05_Project_Type",
    "_06_Developer",
    "_07_Tenure",
    "_08_Year",
    "_09_Floors",
    "_10_Total_Units",
    "_11_Total_Sell",
    "_12_Total_Rent",
    "_13_Sale_Price_Range",
    "_14_Rent_Price_Range",
    "_15_PSF",
    "total_sale_url_raw",
    "total_rent_url_raw"
        ]

with open(csv_filename, 'a', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    # Append mode opens positioned at end-of-file, so offset 0 means the file
    # is new or empty. Only then write the header, otherwise every run would
    # insert another header row in the middle of the data.
    if csv_file.tell() == 0:
        writer.writeheader()
    writer.writerows(data)

print("====Script finish===")

In [None]:
# Condo detail pages to visit, assembled from the shared site prefix and each
# page's slug.
_condo_slugs = (
    "astrum-ampang-17962",
    "ppr-kampung-limau-17955",
    "alaia-titiwangsa-17953",
    "lily-apartment-kuchai-lama-17943",
    "kl-east-east-57-17917",
    "swnk-houze-bukit-bintang-city-centre-17907",
    "river-park-bangsar-south-17901",
    "m-nova-17893",
)
urls = ["https://www.propertyguru.com.my/condo/" + slug for slug in _condo_slugs]