# Import Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import time
from selenium import webdriver
import pandas as pd
import numpy as np
import re
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", None)

# Make Code Effectively Function

In [2]:
def scrape_data(url, url2, container_name, column_name, element, attrs, start, end):
    data = []  # List to store data

    for page in range(start, end):  # Iterate from start to end page
        full_url = f'{url}/{page}'  # Full URL with page number
        response = requests.get(full_url)
        soup = BeautifulSoup(response.text, 'html.parser')
        div_containers = soup.findAll('div', attrs={'class': container_name})

        for container in div_containers:
            review = container.find(element, attrs=attrs)
            if review:
                # Get the URL for each title
                if url2:
                    relative_url = review.find('a')['href']
                    absolute_url = f'{url2}/{relative_url}'
                    # Visit the URL to get the address
                    response = requests.get(absolute_url)
                    soup_inner = BeautifulSoup(response.text, 'html.parser')
                    address_element = soup_inner.find('address', class_='item-address')
                    if address_element:
                        address = address_element.text.strip()
                        data.append((address))
                else:
                    data.append(review.text.strip())
            else:
                item = container.find(element, attrs=attrs)
                if item:
                    data.append(item.text.strip())

    # Create a dataframe from a list of data
    df = pd.DataFrame({column_name: data})
    print('Total properties acquired:', len(df))
    print('Success scraped the data')
    # Show dataframe
    return df

# Scrape Booking Page on Bukit Vista Website

## Bali Vacation Rentals (https://www.bukitvista.com/bali-vacation-rentals)

### Beachfront Page

In [3]:
# Retrieve title data
df_beachfront_title = scrape_data('https://www.bukitvista.com/property-type/beachfront/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [4]:
# Retrieve address data
df_beachfront_address = scrape_data('https://www.bukitvista.com/property-type/beachfront/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [5]:
# Retrieve price data
df_beachfront_price = scrape_data('https://www.bukitvista.com/property-type/beachfront/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [6]:
# Retrieve bedroom data
df_beachfront_bedroom = scrape_data('https://www.bukitvista.com/property-type/beachfront/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [7]:
# Retrieve bathroom data
df_beachfront_bathroom = scrape_data('https://www.bukitvista.com/property-type/beachfront/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [8]:
df_beachfront = pd.concat([df_beachfront_title, df_beachfront_address, df_beachfront_price, df_beachfront_bedroom, df_beachfront_bathroom], axis=1)
print('Total properties acquired: ', len(df_beachfront))
df_beachfront.head()

Total properties acquired:  8


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",Start from USD $41 per night,1,1
1,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,Start from $23 per night,1,1
2,Nusa Penida New Beachfront Cottage with Cute Pool,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",Start from $29 USD per night,1,1
3,Minimalist Room with Superb Ocean View at Bingin,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...","Start from $1,060 USD per Month",1,1
4,Bohemian Beachfront Surf Villa by the Bingin B...,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...","Start from $5,590 USD per Month",4,4


### Island Life

In [9]:
# Retrieve title data
df_island_life_title = scrape_data('https://www.bukitvista.com/property-type/island-life/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 2
Success scraped the data


In [10]:
# Retrieve address data
df_island_life_address = scrape_data('https://www.bukitvista.com/property-type/island-life/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 2
Success scraped the data


In [11]:
# Retrieve price data
df_island_life_price = scrape_data('https://www.bukitvista.com/property-type/island-life/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 2
Success scraped the data


In [12]:
# Retrieve bedroom data
df_island_life_bedroom = scrape_data('https://www.bukitvista.com/property-type/island-life/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 2
Success scraped the data


In [13]:
# Retrieve bathroom data
df_island_life_bathroom = scrape_data('https://www.bukitvista.com/property-type/island-life/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 2
Success scraped the data


In [14]:
df_island_life = pd.concat([df_island_life_title, df_island_life_address, df_island_life_price, df_island_life_bedroom, df_island_life_bathroom], axis=1)
print('Total properties acquired: ', len(df_island_life))
df_island_life.head()

Total properties acquired:  2


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Peaceful Wooden Cabin in Ubud Beautiful Ricefield,"Banjar Dukuh, Kenderan, Kec. Tegallalang, Kabu...",Start from $26 USD,1,1
1,Enchanting Hillside Cabin w/ Pool @ Uluwatu Te...,"Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali","Start from $1,380 per Month",1,1


### Jungle View

In [15]:
# Retrieve title data
df_jungle_view_title = scrape_data('https://www.bukitvista.com/property-type/jungle-view/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 6
Success scraped the data


In [16]:
# Retrieve address data
df_jungle_view_address = scrape_data('https://www.bukitvista.com/property-type/jungle-view/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 6
Success scraped the data


In [17]:
# Retrieve price data
df_jungle_view_price = scrape_data('https://www.bukitvista.com/property-type/jungle-view/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 6
Success scraped the data


In [18]:
# Retrieve bedroom data
df_jungle_view_bedroom = scrape_data('https://www.bukitvista.com/property-type/jungle-view/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 6
Success scraped the data


In [19]:
# Retrieve bathroom data
df_jungle_view_bathroom = scrape_data('https://www.bukitvista.com/property-type/jungle-view/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 6
Success scraped the data


In [20]:
df_jungle_view = pd.concat([df_jungle_view_title, df_jungle_view_address, df_jungle_view_price, df_jungle_view_bedroom, df_jungle_view_bathroom], axis=1)
print('Total properties acquired: ', len(df_jungle_view))
df_jungle_view.head()

Total properties acquired:  6


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Ubud Summer House on the Edge of the Wilderness,"Jl. Gn. Sari, Kecamatan Ubud, Kabupaten Gianya...","Start from $1,980 USD per Month",3,3
1,Peaceful Wooden Cabin in Ubud Beautiful Ricefield,"Banjar Dukuh, Kenderan, Kec. Tegallalang, Kabu...",Start from $26 USD,1,1
2,Monumental Ubud Bamboo House Jungle View,"Jalan Tanah Ayu, Sibang Gede, Abiansemal, Kabu...","Start from $5,260 USD per Month",3,3
3,Serene Cabin with Gym & Fast Wifi in Uluwatu,"Jalan Raya Uluwatu, Pecatu, South Kuta, Badung...",Start from $730 USD per Month,1,1
4,Ecstatic Villa in Ubud w/ Greenery Garden View,"Jl. Tirta Tawar, Petulu, Kecamatan Ubud, Kabup...",Start from $690 USD per Month,1,1


### Ocean View

In [21]:
# Retrieve title data
df_ocean_view_title = scrape_data('https://www.bukitvista.com/property-type/ocean-view/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 3
Success scraped the data


In [22]:
# Retrieve address data
df_ocean_view_address = scrape_data('https://www.bukitvista.com/property-type/ocean-view/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 3
Success scraped the data


In [23]:
# Retrieve price data
df_ocean_view_price = scrape_data('https://www.bukitvista.com/property-type/ocean-view/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 3
Success scraped the data


In [24]:
# Retrieve bedroom data
df_ocean_view_bedroom = scrape_data('https://www.bukitvista.com/property-type/ocean-view/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 3
Success scraped the data


In [25]:
# Retrieve bathroom data
df_ocean_view_bathroom = scrape_data('https://www.bukitvista.com/property-type/ocean-view/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 3
Success scraped the data


In [26]:
df_ocean_view = pd.concat([df_ocean_view_title, df_ocean_view_address, df_ocean_view_price, df_ocean_view_bedroom, df_ocean_view_bathroom], axis=1)
print('Total properties acquired: ', len(df_ocean_view))
df_ocean_view.head()

Total properties acquired:  3


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Mesmerizing 2BR Kokomo Villa in Umalas with Pool,"Jl. Bumbak Umalas, Kerobokan Kelod, Kuta Utara...","Start from $4,750 USD per Month",2,2
1,Stunning Ocean View Nusa Dua Villa + Infinity ...,"Jl. Raya Nusa Dua Selatan, Sawangan, Nusa Dua,...","Start from $13,160 USD per Month",3,3
2,Breathtaking Bingin Ocean View Room for Surfer,"Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali...","Start from $1,580 USD per Month",1,1


### Residential

In [27]:
# Retrieve title data
df_residential_title = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 77
Success scraped the data


In [28]:
# Retrieve address data
df_residential_address = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 77
Success scraped the data


In [29]:
# Retrieve price data
df_residential_price = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 77
Success scraped the data


In [30]:
# Retrieve bedroom data
df_residential_bedroom = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 77
Success scraped the data


In [31]:
# Retrieve bathroom data
df_residential_bathroom = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 77
Success scraped the data


In [32]:
df_residential = pd.concat([df_residential_title, df_residential_address, df_residential_price, df_residential_bedroom, df_residential_bathroom], axis=1)
print('Total properties acquired: ', len(df_residential))
df_residential.head()

Total properties acquired:  77


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Luxurious Private Villa with Pool in Idyllic P...,"Gg. Meregan &, Jl. Toya Ning II, Ungasan, Sout...","Start from $2,346 USD per Month",2,2
1,Villa by Rice Fields in Canggu,"Pura Wates Canggu No.39, Canggu, Kec. Kuta Uta...","Start from $2,200 USD per Month",2,2
2,Modern Breezy Homey 2BR Villa with Pool near GWK,"Ungasan, Kec. Kuta Sel., Kabupaten Badung, Bal...","Start from $2,400 USD per Month",2,2
3,Dainty Villa w/ Restful Pool for Group at Ungasan,"Kutuh, Kec. Kuta Sel., Kabupaten Badung, Bali ...","Start from $2,050 USD per Month",3,3
4,Spacious New Tropical Villa w/ GWK View,"Kutuh, Kec. Kuta Sel., Kabupaten Badung, Bali ...","Start from $3,200 USD per Month",3,3


### Rice Paddy View

In [33]:
# Retrieve title data
df_rice_paddy_view_title = scrape_data('https://www.bukitvista.com/property-type/rice-paddy-view/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [34]:
# Retrieve address data
df_rice_paddy_view_address = scrape_data('https://www.bukitvista.com/property-type/rice-paddy-view/page', '', 'd-flex align-items-center h-100', 'Address', 'address', {'class': 'item-address'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [35]:
# Retrieve price data
df_rice_paddy_view_price = scrape_data('https://www.bukitvista.com/property-type/rice-paddy-view/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [36]:
# Retrieve bedroom data
df_rice_paddy_view_bedroom = scrape_data('https://www.bukitvista.com/property-type/rice-paddy-view/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [37]:
# Retrieve bathroom data
df_rice_paddy_view_bathroom = scrape_data('https://www.bukitvista.com/property-type/rice-paddy-view/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 8
Success scraped the data


In [38]:
df_rice_paddy_view = pd.concat([df_rice_paddy_view_title, df_rice_paddy_view_address, df_rice_paddy_view_price, df_rice_paddy_view_bedroom, df_rice_paddy_view_bathroom], axis=1)
print('Total properties acquired: ', len(df_rice_paddy_view))
df_rice_paddy_view.head()

Total properties acquired:  8


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Villa by Rice Fields in Canggu,"Pura Wates Canggu No.39, Canggu, Kec. Kuta Uta...","Start from $2,200 USD per Month",2,2
1,Da Kelor,"C7X4+6V2, Jl. Raya Gelogor, Lodtunduh, Kecamat...",Start from $151 USD for 15 year lease,2,2
2,Tasteful Villa w/ Nice View of Rice Field in J...,"Dobalan, Timbulharjo, Sewon, Bantul 55185",Mulai dari Rp1.201.000 Rupiah per Malam,3,3
3,Mesmerizing 2BR Kokomo Villa in Umalas with Pool,"Jl. Bumbak Umalas, Kerobokan Kelod, Kuta Utara...","Start from $4,750 USD per Month",2,2
4,Soothing Ubud Villa w/ Wide Appeasing Garden,"Jl. Sri Wedari, Ubud, Kabupaten Gianyar, Bali ...","Start from $4,940 USD per Month",4,4


### Concat All Data for Bali Vacation Rentals

In [39]:
df_bali = pd.concat([df_beachfront, df_island_life, df_jungle_view, df_ocean_view, df_residential, df_rice_paddy_view], axis=0)
df_bali.head()

Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",Start from USD $41 per night,1,1
1,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,Start from $23 per night,1,1
2,Nusa Penida New Beachfront Cottage with Cute Pool,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",Start from $29 USD per night,1,1
3,Minimalist Room with Superb Ocean View at Bingin,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...","Start from $1,060 USD per Month",1,1
4,Bohemian Beachfront Surf Villa by the Bingin B...,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...","Start from $5,590 USD per Month",4,4


## Yogyakarta Vacation Rentals (https://www.bukitvista.com/villa-jogja)

In [40]:
# Retrieve title data
df_yogyakarta_title = scrape_data('https://www.bukitvista.com/villa-jogja/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [41]:
# Retrieve address data
df_yogyakarta_address = scrape_data('https://www.bukitvista.com/villa-jogja/page', 'https://www.bukitvista.com/property/', 'd-flex align-items-center h-100', 'Address', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [42]:
# Retrieve price data
df_yogyakarta_price = scrape_data('https://www.bukitvista.com/villa-jogja/page', '', 'd-flex align-items-center h-100', 'Price', 'span', {'class': 'item-price-text'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [43]:
# Retrieve bedroom data
df_yogyakarta_bedroom = scrape_data('https://www.bukitvista.com/villa-jogja/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [44]:
# Retrieve bathroom data
df_yogyakarta_bathroom = scrape_data('https://www.bukitvista.com/villa-jogja/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [45]:
df_yogyakarta = pd.concat([df_yogyakarta_title, df_yogyakarta_address, df_yogyakarta_price, df_yogyakarta_bedroom, df_yogyakarta_bathroom], axis=1)
print('Total properties acquired: ', len(df_yogyakarta))
df_yogyakarta.head()

Total properties acquired:  20


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Tasteful Villa w/ Nice View of Rice Field in J...,"Dobalan, Timbulharjo, Sewon, Bantul 55185",Mulai dari Rp1.201.000 Rupiah per Malam,3,3
1,Town House in a Quiet Residence near Ambarrukmo,"Perum Jogja Town House 3, Kab. Bantul, Daerah ...",Mulai dari Rp1.020.000 Rupiah per Malam,3,3
2,Spacious 5BR Vacation Villa for Big Group in J...,"Juwangen, Purwomartani, Kec. Kalasan, Kabupate...",Mulai dari Rp2.160.000 Rupiah per Malam,5,5
3,3BR Javanese Traditional Relaxing Villa with Pool,"Sinduadi, Kec. Mlati, Kabupaten Sleman, Daerah...",Mulai dari Rp1.930.000 Rupiah per Malam,3,3
4,Jogja Historical Home in Keraton Heritage Area,"Kecamatan Kraton, Kota Yogyakarta, Daerah Isti...",Mulai dari Rp1.735.000 Rupiah per Malam,3,3


## Nusa Penida Vacation Rentals (https://www.bukitvista.com/nusa-penida-vacation-rentals)

In [46]:
# Retrieve title data
df_nusa_penida_title = scrape_data('https://www.bukitvista.com/nusa-penida-vacation-rentals/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [47]:
# Retrieve address data
df_nusa_penida_address = scrape_data('https://www.bukitvista.com/nusa-penida-vacation-rentals/page', 'https://www.bukitvista.com/property/', 'd-flex align-items-center h-100', 'Address', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [48]:
# Retrieve price data
df_nusa_penida_price = scrape_data('https://www.bukitvista.com/nusa-penida-vacation-rentals/page', '', 'd-flex align-items-center h-100', 'Price', 'span', {'class': 'item-price-text'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [49]:
# Retrieve bedroom data
df_nusa_penida_bedroom = scrape_data('https://www.bukitvista.com/nusa-penida-vacation-rentals/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [50]:
# Retrieve bathroom data
df_nusa_penida_bathroom = scrape_data('https://www.bukitvista.com/nusa-penida-vacation-rentals/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 20
Success scraped the data


In [51]:
df_nusa_penida = pd.concat([df_nusa_penida_title, df_nusa_penida_address, df_nusa_penida_price, df_nusa_penida_bedroom, df_nusa_penida_bathroom], axis=1)
print('Total properties acquired: ', len(df_nusa_penida))
df_nusa_penida.head()

Total properties acquired:  20


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Breathtaking Ocean View Cabin w/ Wi-Fi & Break...,"Banjar Kutapang, Desa Batununggul, Nusapenida,...",Start from $13 per night,1,1
1,Stylish Rustic Cabin in Penida near Seaside Cafe,"Jl. Ped - Buyuk, Ped, Nusapenida, Kabupaten Kl...",Start from $24 per night,1,1
2,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",Start from USD $41 per night,1,1
3,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,Start from $23 per night,1,1
4,Penida Cabin for Diving & Snorkeling Enthusiasts,"Batununggul, Nusapenida, Kabupaten Klungkung, ...",Start from $18 per night,1,1


## Bali Long Term Rentals (https://www.bukitvista.com/bali-long-term-rentals)

In [52]:
# Retrieve title data
df_bali_long_term_title = scrape_data('https://www.bukitvista.com/bali-long-term-rentals/page', '', 'd-flex align-items-center h-100', 'Title', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 68
Success scraped the data


In [53]:
# Retrieve address data
df_bali_long_term_address = scrape_data('https://www.bukitvista.com/bali-long-term-rentals/page', 'https://www.bukitvista.com/property', 'd-flex align-items-center h-100', 'Address', 'h2', {'class': 'item-title'}, 1, 10)

Total properties acquired: 68
Success scraped the data


In [54]:
# Retrieve price data
df_bali_long_term_price = scrape_data('https://www.bukitvista.com/bali-long-term-rentals/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price'}, 1, 10)

Total properties acquired: 68
Success scraped the data


In [55]:
# Retrieve bedroom data
df_bali_long_term_bedroom = scrape_data('https://www.bukitvista.com/bali-long-term-rentals/page', '', 'd-flex align-items-center h-100', 'Bedroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 68
Success scraped the data


In [56]:
# Retrieve bathroom data
df_bali_long_term_bathroom = scrape_data('https://www.bukitvista.com/bali-long-term-rentals/page', '', 'd-flex align-items-center h-100', 'Bathroom', 'span', {'class': 'hz-figure'}, 1, 10)

Total properties acquired: 68
Success scraped the data


In [57]:
df_bali_long_term = pd.concat([df_bali_long_term_title, df_bali_long_term_address, df_bali_long_term_price, df_bali_long_term_bedroom, df_bali_long_term_bathroom], axis=1)
print('Total properties acquired: ', len(df_bali_long_term))
df_bali_long_term.head()

Total properties acquired:  68


Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Spacious New Tropical Villa w/ GWK View,"Kutuh, Kec. Kuta Sel., Kabupaten Badung, Bali ...","Start from $3,200 USD per Month",3,3
1,Mesmerizing 2BR Kokomo Villa in Umalas with Pool,"Jl. Bumbak Umalas, Kerobokan Kelod, Kuta Utara...","Start from $4,750 USD per Month",2,2
2,Stunning Ocean View Nusa Dua Villa + Infinity ...,"Jl. Raya Nusa Dua Selatan, Sawangan, Nusa Dua,...","Start from $13,160 USD per Month",3,3
3,Luxurious Private Villa with Pool in Idyllic P...,"Gg. Meregan &, Jl. Toya Ning II, Ungasan, Sout...","Start from $2,346 USD per Month",2,2
4,Villa by Rice Fields in Canggu,"Pura Wates Canggu No.39, Canggu, Kec. Kuta Uta...","Start from $2,200 USD per Month",2,2


# Concat All Scraped Data

In [58]:
df_bukit_vista = pd.concat([df_bali, df_yogyakarta, df_nusa_penida, df_bali_long_term], axis=0)
df_bukit_vista.head()

Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",Start from USD $41 per night,1,1
1,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,Start from $23 per night,1,1
2,Nusa Penida New Beachfront Cottage with Cute Pool,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",Start from $29 USD per night,1,1
3,Minimalist Room with Superb Ocean View at Bingin,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...","Start from $1,060 USD per Month",1,1
4,Bohemian Beachfront Surf Villa by the Bingin B...,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...","Start from $5,590 USD per Month",4,4


In [59]:
df_bukit_vista.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 212 entries, 0 to 67
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Title     212 non-null    object
 1   Address   212 non-null    object
 2   Price     212 non-null    object
 3   Bedroom   212 non-null    object
 4   Bathroom  212 non-null    object
dtypes: object(5)
memory usage: 9.9+ KB


## Code Effectively Function: Call in Another File in Jupyter Notebook

In [60]:
import importlib

# Import file ex_scrape_bv.py
ex_scrape_bv = importlib.import_module('ex_scrape_bv')

# Reload the ex_scrape_bv module to update changes if any
importlib.reload(ex_scrape_bv)

# Call function scrape_data
df_residential_price = scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)
df_residential_price.head()

Total properties acquired: 77
Success scraped the data


Unnamed: 0,Price
0,"Start from $2,346 USD per Month"
1,"Start from $2,200 USD per Month"
2,"Start from $2,400 USD per Month"
3,"Start from $2,050 USD per Month"
4,"Start from $3,200 USD per Month"


In [61]:
import ex_scrape_bv
df_residential_price = ex_scrape_bv.scrape_data('https://www.bukitvista.com/property-type/residential/page', '', 'd-flex align-items-center h-100', 'Price', 'li', {'class': 'item-price item-price-text'}, 1, 10)
df_residential_price.head()

Total properties acquired: 77
Success scraped the data


Unnamed: 0,Price
0,"Start from $2,346 USD per Month"
1,"Start from $2,200 USD per Month"
2,"Start from $2,400 USD per Month"
3,"Start from $2,050 USD per Month"
4,"Start from $3,200 USD per Month"


# Data Preprocessing

In [62]:
# See Price column to clean
df_bukit_vista['Price'].unique()

array(['Start from USD $41 per night', 'Start from $23 per night',
       'Start from $29 USD per night', 'Start from $1,060 USD per Month',
       'Start from $5,590 USD per Month',
       'Start from $1,580 USD per Month', 'Start from $990 USD per Month',
       'Start from $26 USD', 'Start from $1,380 per Month',
       'Start from $1,980 USD per Month',
       'Start from $5,260 USD per Month', 'Start from $730 USD per Month',
       'Start from $690 USD per Month', 'Start from $890 USD per Month',
       'Start from $4,750 USD per Month',
       'Start from $13,160 USD per Month',
       'Start from $2,346 USD per Month',
       'Start from $2,200 USD per Month',
       'Start from $2,400 USD per Month',
       'Start from $2,050 USD per Month',
       'Start from $3,200 USD per Month',
       'Start from $151 USD for 15 year lease',
       'Mulai dari Rp1.201.000 Rupiah per Malam',
       'Mulai dari Rp1.020.000 Rupiah per Malam',
       'Mulai dari Rp410.500 USD per Malam',
    

In [63]:
# See Address column to clean
df_bukit_vista['Address'].unique()

array(['Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec Nusa Penida, Kabupaten Klungkung',
       'Jalan Raya Ped Banjar Bodong Desa Ped Nusa Penida, Ped, Nusa Penida, Ped, Nusapenida, Kabupaten Klungkung, Bali 80771',
       'Jl. Batumulapan, Batununggul, Kec. Nusa Penida,',
       'Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupaten Badung, Bali 80361, Indonesia',
       'Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali 80361',
       'Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali, Indonesia',
       'Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali 80361, Indonesia',
       'Banjar Bodong, Desa Ped, Kec Nusa Penida, Kabupaten Klungkung',
       'Banjar Dukuh, Kenderan, Kec. Tegallalang, Kabupaten Gianyar, Bali 80561, Indonesia',
       'Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali',
       'Jl. Gn. Sari, Kecamatan Ubud, Kabupaten Gianyar, Bali, Indonesia',
       'Jalan Tanah Ayu, Sibang Gede, Abiansemal, Kabupaten Badung, Bali',
       'Jalan Raya Ulu

In [64]:
#!pip install forex-python

In [68]:
from forex_python.converter import CurrencyRates

In [69]:
def clean_data(df):
    # Split the address column into new columns
    split_values = df['Address'].str.split(',', expand=True)

    # Check whether the number of columns matches the key
    if len(split_values.columns) == 3:
        df[['Village', 'Subdistrict', 'Regency']] = split_values
    else:
        # Adjust the number of columns with the key
        df[['Village', 'Subdistrict', 'Regency']] = split_values[[0, 1, 2]]

    # Clear whitespace around values
    df['Village'] = df['Village'].str.strip()
    df['Subdistrict'] = df['Subdistrict'].str.strip()
    df['Regency'] = df['Regency'].str.strip()

    # Determine the value of the subdistrict and regency based on keywords
    df['Subdistrict'] = df['Subdistrict'].apply(lambda x: x.lower() if isinstance(x, str) else '')
    df['Regency'] = df['Regency'].apply(lambda x: x.lower() if isinstance(x, str) else '')

    # Return the address field if subdistrict and regency are empty
    df['Address'] = df.apply(lambda row: row['Address'] if (row['Subdistrict'] == '' and row['Regency'] == '') else row['Address'], axis=1)

    # Clear whitespace around values
    df['Village'] = df['Village'].str.strip()
    df['Subdistrict'] = df['Subdistrict'].str.strip()
    df['Regency'] = df['Regency'].str.strip()

    # Replace blank values with None
    df['Village'] = df['Village'].apply(lambda x: None if x == '' else x)
    df['Subdistrict'] = df['Subdistrict'].apply(lambda x: None if x == '' else x)
    df['Regency'] = df['Regency'].apply(lambda x: None if x == '' else x)

    # Fill in the blank values with the values from the address fields
    df['Village'] = df['Village'].fillna(df['Address'])
    df['Subdistrict'] = df['Subdistrict'].fillna(df['Address'])
    df['Regency'] = df['Regency'].fillna(df['Address'])
    
    # Make the Price column just numbers by removing letters
    df.loc[:, 'Price'] = df['Price'].str.replace('Mulai dari', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('Start from', '', regex=True)
    df['Price'] = df['Price'].replace('', np.nan)
    
    # Add a Rent Type column based on length of rent (per night, per month, per year/or more)
    df['Rent Type'] = df['Price'].apply(lambda x: '2' if ('year' in str(x).lower() or 'tahun' in str(x).lower()) else '1' if ('month' in str(x).lower() or 'bulan' in str(x).lower()) else '0')

    # Make the Price column just numbers by removing letters
    df.loc[:, 'Price'] = df['Price'].str.replace('per night', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('USD per night', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('USD per Month', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('USD for 15 year lease', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('Rupiah per Malam', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('USD per Malam', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('per Month', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('per year', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('.', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('USD', '', regex=True)
    
    # Make the Price column just numbers by removing letters
    def process_price(price):
        if not any(symbol in price for symbol in ['$', 'Rp', 'R']):
            price = '$' + price
        return price
    df['Price'] = df['Price'].astype(str).apply(process_price)
    
    # Make the Price column just numbers by removing letters
    df.loc[:, 'Price'] = df['Price'].str.replace('Rp', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace('R', '', regex=True)
    
    c = CurrencyRates()
    def convert_to_usd(price):
        if '$' not in price:
            amount = float(price.replace(',', ''))
            usd_amount = c.convert('IDR', 'USD', amount)
            return '${:,.2f}'.format(usd_amount)
        return price
    df['Price'] = df['Price'].astype(str).apply(convert_to_usd)
    
    # Make the Price column just numbers (float) by removing letters
    df.loc[:, 'Price'] = df['Price'].str.replace('$', '', regex=True)
    df.loc[:, 'Price'] = df['Price'].str.replace(',', '', regex=True)
    df['Price'] = df['Price'].apply(lambda x: float(x) if x != '' else None)
    df = df.rename(columns={'Price': 'Price in USD'})
    
    # Change column from object to float
    df['Bedroom'] = df['Bedroom'].apply(lambda x: float(x) if x != '' else None)
    df['Bathroom'] = df_bukit_vista['Bathroom'].apply(lambda x: float(x) if x != '' else None)
    df['Rent Type'] = df['Rent Type'].apply(lambda x: float(x) if x != '' else None)

    return df

In [66]:
df = df_bukit_vista.copy()
df.head()

Unnamed: 0,Title,Address,Price,Bedroom,Bathroom
0,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",Start from USD $41 per night,1,1
1,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,Start from $23 per night,1,1
2,Nusa Penida New Beachfront Cottage with Cute Pool,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",Start from $29 USD per night,1,1
3,Minimalist Room with Superb Ocean View at Bingin,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...","Start from $1,060 USD per Month",1,1
4,Bohemian Beachfront Surf Villa by the Bingin B...,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...","Start from $5,590 USD per Month",4,4


In [70]:
df = clean_data(df)
df.head()

Unnamed: 0,Title,Address,Price in USD,Bedroom,Bathroom,Village,Subdistrict,Regency,Rent Type
0,Beachfront Bungalow w/ Sunset View,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",41.0,1.0,1.0,Jl. Ped - Buyuk,banjar bodong,desa ped,0.0
1,Soothing Sound of the Waves in a Beachfront Cabin,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,23.0,1.0,1.0,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Penida,ped,nusa penida,0.0
2,Nusa Penida New Beachfront Cottage with Cute Pool,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",29.0,1.0,1.0,Jl. Batumulapan,batununggul,kec. nusa penida,0.0
3,Minimalist Room with Superb Ocean View at Bingin,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...",1060.0,1.0,1.0,Jl. Pantai Bingin,pecatu,kuta sel.,0.0
4,Bohemian Beachfront Surf Villa by the Bingin B...,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...",5590.0,4.0,4.0,Jl. Pantai Bingin,pecatu,kec. kuta sel.,0.0


In [71]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 212 entries, 0 to 67
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         212 non-null    object 
 1   Address       212 non-null    object 
 2   Price in USD  211 non-null    float64
 3   Bedroom       212 non-null    float64
 4   Bathroom      212 non-null    float64
 5   Village       212 non-null    object 
 6   Subdistrict   212 non-null    object 
 7   Regency       212 non-null    object 
 8   Rent Type     212 non-null    float64
dtypes: float64(4), object(5)
memory usage: 16.6+ KB


# Deployment on Flask

## Install Flask in Linux-Ubuntu
pip install flask in terminal

## Deploy Process
1. To open terminal, open project folder in document, then shift + right click
2. Write `python app_scrape_bv.py`, enter
3. Open http://127.0.0.1:5000/scraped_data (@app.route('/scraped_data')) on web browser 
4. Waiting for loading the web browser
5. Get the result, then save to json file -> `bukitvista_scraped_booking_data.json`

In [72]:
import json
import pandas as pd

with open('bukitvista_scraped_booking_data.json') as json_file:
    data = json.load(json_file)

In [73]:
data

{'Address': ['Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec Nusa Penida, Kabupaten Klungkung',
  'Jalan Raya Ped Banjar Bodong Desa Ped Nusa Penida, Ped, Nusa Penida, Ped, Nusapenida, Kabupaten Klungkung, Bali 80771',
  'Jl. Batumulapan, Batununggul, Kec. Nusa Penida,',
  'Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupaten Badung, Bali 80361, Indonesia',
  'Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali 80361',
  'Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali, Indonesia',
  'Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali 80361, Indonesia',
  'Banjar Bodong, Desa Ped, Kec Nusa Penida, Kabupaten Klungkung',
  'Banjar Dukuh, Kenderan, Kec. Tegallalang, Kabupaten Gianyar, Bali 80561, Indonesia',
  'Pecatu, Kec. Kuta Sel., Kabupaten Badung, Bali',
  'Jl. Gn. Sari, Kecamatan Ubud, Kabupaten Gianyar, Bali, Indonesia',
  'Banjar Dukuh, Kenderan, Kec. Tegallalang, Kabupaten Gianyar, Bali 80561, Indonesia',
  'Jalan Tanah Ayu, Sibang Gede, Abiansemal, Kabupate

In [74]:
df_scraped = pd.DataFrame(data)
df_scraped.head()

Unnamed: 0,Address,Bathroom,Bedroom,Price,Title
0,"Jl. Ped - Buyuk, Banjar Bodong, Desa Ped, Kec ...",1,1,Start from USD $41 per night,Beachfront Bungalow w/ Sunset View
1,Jalan Raya Ped Banjar Bodong Desa Ped Nusa Pen...,1,1,Start from $23 per night,Soothing Sound of the Waves in a Beachfront Cabin
2,"Jl. Batumulapan, Batununggul, Kec. Nusa Penida,",1,1,Start from $29 USD per night,Nusa Penida New Beachfront Cottage with Cute Pool
3,"Jl. Pantai Bingin, Pecatu, Kuta Sel., Kabupate...",1,1,"Start from $1,060 USD per Month",Minimalist Room with Superb Ocean View at Bingin
4,"Jl. Pantai Bingin, Pecatu, Kec. Kuta Sel., Kab...",4,4,"Start from $5,590 USD per Month",Bohemian Beachfront Surf Villa by the Bingin B...


In [75]:
df_scraped.shape

(212, 5)