# Imports

In [1]:
import csv
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

from datetime import datetime
import re

# TripAdvisor Script 0
- Fixed filtering native reviews
- Unable to retrieve review with reviews with no date just below the username

In [2]:
def fix_date(date):
  date_dict = {'Okt':'Oct', 'Des':'Dec', 'Agt':'Aug', 'Mei':'May'}
      
  # Replace date with proper abbreviations
  for key, val in date_dict.items():
    if key in date:
      date = date.replace(key, val)
  
  date = datetime.strptime(date, '%b %Y')
  date = date.strftime('%Y-%m')
  
  return date

In [3]:
def scrape_review_s0(attraction, link):
  
  print(f"[INFO] Currently Scrapping: {attraction}")
  
  start_time = datetime.now()
  
  # Open csv file to write in
  file_path = str(datetime.now().strftime('%Y_%m_%d')) + '_' + attraction.replace(" ", "_").lower() + '.csv'
  file = open(f'data/raw/{file_path}', 'a', encoding="utf-8", newline='')
  csv_writer = csv.writer(file)
  
  # CSV Header
  csv_writer.writerow(['date', 'user', 'review', 'attraction', 'source'])

  # Open up browser and navigate to page
  driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
  driver.get(link)
  
  # Get number of pages
  total_reviews = driver.find_element(By.XPATH, "//*[@href='#REVIEWS']/div[1]/span[1]/span[1]").text.replace('.', '')
  total_reviews = int(total_reviews)
  num_pages = (total_reviews // 10) + 1
  
  # Set review and page number counter
  review_count = 0
  page_num = 1
  
  for i in range(num_pages):
    
    # Let page load (change to until element exist later)
    time.sleep(3)   
    
    # Extract elements               
    users = driver.find_elements(By.XPATH, "//*[@data-automation='reviewCard']//span[contains(text(),'Google Terjemahan')]//parent::button//parent::div[1]/div[1]/div[1]/div[2]/span")
    reviews = driver.find_elements(By.XPATH, "//*[@data-automation='reviewCard']//span[contains(text(),'Google Terjemahan')]//parent::button//parent::div/div[5]/div[1]")
    dates = driver.find_elements(By.XPATH, "//*[@class='eRduX']")
    
    # Open csv file and write to it
    for j in range(len(reviews)):
      user = users[j].text
      review = reviews[j].text
      date = fix_date(dates[j].text[:9].strip())
      
      # Write reviews and date into csv file
      csv_writer.writerow([date, user, review, attraction, 'tripadvisor'])      
      
    review_count += len(reviews)
    
    # Click next page
    next = driver.find_element(By.XPATH, "//a[@aria-label='Next page']//*[name()='svg']")
    next.click()
    page_num += 1
    
    # If no. of native reviews on page is 0, exit loop
    if len(reviews) == 0:
      break
    
    # print(f"Review Count: {review_count}")
    # print(f"Current page: {page_num}")
  
  # ----- Scrapping Info -----
  print('-------------- Scrape Completed --------------')
  print(f"Total Reviews: {total_reviews}")
  print(f"Total Native Reviews: {review_count}")
  end_time = datetime.now()
  print('Runtime: {}'.format(end_time - start_time))
  
  file.close()
  driver.close()

In [4]:
df = pd.read_csv('attractions_list_script0.csv', header=None, names=["Attraction", "Link"])
df

Unnamed: 0,Attraction,Link
0,Universal Studios Singapore,https://www.tripadvisor.co.id/Attraction_Revie...
1,S.E.A Aquarium,https://www.tripadvisor.co.id/Attraction_Revie...
2,Skyline Luge Sentosa,https://www.tripadvisor.co.id/Attraction_Revie...
3,Adventure Cove Waterpark,https://www.tripadvisor.co.id/Attraction_Revie...
4,Madame Tussauds,https://www.tripadvisor.co.id/Attraction_Revie...
5,Trick Eye Museum Singapore,https://www.tripadvisor.co.id/Attraction_Revie...
6,iFly Singapore,https://www.tripadvisor.co.id/Attraction_Revie...
7,Sentosa 4D AdventureLand,https://www.tripadvisor.co.id/Attraction_Revie...
8,Hydrodash Waterpark,https://www.tripadvisor.co.id/Attraction_Revie...
9,Sentosa Boardwalk,https://www.tripadvisor.co.id/Attraction_Revie...


In [5]:
for index, row in df.iterrows():
    try:
        scrape_review_s0(row['Attraction'], row['Link'])
    except:
        print(f"No Native Reviews Found on {row['Attraction']}")
    finally:
        continue





[INFO] Currently Scrapping: Universal Studios Singapore


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/100.0.4896.60/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60]


-------------- Scrape Completed --------------
Total Reviews: 20005
Total Native Reviews: 636
Runtime: 0:03:51.112751






[INFO] Currently Scrapping: S.E.A Aquarium


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 6746
Total Native Reviews: 141
Runtime: 0:00:58.506456






[INFO] Currently Scrapping: Skyline Luge Sentosa


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 3238
Total Native Reviews: 42
Runtime: 0:00:24.666864






[INFO] Currently Scrapping: Adventure Cove Waterpark


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 2482
Total Native Reviews: 15
Runtime: 0:00:13.497763






[INFO] Currently Scrapping: Madame Tussauds


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 2104
Total Native Reviews: 108
Runtime: 0:00:44.498938






[INFO] Currently Scrapping: Trick Eye Museum Singapore


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 1122
Total Native Reviews: 29
Runtime: 0:00:18.115009






[INFO] Currently Scrapping: iFly Singapore


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 678
Total Native Reviews: 2
Runtime: 0:00:10.028811






[INFO] Currently Scrapping: Sentosa 4D AdventureLand


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 478
Total Native Reviews: 1
Runtime: 0:00:09.877041






[INFO] Currently Scrapping: Hydrodash Waterpark


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 18
Total Native Reviews: 0
Runtime: 0:00:07.049891






[INFO] Currently Scrapping: Sentosa Boardwalk


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 1337
Total Native Reviews: 75
Runtime: 0:00:34.272393






[INFO] Currently Scrapping: Palawan Beach


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 1022
Total Native Reviews: 25
Runtime: 0:00:16.792585






[INFO] Currently Scrapping: Fort Siloso


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 697
Total Native Reviews: 10
Runtime: 0:00:10.423675






[INFO] Currently Scrapping: Tanjong Beach


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 307
Total Native Reviews: 6
Runtime: 0:00:10.157619






[INFO] Currently Scrapping: Sentosa Nature Discovery


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 163
Total Native Reviews: 5
Runtime: 0:00:10.540163






[INFO] Currently Scrapping: Siloso Beach


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 1305
Total Native Reviews: 53
Runtime: 0:00:27.121569






[INFO] Currently Scrapping: Wave House Sentosa


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 222
Total Native Reviews: 1
Runtime: 0:00:10.170466






[INFO] Currently Scrapping: KidZania Singapore


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 561
Total Native Reviews: 2
Runtime: 0:00:10.225114






[INFO] Currently Scrapping: Wings of Time


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 2124
Total Native Reviews: 59
Runtime: 0:00:30.916831






[INFO] Currently Scrapping: Butterfly Park & Insect Kingdom


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 604
Total Native Reviews: 3
Runtime: 0:00:11.678907






[INFO] Currently Scrapping: The Maritime Experiential Museum


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 305
Total Native Reviews: 10
Runtime: 0:00:10.206669






[INFO] Currently Scrapping: Fort Siloso Skywalk


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 185
Total Native Reviews: 0
Runtime: 0:00:06.737446






[INFO] Currently Scrapping: Floating Bridge at Siloso Beach


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 233
Total Native Reviews: 5
Runtime: 0:00:10.161137






[INFO] Currently Scrapping: Imbiah Lookout


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 38
Total Native Reviews: 1
Runtime: 0:00:10.575142






[INFO] Currently Scrapping: Air Wave House Sentosa


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 222
Total Native Reviews: 1
Runtime: 0:00:09.823921






[INFO] Currently Scrapping: The Sentosa Bus


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 359
Total Native Reviews: 19
Runtime: 0:00:14.078318






[INFO] Currently Scrapping: The Sentosa Express


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 3753
Total Native Reviews: 142
Runtime: 0:00:58.936715






[INFO] Currently Scrapping: Resort World Sentosa


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 832
Total Native Reviews: 24
Runtime: 0:00:17.284786






[INFO] Currently Scrapping: HeadRock VR


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 29
Total Native Reviews: 0
Runtime: 0:00:06.922553






[INFO] Currently Scrapping: Magical Shores


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


No Native Reviews Found on Magical Shores






[INFO] Currently Scrapping: Southernmost point of Asia


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


-------------- Scrape Completed --------------
Total Reviews: 156
Total Native Reviews: 1
Runtime: 0:00:10.363354






[INFO] Currently Scrapping: SkyHelix Sentosa


Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\Luqman\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


No Native Reviews Found on SkyHelix Sentosa
