In [1]:
!pip install selenium




In [2]:
pip install webdriver-manager


Note: you may need to restart the kernel to use updated packages.


In [3]:
# Step 1: Install necessary libraries (only once)
!pip install selenium pandas

# Step 2: Import required libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
import time

# Step 3: Setup Selenium WebDriver
chrome_path = "chromedriver.exe"  # Replace with full path if needed
options = Options()
# options.add_argument("--headless")  # Uncomment if you want headless mode

service = Service(chrome_path)
driver = webdriver.Chrome(service=service, options=options)

# Step 4: Go to Trivago search results page
url = "https://www.trivago.in/en-IN/srl/hotels-bengaluru-india?search=200-64975;dr-20250812-20250813-s;rc-1-2"
driver.get(url)
time.sleep(5)

# Step 5: Scroll to load more content
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(3)

# Step 6: Find all hotel elements
hotels = driver.find_elements(By.CSS_SELECTOR, 'article[data-testid="item"]')

# Step 7: Extract data
data = []

for hotel in hotels:
    try:
        name = hotel.find_element(By.CSS_SELECTOR, '[data-testid="item-name"]').text
    except:
        name = 'N/A'
    
    try:
        price = hotel.find_element(By.CSS_SELECTOR, '[data-testid="recommended-price"]').text
    except:
        price = 'N/A'
    
    try:
        rating_value = hotel.find_element(By.CSS_SELECTOR, '[itemprop="ratingValue"]').text
    except:
        rating_value = 'N/A'
    
    try:
        total_ratings = hotel.find_element(By.CSS_SELECTOR, '[data-testid="aggregate-rating-text"]').text
    except:
        total_ratings = 'N/A'
    
    try:
        distance = hotel.find_element(By.CSS_SELECTOR, '[data-testid="distance-label-section"]').text
    except:
        distance = 'N/A'

    data.append({
        'Hotel Name': name,
        'Price': price,
        'Rating': rating_value,
        'Total Ratings': total_ratings,
        'Distance from Center': distance
    })

# Step 8: Create DataFrame
df = pd.DataFrame(data)

# Step 9: Display the DataFrame
print("Scraped Hotel Data:")
print(df)

# Step 10: Save to CSV
df.to_csv("trivago_hotel_data.csv", index=False)
print("\nData saved to 'trivago_hotel_data.csv'")

# Step 11: Close the browser
driver.quit()


Scraped Hotel Data:
                                           Hotel Name    Price Rating  \
0   Holiday Inn Express Bengaluru Yeshwantpur, an ...   ₹4,491          
1                                Fountain Tree by TGI   ₹4,345          
2                  Octave Hotel & Spa - Sarjapur Road   ₹2,169          
3          ibis Bengaluru Hosur Road - An Accor Brand   ₹5,264          
4                            OYO 7898 Hotel Al Fa Inn   ₹1,022          
5       Click Hotel Bangalore - International Airport   ₹4,612          
6   Grand Continent Malleshwaram A Sarovar Portico...   ₹3,584          
7              ibis Bengaluru Hebbal - An Accor Brand   ₹5,152          
8                                    Sri Kumara Lodge     ₹983          
9                                         Snt Comfort   ₹1,142          
10              Ramada by Wyndham Bengaluru Yelahanka   ₹6,720          
11       Howard Johnson By Wyndham, Hebbal, Bengaluru   ₹6,903          
12                        Ujjwa

In [8]:
# Install dependencies
!pip install selenium pandas webdriver-manager

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Setup Selenium
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Load your Trivago search page
driver.get("https://www.trivago.in/en-IN/srl/hotels-bengaluru-india?search=200-64975;dr-20250812-20250813-s;rc-1-2")
time.sleep(5)

# Lists for scraped data
data = []

# Pagination loop
while True:
    time.sleep(3)
    hotels = driver.find_elements(By.XPATH, "//section[contains(@class,'hotel-item')]")
    for hotel in hotels:
        name = hotel.find_element(By.XPATH, ".//h2").text if hotel.find_elements(By.XPATH, ".//h2") else None
        price = hotel.find_element(By.XPATH, ".//*[contains(@data-testid,'recommendedPrice')]").text if hotel.find_elements(By.XPATH, ".//*[contains(@data-testid,'recommendedPrice')]") else None
        rating = hotel.find_element(By.XPATH, ".//span[@itemprop='ratingValue']").text if hotel.find_elements(By.XPATH, ".//span[@itemprop='ratingValue']") else None
        rating_desc = hotel.find_element(By.XPATH, ".//span[contains(@class,'_1Wh5Tf')]/strong").text if hotel.find_elements(By.XPATH, ".//span[contains(@class,'_1Wh5Tf')]/strong") else None
        total_ratings = hotel.find_element(By.XPATH, ".//span[contains(text(),'ratings')]").text if hotel.find_elements(By.XPATH, ".//span[contains(text(),'ratings')]") else None
        distance = hotel.find_element(By.XPATH, ".//span[contains(text(),'km to City centre')]").text if hotel.find_elements(By.XPATH, ".//span[contains(text(),'km to City centre')]") else None

        data.append({
            "Hotel Name": name,
            "Price": price,
            "Rating": rating,
            "Rating Description": rating_desc,
            "Total Ratings": total_ratings,
            "Distance from City Centre": distance
        })

    # Go to next page if available
    next_btn = driver.find_elements(By.XPATH, "//button[@data-testid='pagination-next' and not(contains(@class,'disabled'))]")
    if next_btn:
        next_btn[0].click()
    else:
        break

# Create dataframe
df = pd.DataFrame(data)

# Display and save
print("Extracted DataFrame:")
display(df)
df.to_csv("trivago_bengaluru_hotels.csv", index=False)
print("Saved to trivago_bengaluru_hotels.csv")

driver.quit()


Extracted DataFrame:


Saved to trivago_bengaluru_hotels.csv


In [10]:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# Output column -> (CSS selector, attribute). When attribute is None the
# element's visible text is used; otherwise the named attribute is read.
# Insertion order defines the DataFrame column order.
# NOTE(review): "Category Rating" is None for every row in the recorded
# output, so its selector apparently did not match — confirm against the DOM.
FIELD_SPECS = {
    "Hotel Name": ('[data-testid="item-name"] span', None),
    "Price": ('[data-testid="recommended-price"]', None),
    "Rating": ('span[itemprop="ratingValue"]', None),
    "Category Rating": ('[data-testid="review-category-label"]', None),
    "Total Ratings": ('meta[itemprop="ratingCount"]', "content"),
    "Distance from City Center": ('[data-testid="distance-label-section"] span', None),
}

# Setup
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Prepare data
hotels = []

try:
    # Trivago Bengaluru hotel listing URL
    url = "https://www.trivago.in/en-IN/srl/hotels-bengaluru-india?search=200-64975;dr-20250812-20250813-s;rc-1-2"
    driver.get(url)
    time.sleep(5)  # wait for page to load

    # Scroll to load all content (optional depending on site behavior)
    for _ in range(3):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # Hotel containers
    # NOTE(review): "_96iWr8" looks like a build-generated class name and may
    # change without notice — consider a data-testid based selector.
    containers = driver.find_elements(By.CSS_SELECTOR, "div._96iWr8")

    for hotel in containers:
        record = {}
        for column, (selector, attribute) in FIELD_SPECS.items():
            try:
                element = hotel.find_element(By.CSS_SELECTOR, selector)
                record[column] = element.get_attribute(attribute) if attribute else element.text
            except (NoSuchElementException, StaleElementReferenceException):
                # Field absent on this card: record as missing, keep going.
                # Other errors (Ctrl-C, dead browser) are allowed to surface.
                record[column] = None
        hotels.append(record)
finally:
    # Close the browser even if loading or extraction raised
    driver.quit()

# Store and show dataframe
df = pd.DataFrame(hotels)
print(df.head())
df.to_csv("trivago_hotels.csv", index=False)


                                          Hotel Name   Price Rating  \
0  Holiday Inn Express Bengaluru Yeshwantpur, an ...  ₹4,491    7.5   
1         ibis Bengaluru Hosur Road - An Accor Brand  ₹5,264    7.5   
2       Howard Johnson By Wyndham, Hebbal, Bengaluru  ₹6,903    8.1   
3                           OYO 7898 Hotel Al Fa Inn  ₹1,022    7.8   
4  Grand Continent Malleshwaram A Sarovar Portico...  ₹3,584    9.3   

  Category Rating Total Ratings Distance from City Center  
0            None          3813     7.8 km to City centre  
1            None          1686     8.9 km to City centre  
2            None         11441     8.3 km to City centre  
3            None           281     4.6 km to City centre  
4            None             7     3.0 km to City centre  


In [11]:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Number of result pages to visit (pages 1..TOTAL_PAGES inclusive).
TOTAL_PAGES = 34

# Output column -> (CSS selector, attribute). attribute=None means "use the
# element's visible text". Insertion order defines the DataFrame column order.
HOTEL_FIELDS = {
    "Hotel Name": ('[data-testid="item-name"] span', None),
    "Price": ('[data-testid="recommended-price"]', None),
    "Rating": ('span[itemprop="ratingValue"]', None),
    "Category Rating": ('[data-testid="review-category-label"]', None),
    "Total Ratings": ('meta[itemprop="ratingCount"]', "content"),
    "Distance from City Center": ('[data-testid="distance-label-section"] span', None),
}


def _read_field(card, selector, attribute):
    """Read one field from a hotel card; return None when it is not available.

    Args:
        card: WebElement of a single hotel container.
        selector: CSS selector of the field, relative to `card`.
        attribute: Attribute name to read, or None to read the visible text.
    """
    try:
        element = card.find_element(By.CSS_SELECTOR, selector)
        return element.get_attribute(attribute) if attribute else element.text
    except (NoSuchElementException, StaleElementReferenceException):
        # Only "field missing on this card" is best-effort; other errors surface.
        return None


def _scrape_loaded_page(driver):
    """Return one record (dict keyed by HOTEL_FIELDS) per hotel card on the current page."""
    # NOTE(review): "_96iWr8" looks like a build-generated class name and may
    # change without notice — confirm it still identifies the hotel cards.
    containers = driver.find_elements(By.CSS_SELECTOR, "div._96iWr8")
    return [
        {column: _read_field(card, selector, attribute)
         for column, (selector, attribute) in HOTEL_FIELDS.items()}
        for card in containers
    ]


# Setup Chrome
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Start URL
base_url = "https://www.trivago.in/en-IN/srl/hotels-bengaluru-india?search=200-64975;dr-20250812-20250813-s;rc-1-2&"

# All scraped hotels
all_hotels = []

try:
    # Loop through all result pages
    for page in range(1, TOTAL_PAGES + 1):
        print(f"Scraping page {page}...")
        url = base_url + f"page={page}"
        driver.get(url)
        time.sleep(5)

        # Scroll to bottom (load lazy content)
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        all_hotels.extend(_scrape_loaded_page(driver))
finally:
    # Close the browser even if a page fails; rows collected so far stay in all_hotels
    driver.quit()

# Save and print
df = pd.DataFrame(all_hotels)
print(df.head())
df.to_csv("trivago_hotels.csv", index=False)


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
                                          Hotel Name   Price Rating  \
0  Holiday Inn Express Bengaluru Yeshwantpur, an ...  ₹4,491    7.5   
1         ibis Bengaluru Hosur Road - An Accor Brand  ₹5,264    7.5   
2       Howard Johnson By Wyndham, Hebbal, Bengaluru  ₹6,903    8.1   
3  Grand Continent Malleshwaram A Sarovar Por