In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
# Initialize Chrome WebDriver.
# Creates the `driver` handle that later cells use to load the page, run
# JavaScript, and locate elements.
# NOTE(review): presumably needs a local Chrome install plus a compatible driver
# binary on this machine — confirm for the target environment.
driver = webdriver.Chrome()

In [3]:
# Navigate the browser session to the Zomato landing page for Mumbai.
# The restaurant cards scraped in later cells are read from this page.
driver.get(url="https://www.zomato.com/mumbai/")

In [4]:
# Give the initial page a fixed 2 seconds to load before scrolling.
time.sleep(2)

# Scrolling configuration
scroll_pause_time = 3  # Seconds to wait after each scroll step
max_scrolls = 500      # Safety cap so the loop always terminates (see note below)
screen_height = driver.execute_script("return window.screen.height;")  # Screen height in pixels

# Scroll down one screen height per step until the next target would fall past
# the end of the document.
#
# The loop is bounded on purpose: if the page keeps growing as fast as we scroll,
# or if `screen_height` comes back as 0, the exit condition below is never met and
# an unbounded loop would hang the notebook.
for i in range(1, max_scrolls + 1):
    # Pass the target offset as a script argument instead of formatting it into
    # the JavaScript source.
    driver.execute_script("window.scrollTo(0, arguments[0]);", screen_height * i)
    time.sleep(scroll_pause_time)

    # Re-read the document height on every step; it can change between scrolls.
    scroll_height = driver.execute_script("return document.body.scrollHeight;")

    # Stop once the next scroll target lies beyond the current document height.
    if screen_height * (i + 1) > scroll_height:
        break
else:
    # Only reached when the cap was hit without ever scrolling past the content.
    print(
        f"Stopped after {max_scrolls} scrolls without reaching the end of the page; "
        "the scraped listings may be incomplete."
    )

In [5]:
# Collect every element on the page that carries the "jumbo-tracker" class;
# the next cell treats each one as a single restaurant listing.
# This class name comes from Zomato's markup and may need updating if the site changes.
divs = driver.find_elements(by=By.CLASS_NAME, value="jumbo-tracker")

In [6]:
# XPath (relative to one listing element) for each field we scrape.
# These match on exact generated class strings, so they break whenever Zomato
# changes its markup and will need to be refreshed.
field_xpaths = {
    'name': './/h4[@class="sc-1hp8d8a-0 sc-cgThhu dFwWJC"]',
    'rating': './/div[@class="sc-ghUbLI lbumXf"]//div[@class="sc-1q7bklc-1 cILgox"]',
    'price': './/div[@class="sc-ghUbLI lbumXf"]//p[@class="sc-1hez2tp-0 sc-LAuEU dHtbEm"]',
    'cuisines': './/div[@class="sc-ghUbLI lbumXf"]//p[@class="sc-1hez2tp-0 sc-LAuEU fDjWNG"]',
    'address': './/div[@class="sc-iEPtyo joISYm"]//p[@class="sc-1hez2tp-0 sc-gkfylT cOAsXn"]',
}


def _first_text(parent, xpath):
    """Return the text of the first element under ``parent`` matching ``xpath``.

    Uses ``find_elements`` (plural), which returns an empty list when nothing
    matches, so a missing field yields ``None`` instead of raising
    ``NoSuchElementException``.
    """
    matches = parent.find_elements(By.XPATH, xpath)
    return matches[0].text if matches else None


# One list per output column; index k in every list describes the same listing.
name = []
cuisines = []
rating = []
price = []
address = []

# Extract data from each restaurant listing.
# Every list receives exactly one value per listing (None when the field is
# absent), so the lists stay the same length and a single incomplete card
# no longer aborts the whole scrape part-way through.
for div in divs:
    name.append(_first_text(div, field_xpaths['name']))
    rating.append(_first_text(div, field_xpaths['rating']))
    price.append(_first_text(div, field_xpaths['price']))
    cuisines.append(_first_text(div, field_xpaths['cuisines']))
    address.append(_first_text(div, field_xpaths['address']))

In [7]:
# Combine the per-field lists into one table: one row per listing,
# one column per scraped field.
out_df = pd.DataFrame(
    data={
        'name': name,
        'cuisines': cuisines,
        'rating': rating,
        'price_for_two': price,
        'address': address,
    }
)

In [8]:
# Write the table to a CSV file in the current working directory,
# leaving out the DataFrame's index column.
out_df.to_csv(
    path_or_buf="mumbai_restaurants.csv",
    index=False,
)

In [9]:
# Shut down the browser and end the WebDriver session.
# `quit()` is used instead of `close()`: `close()` only closes the current
# window and leaves the session and its driver process running, whereas
# `quit()` closes every window and releases them.
driver.quit()