In [30]:
# import os
# import requests

# api_key = os.environ['GOOGLE_MAPS_API_KEY']  # read the key from the environment; never hardcode credentials
# base_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
# params = {
#     'location': 'latitude,longitude',
#     'radius': '1000',  # Example radius in meters
#     'type': 'restaurant',  # Example type
#     'key': api_key,
# }

# response = requests.get(base_url, params=params)

# if response.status_code == 200:
#     data = response.json()
#     # Process the data as needed
#     print(data)
# else:
#     print(f"Error: {response.status_code}")


In [4]:
# web scraping of Yelp

In [37]:
import requests
from bs4 import BeautifulSoup
import csv
import os

def _parse_rating(rating_text):
    """Return the leading numeric token of ``rating_text`` as a float, or None.

    Only the first whitespace-separated token is considered. None is returned
    when the text is empty or that token is not a plain decimal number.
    """
    tokens = rating_text.split()
    if not tokens:
        # Empty rating text: indexing tokens[0] here would raise IndexError.
        return None

    candidate = tokens[0]
    if not candidate.replace('.', '').isdigit():
        return None

    try:
        return float(candidate)
    except ValueError:
        # Tokens such as "4.5.1" pass the digit check above but are not floats.
        return None


def _load_existing_rows(csv_file_path):
    """Return the rows already stored in ``csv_file_path`` ([] if it does not exist)."""
    if not os.path.exists(csv_file_path):
        return []
    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:
        return list(csv.DictReader(csvfile))


def scrape_yelp(url, csv_file_path='yelp_data2.csv', timeout=10):
    """Scrape one Yelp search-results page and merge the businesses into a CSV file.

    Rows already present in ``csv_file_path`` are kept; a scraped business is
    added only if no existing or previously scraped row has the same name.
    The file is then rewritten with the columns Name, Rating, Address, Cuisine.

    Args:
        url: Address of the search-results page to download.
        csv_file_path: CSV file to read existing rows from and write results to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` argument,
            so a stalled connection cannot block the notebook forever.

    Returns:
        None. Progress is reported with ``print``; on a non-200 response the
        CSV file is left untouched.

    Exceptions raised by ``requests.get`` (for example on timeout) are not
    caught here and propagate to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    existing_data = _load_existing_rows(csv_file_path)

    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # The CSS class names below mirror the markup of Yelp's search results page
    # at the time of writing; adjust them if the site structure changes.
    businesses = soup.find_all('div', class_='css-1qn0b6x')

    data = []
    # Names seen so far (existing file + this page) for constant-time duplicate checks.
    seen_names = {row.get('Name') for row in existing_data}

    for business in businesses:
        name_element = business.find('a', class_='css-19v1rkv')
        rating_element = business.find('span', class_='css-gutk1c')

        # The address lives in one of two alternative structures.
        address_element = business.find('span', class_='css-chan6m')
        if not address_element:
            address_element = business.find('div', class_='css-bwc5d7')

        # Skip cards missing any of the required pieces.
        if not (name_element and rating_element and address_element):
            continue

        name = name_element.text.strip()
        if name in seen_names:
            continue

        rating = _parse_rating(rating_element.text.strip())
        address = address_element.text.strip()

        # Cuisine labels: try the button-based structure first, then a looser fallback.
        cuisine_elements = business.select('span.css-1d8srnw a.css-abnp9g button.css-1rvjnnw span.css-11bijt4')
        if not cuisine_elements:
            cuisine_elements = business.select('span[class^="css-"] a span.css-11bijt4')
        cuisines = [cuisine.text.strip() for cuisine in cuisine_elements]

        # NOTE: the list is stored as-is, so the csv module writes its str() form
        # (e.g. "['Thai']"); kept unchanged for compatibility with existing files.
        data.append({'Name': name, 'Rating': rating, 'Address': address, 'Cuisine': cuisines})
        seen_names.add(name)

    # Combine existing data with new data and rewrite the whole file.
    all_data = existing_data + data

    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Name', 'Rating', 'Address', 'Cuisine']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_data)

    print(f"Scraped data saved to {csv_file_path}")

# Example run against a Yelp search-results page for food in Singapore.
# To fetch a different page, edit the `start` value at the end of the URL
# (presumably the result offset -- confirm against Yelp's pagination).
scrape_yelp(
    url='https://www.yelp.com/search?find_desc=food+in+sg&find_loc=Singapore%2C+Singapore&start=200'
)


Scraped data saved to yelp_data2.csv
