In [1]:
import requests
from bs4 import BeautifulSoup
import csv

def _text_or_na(parent, selector):
    """Return the stripped text of the first *selector* match, or "N/A"."""
    node = parent.select_one(selector)
    if node is None:
        return "N/A"
    return node.get_text(strip=True)


def scrape_yellowpages(base_url, max_pages=5, timeout=10):
    """Collect business leads from consecutive pages of a listing URL.

    Pages 1..max_pages are requested in order. Crawling stops early at the
    first page that cannot be fetched (network error, timeout, or a
    non-200 status); leads gathered from earlier pages are still returned.

    Args:
        base_url: Listing URL. The page number is sent as a ``page`` query
            parameter, so an existing query string on the URL is preserved.
        max_pages: Highest page number to request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``'name'``, ``'address'`` and
        ``'phone'``. A field whose element is missing is set to ``"N/A"``.
    """
    leads = []
    for page in range(1, max_pages + 1):
        print(f"Scraping page {page}...")
        try:
            # Passing the page via ``params`` lets requests build the query
            # string, which stays valid even if base_url already has one.
            # Adjust the pagination parameter name as needed.
            response = requests.get(
                base_url, params={"page": page}, timeout=timeout
            )
        except requests.RequestException:
            # Treat a transport failure like a bad status: stop, but keep
            # the leads already collected instead of losing them.
            print("Failed to retrieve the page.")
            break

        if response.status_code != 200:
            print("Failed to retrieve the page.")
            break

        soup = BeautifulSoup(response.text, 'html.parser')

        # Customize these selectors based on the website structure
        for business in soup.select('.result'):  # Replace with actual class/element
            leads.append({
                'name': _text_or_na(business, '.business-name'),
                'address': _text_or_na(business, '.street-address'),
                'phone': _text_or_na(business, '.phones'),
            })

    return leads

if __name__ == "__main__":
    # Crawl up to five pages of the listing URL below.
    yellowpages_url = "https://www.yellowpages.com/glendale-ca/restaurants"
    leads = scrape_yellowpages(yellowpages_url, max_pages=5)

    # Write the leads to CSV: a header row first, then one row per lead.
    fieldnames = ["name", "address", "phone"]
    with open("yellowpages_leads.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for lead in leads:
            writer.writerow(lead)

    print(f"Saved {len(leads)} leads to yellowpages_leads.csv")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Saved 150 leads to yellowpages_leads.csv
