In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

BASE_URL = "https://www.yellowpages.com/search?search_terms=IT+Services&geo_location_terms=California"

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

company_data = []

def scrape_page(url):
    """Scrapes a single page for company data."""
    try:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Finding all company blocks on the page
        companies = soup.find_all('div', class_='result')
        for company in companies:
            try:
                name = company.find('a', class_='business-name').text.strip()
            except AttributeError:
                name = None
            
            try:
                website = company.find('a', class_='track-visit-website')['href']
            except (AttributeError, TypeError):
                website = None

            try:
                contact = company.find('div', class_='phones phone primary').text.strip()
            except AttributeError:
                contact = None

            try:
                address = company.find('p', class_='adr').text.strip()
            except AttributeError:
                address = None

            try:
                category = company.find('div', class_='categories').text.strip()
            except AttributeError:
                category = None

            try:
                description = company.find('div', class_='snippet').text.strip()
            except AttributeError:
                description = None

            try:
                email = None 
            except AttributeError:
                email = None

            # Adding the data to the list
            company_data.append({
                "Company Name": name,
                "Website URL": website,
                "Contact Number": contact,
                "Location/Address": address,
                "Industry/Category": category,
                "Company Description": description,
                "Email Address": email
            })

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the page: {e}")

# Scraping first 3 pages
for page in range(1, 4):
    print(f"Scraping page {page}...")
    url = f"{BASE_URL}&page={page}"
    scrape_page(url)
    time.sleep(2)

# Saving the data to a CSV file
df = pd.DataFrame(company_data)
df.to_csv('company_data2.csv', index=False)
print("Data saved to company_data2.csv")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Data saved to company_data2.csv


In [11]:
df=pd.read_csv("company_data2.csv")

In [12]:
df

Unnamed: 0,Company Name,Website URL,Contact Number,Location/Address,Industry/Category,Company Description,Email Address
0,Geeks On Site,,,Serving your area.,Computers & Computer Equipment-Service & Repair,,
1,T & T Truck & Crane Service,http://truckandcrane.com,(805) 232-5116,,Trucking-Heavy HaulingCranes-Renting & Leasing...,From Business: T&T Truck and Crane Service is ...,
2,Waters Vacuum Truck Service,https://www.watersvacuum.com,(775) 276-6124,Serving theCA Area,Septic Tank & System CleaningPlumbing-Drain & ...,The crew at Waters are the best! My old septi...,
3,Gastelums' Tree Service,http://www.gastelumtreeservice.com,(707) 435-3072,,Landscaping & Lawn ServicesTree Service,From Business: Are you looking for a quality t...,
4,Ernie's Plumbing & Sewer Service,http://www.erniesplumbingandsewer.com,(707) 515-7550,Serving theCA Area,Building ContractorsPlumbersPlumbing-Drain & S...,George was a employee that any company would b...,
...,...,...,...,...,...,...,...
86,Team Insurance & Financial Services Inc.,https://teaminsurance.net,(707) 462-5901,,Life InsuranceHealth InsuranceHomeowners Insur...,"good service and information""",
87,Roy's Screen Service,https://roysmobilescreen.com,(424) 253-8244,Serving theCA Area,Screen EnclosuresScreening & Sifting Equipment...,From Business: Proudly serving West Los Angele...,
88,JC Pressure Washing & Landscaping Services,,(916) 396-4718,,Power WashingLandscape ContractorsPressure Was...,From Business: JC Pressure Washing & Landscapi...,
89,Nationwide Transport Services,https://ntslogistics.com,(844) 684-7608,Serving theCA Area,Container Freight ServiceTrucking Transportati...,"I am really happy with the services, great tea...",
