In [3]:
import csv
import requests
from bs4 import BeautifulSoup

# Base URL of the restaurant listing to scrape. Page 1 is fetched from this URL
# as-is; later pages are fetched from "<base_url>/<page_num>" (see
# scrape_restaurant_info).
base_url = "https://www.atninfo.com/uae/all/restaurants-1239"

def _text_or_na(tag):
    """Return the stripped text of *tag*, or "N/A" if the tag is missing or empty."""
    if tag is None:
        return "N/A"
    return tag.get_text(strip=True) or "N/A"


def scrape_restaurant_info(page_num, session, timeout=30):
    """Scrape one listing page and return the restaurants found on it.

    Args:
        page_num: 1-based page number. Page 1 is fetched from ``base_url``
            itself, later pages from ``{base_url}/{page_num}``.
        session: Object exposing ``get(url, timeout=...)`` (a
            ``requests.Session`` in this script).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the whole run.

    Returns:
        A list of ``(name, po_box, area, location, phone, mobile, category)``
        tuples, one per listing that has a name. Fields that are missing or
        empty are reported as ``"N/A"``; ``category`` defaults to
        ``"Restaurants"``. Returns ``None`` when the request fails, the status
        code is not 200, or the page contains no listing divs; the caller
        treats ``None`` as "stop scraping".
    """
    url = base_url if page_num == 1 else f"{base_url}/{page_num}"

    try:
        response = session.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report and signal "stop" instead of crashing, so that pages already
        # scraped by the caller can still be written out.
        print(f"Failed to retrieve page {page_num}. Error:", exc)
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve page {page_num}. Status code:", response.status_code)
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    restaurant_divs = soup.find_all('div', class_='col-7 center-vertically')
    if not restaurant_divs:  # No more restaurants found, stop scraping
        return None

    info = []
    for div in restaurant_divs:
        name_tag = div.find('h2', itemprop='name')
        if name_tag is None:
            continue
        name = name_tag.get_text(strip=True)

        # The detail fields live in the "row" divs that follow the row holding
        # the name. A listing without an enclosing row simply has no details.
        # NOTE(review): find_next_siblings returns *every* following sibling
        # row; if several listings share one parent, values from later
        # listings may overwrite earlier ones - confirm against page markup.
        header_row = div.find_parent('div', class_='row')
        if header_row is None:
            detail_rows = []
        else:
            detail_rows = header_row.find_next_siblings('div', class_='row')

        po_box = area = location = phone = mobile = "N/A"
        category = "Restaurants"  # Default value for category

        # A field found in a later row overwrites the value from earlier rows.
        for row in detail_rows:
            po_box_tag = row.find('span', itemprop='postalCode')
            if po_box_tag is not None:
                po_box = _text_or_na(po_box_tag)

            area_tag = row.find('span', itemprop='addressLocality')
            if area_tag is not None:
                area = _text_or_na(area_tag)

            location_tag = row.find('span', itemprop='streetAddress')
            if location_tag is not None:
                location = _text_or_na(location_tag)

            phone_tag = row.find('span', itemprop='telephone')
            if phone_tag is not None:
                phone = _text_or_na(phone_tag)

            mobile_tag = row.find('a', class_='mobileClick')
            if mobile_tag is not None:
                # The number is in the first <span> after the link; that span
                # may be absent, in which case the field becomes "N/A".
                mobile = _text_or_na(mobile_tag.find_next('span'))

            category_tags = row.find_all('a', class_='badge-list clicktoscroll clickCats')
            if category_tags:
                category = ", ".join(tag.get_text(strip=True) for tag in category_tags)

        info.append((name, po_box, area, location, phone, mobile, category))

    return info

# Upper bound on the number of pages to request; scraping stops earlier as soon
# as scrape_restaurant_info returns None (failed request or empty page).
num_pages = 120  # Adjust this number based on how many pages you want to attempt to scrape

all_restaurant_info = []

# One shared session reuses the HTTP connection across pages; the context
# manager guarantees it is closed even if scraping raises part-way through.
with requests.Session() as session:
    for page in range(1, num_pages + 1):
        info = scrape_restaurant_info(page, session)
        if info is None:  # Stop scraping if no more restaurant entries are found
            break
        all_restaurant_info.extend(info)

# Write data to CSV file
with open("Restaurants_dataatninfo.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    # Write header row
    writer.writerow(["Name", "P.O. Box", "Area", "Location", "Phone", "Mobile", "Category"])
    # Each scraped entry is already a 7-tuple in header order
    writer.writerows(all_restaurant_info)

print("Data has been saved to Restaurants_dataatninfo.csv")

Data has been saved to Restaurants_dataatninfo.csv
