In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import csv
import time

# === Start of Script ===
if __name__ == "__main__":
    input_file = "/Users/rakesh/Downloads/cleaning_company.csv"              # Replace with your input CSV file
    output_file = "meta_scraping_results.csv"

    # Set up browser
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(options=options)

    # Read URLs from CSV
    urls = []
    with open(input_file, 'r', encoding='utf-8') as file:
        reader = csv.reader(file)
        for row in reader:
            if row:
                url = row[0].strip()
                if not url.startswith("http"):
                    url = "https://" + url
                urls.append(url)

    # Scrape data
    results = []
    for i, url in enumerate(urls):
        print(f"Scraping {i + 1}/{len(urls)}: {url}")
        data = {
            'url': url,
            'title': '',
            'meta_description': '',
            'meta_keywords': '',
            'status': 'success'
        }

        try:
            driver.get(url)
            time.sleep(2)

            data['title'] = driver.title

            try:
                desc = driver.find_element(By.XPATH, "//meta[@name='description']")
                data['meta_description'] = desc.get_attribute('content')
            except:
                data['meta_description'] = 'Not found'

            try:
                keywords = driver.find_element(By.XPATH, "//meta[@name='keywords']")
                data['meta_keywords'] = keywords.get_attribute('content')
            except:
                data['meta_keywords'] = 'Not found'

        except Exception as e:
            data['status'] = f'error: {str(e)}'

        results.append(data)

    # Save to CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['url', 'title', 'meta_description', 'meta_keywords', 'status'])
        writer.writeheader()
        writer.writerows(results)

    # Close browser
    driver.quit()
    print(f"\nDone! Results saved to '{output_file}'")

Scraping 1/5: https://﻿https://dubaihousekeeping.com/
Scraping 2/5: https://elitemaids.ae/
Scraping 3/5: https://justmaid.ae/
Scraping 4/5: https://servicemarket.com/en/dubai/cleaning-maid-services
Scraping 5/5: https://dubaiclean.com/

Done! Results saved to 'meta_scraping_results.csv'


In [None]:
import os
import json
from serpapi import GoogleSearch

# SerpAPI key - either use the provided one or get from environment variable
api_key = os.environ.get("SERPAPI_KEY") or "0dfdc537381f5983d131d9a92bd29f22a55ffdb8931bc36e2b0d99d848bd90fb"

# Set up the search parameters
params = {
    "q": "Top Sights Paris",
    "location": "Austin, Texas, United States",
    "hl": "en",
    "gl": "us",
    "api_key": api_key
}

# Perform the search
search = GoogleSearch(params)
results = search.get_dict()

# Extract ads from the results
ads = results.get("ads", [])
print(f"Found {len(ads)} ads")

# Print information about the ads
for i, ad in enumerate(ads, 1):
    print(f"Ad {i}: {ad.get('title')}")
    print(f"  Link: {ad.get('link')}")
    print(f"  Description: {ad.get('description', '')[:100]}...")
    
    # Extract and print sitelinks if available
    sitelinks = ad.get('sitelinks', [])
    if sitelinks:
        print(f"  {len(sitelinks)} sitelinks:")
        for link in sitelinks:
            print(f"    - {link.get('title')}: {link.get('link')}")
    print()

# Extract organic results
organic_results = results.get("organic_results", [])
print(f"Found {len(organic_results)} organic results")

# Extract places or top sights if available
places_results = results.get("places_results", []) or results.get("top_sights", [])
if places_results:
    print(f"Found {len(places_results)} places/top sights")

# Save the complete results to a JSON file
output_file = "paris_top_sights.json"
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2, ensure_ascii=False)
print(f"Results saved to {output_file}")

# Example of how to access specific data
print("\nExample data extraction:")
if ads:
    first_ad = ads[0]
    print(f"First ad position: {first_ad.get('position')}")
    print(f"First ad block position: {first_ad.get('block_position')}")
    
    # Additional fields in the first ad
    fields_to_show = ["title", "displayed_link", "tracking_link", "extensions"]
    for field in fields_to_show:
        value = first_ad.get(field)
        if isinstance(value, list):
            print(f"{field}: {', '.join(value)}")
        else:
            print(f"{field}: {value}")