In [2]:
import csv
import json
import re
import urllib.parse

import requests
from bs4 import BeautifulSoup

def google_search(query, num_results=10, file_export=None, domain=None, filetype=None, timeout=10):
    """
    Perform a Google search and return the results.

    Args:
        query (str): The search term.
        num_results (int): Number of results to retrieve.
        file_export (str): File format to export results ('csv' or 'json').
        domain (str): Restrict results to a specific domain (e.g., 'example.com').
        filetype (str): Restrict results to a specific filetype (e.g., 'pdf').
        timeout (float): Seconds to wait for the HTTP response before giving up.

    Returns:
        list: A list of search results containing title, link, and description.
        An empty list is returned when the server answers with a non-200 status.

    Raises:
        requests.RequestException: Propagated unchanged when the request itself
            fails (connection error, timeout, ...).
    """
    base_url = "https://www.google.com/search"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    # Remember the bare search term before the filters are appended: result
    # snippets never contain the "site:"/"filetype:" operators, so highlighting
    # must be based on what the user actually typed.
    search_term = query.strip()
    # An empty term must not be highlighted: replacing "" would inject the
    # escape codes between every character of the description.
    highlight_pattern = (
        re.compile(re.escape(search_term), re.IGNORECASE) if search_term else None
    )

    # Add optional filters to the query
    if domain:
        query += f" site:{domain}"
    if filetype:
        query += f" filetype:{filetype}"

    params = {
        "q": query,
        "num": num_results
    }

    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve results. HTTP Status Code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    # The CSS class names below mirror Google's result markup at the time of
    # writing; if they change, no entries are collected (reported further down).
    for result in soup.select('.tF2Cxc'):
        title_element = result.select_one('h3')
        link_element = result.select_one('a')
        description_element = result.select_one('.VwiC3b')

        if not (title_element and link_element and description_element):
            continue

        # Skip anchors that carry no href instead of raising KeyError.
        link = link_element.get('href')
        if not link:
            continue

        title = title_element.get_text()
        description = description_element.get_text()

        # Highlight the search term in the description (ANSI bold). The match
        # is case-insensitive and the description's own casing is preserved.
        if highlight_pattern is not None:
            description = highlight_pattern.sub(
                lambda match: f"\033[1m{match.group(0)}\033[0m", description
            )

        results.append({
            "title": title,
            "link": link,
            "description": description
        })

    if not results:
        print("No results found. The HTML structure might have changed.")
    else:
        print(f"Found {len(results)} results.")

    # Export results to file if requested
    if file_export:
        export_results(results, file_export)

    return results

def export_results(results, file_format):
    """
    Write search results to a file in the current working directory.

    The output file name is fixed per format: google_search_results.csv or
    google_search_results.json. An existing file of that name is overwritten.
    Any other format only prints a notice and writes nothing.

    Args:
        results (list): Result dictionaries with 'title', 'link' and
            'description' keys.
        file_format (str): The format to export ('csv' or 'json').
    """
    wants_csv = file_format == 'csv'
    wants_json = file_format == 'json'

    # Guard clause: reject anything that is neither supported format.
    if not (wants_csv or wants_json):
        print(f"Unsupported file format: {file_format}")
        return

    if wants_json:
        with open("google_search_results.json", "w", encoding="utf-8") as handle:
            json.dump(results, handle, ensure_ascii=False, indent=4)
        print("Results exported to google_search_results.json")
        return

    columns = ["title", "link", "description"]
    # newline="" lets the csv module control line endings itself.
    with open("google_search_results.csv", "w", newline="", encoding="utf-8") as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        table.writerows(results)
    print("Results exported to google_search_results.csv")

# Usage
if __name__ == "__main__":
    # Interactive entry point: collect the search options, run the search and
    # print every result to the terminal.
    print("Welcome to Google Search Scraper!")
    query = input("Enter your search query: ").strip()

    # A blank answer selects the default. Anything that is not a positive whole
    # number also falls back to the default instead of aborting with ValueError.
    raw_count = input("How many results do you want? (Default: 10): ").strip()
    try:
        num_results = int(raw_count) if raw_count else 10
    except ValueError:
        print(f"'{raw_count}' is not a whole number; using 10 results.")
        num_results = 10
    if num_results < 1:
        print("The number of results must be at least 1; using 10 results.")
        num_results = 10

    # Strip the answers so that whitespace-only input counts as "no filter"
    # rather than producing an empty "site:" / "filetype:" operator.
    domain = input("Restrict to a specific domain (leave blank for none): ").strip()
    filetype = input("Restrict to a specific filetype (e.g., 'pdf', leave blank for none): ").strip()
    # Normalise case so that e.g. "CSV" is accepted as well.
    export_format = input("Export results to file? (Enter 'csv', 'json', or leave blank): ").strip().lower()

    results = google_search(query, num_results, file_export=export_format, domain=domain, filetype=filetype)

    # Display results
    for idx, result in enumerate(results, start=1):
        print(f"{idx}. {result['title']}")
        print(f"   Link: {result['link']}")
        print(f"   Description: {result['description']}\n")


Welcome to Google Search Scraper!


Enter your search query:  html tutorial
How many results do you want? (Default: 10):  1
Restrict to a specific domain (leave blank for none):  
Restrict to a specific filetype (e.g., 'pdf', leave blank for none):  
Export results to file? (Enter 'csv', 'json', or leave blank):  


Found 1 results.
1. HTML Tutorial
   Link: https://www.w3schools.com/html/
   Description: In this HTML tutorial, you will find more than 200 examples. With our online "Try it Yourself" editor, you can edit and test each example yourself!

