In [2]:
# Required libraries for crawling, parsing, emailing, and file handling
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import re
import csv
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
import os

# Function to extract all email addresses from raw text
def extract_emails(text):
    return list(set(re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)))

# Function to extract the site title as a possible business name
def get_site_name(html, url):
    try:
        soup = BeautifulSoup(html, 'html.parser')
        title = soup.title.string.strip()
        return title
    except:
        # Fallback to domain name if title not found
        return url.split("//")[-1].split("/")[0]

# Function to perform Google search, visit each result, and collect email + name
def collect_contacts(query, max_results=10):
    results = search(query, num_results=max_results)
    contacts = []

    print("\nSearching and collecting data...\n")
    for url in results:
        print(f"Visiting: {url}")
        try:
            res = requests.get(url, timeout=5)
            text = res.text
            soup = BeautifulSoup(text, 'html.parser')
            emails = extract_emails(soup.get_text())  # Extract emails from visible text
            name = get_site_name(text, url)  # Try to get the business name
            for email in emails:
                contacts.append([name, email, url])
                print(f"Found: {name} - {email}")
        except Exception as e:
            print(f"Failed to process {url}: {e}")
    
    return contacts

# Function to save the collected contacts into a CSV file
def save_to_csv(data, filename="contacts.csv"):
    os.makedirs("data", exist_ok=True)  # Create 'data' folder if not exists
    path = os.path.join("data", filename)
    with open(path, "w", newline="", encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["Name", "Email", "Source URL"])  # CSV header
        writer.writerows(data)  # Write all rows
    return path  # Return path to the saved CSV

# Function to send an email with the CSV file as an attachment
def send_csv_via_email(sender, password, receiver, subject, body, filepath):
    # Setup email components
    msg = MIMEMultipart()
    msg['From'] = sender
    msg['To'] = receiver
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Attach CSV file to the email
    with open(filepath, "rb") as f:
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(f.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filepath)}"')
        msg.attach(part)

    # Send the email using Gmail SMTP server
    try:
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(sender, password)
            server.sendmail(sender, receiver, msg.as_string())
            print(f"Email sent to {receiver} with attachment.")
    except Exception as e:
        print(f"Failed to send email: {e}")

# Main script execution starts here
if __name__ == "__main__":
    # Step 1: Input the Google search keyword
    search_query = input("Enter search keyword:")

    # Step 2: Collect contacts from Google search results
    contacts = collect_contacts(search_query, max_results=10)

    # Step 3: If no contacts found, exit
    if not contacts:
        print("No contacts found.")
        exit()

    # Step 4: Save contacts to a CSV file
    csv_path = save_to_csv(contacts)
    print(f"\nSaved collected data to: {csv_path}")

    # Step 5: Ask for recipient email
    print("\nSend the CSV file via email")
    your_email = "km390595@gmail.com"
    app_password = "hamw qhoo mhxq zdgq"
    send_to = input("Enter recipient email address: ")

    # Step 6: Auto-generate subject and message using search query
    subject = f"Lead Data for: {search_query}"
    message = f"""Hi,

Please find attached the contact data collected for the query: "{search_query}".

The CSV includes business names, emails, and source websites for your reference. This is automated email do not reply.

Best regards,
Hackveda Automation Bot
"""

    # Step 7: Send the email with attachment
    send_csv_via_email(your_email, app_password, send_to, subject, message, csv_path)


Enter search keyword:python coaching centres in delhi

Searching and collecting data...

Visiting: https://www.justdial.com/Delhi/Python-Training-Institutes/nct-10944102
Visiting: https://www.ducatindia.com/python-training-course-in-delhi
Visiting: https://aptrondelhi.in/best-python-training-in-delhi.html
Visiting: https://www.techstack.in/python/
Visiting: https://www.pythontraining.net/
Found: Best Python Course in Delhi | Classroom Python Training Institute - inof@pythontraining.net
Found: Best Python Course in Delhi | Classroom Python Training Institute - info@tgcjaipur.com
Found: Best Python Course in Delhi | Classroom Python Training Institute - info@tgcdehradun.com
Found: Best Python Course in Delhi | Classroom Python Training Institute - supportinfo@pythontraining.in
Visiting: https://codingbytes.com/courses/python-course/
Visiting: https://www.brillicaservices.com/blogs/python-training-institute-in-delhi
Found: Best Python Training Institute in Delhi with Placement - +91-89795