In [1]:
import imaplib
import email
from email.header import decode_header
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
import import_ipynb
from pdf_processing import (
    extract_dimensions_with_sizes,
    process_dimensions,
    save_to_mongodb,
    save_to_excel_formatted,
    extract_spec_sheet_data,
    save_spec_sheet_to_mongo,
    save_to_excel,
    extract_image_from_pdf,
    generate_pdf_from_excel_and_properties
)
import pandas as pd


In [2]:

# Function to connect to the email and download attachments
def fetch_email_attachment(email_address, password, download_folder):
    """Download the first attachment of the newest message in the Gmail inbox.

    Connects to ``imap.gmail.com`` over SSL, selects ``inbox``, fetches the
    last message id returned by an ``ALL`` search and saves the first part
    whose content disposition is ``attachment`` into ``download_folder``.
    The attachment type is not inspected; any attachment is accepted.

    Args:
        email_address: Account name used for the IMAP login.
        password: Password (app password) used for the IMAP login.
        download_folder: Existing directory the attachment is written into.

    Returns:
        A ``(filepath, sender_email)`` tuple. ``filepath`` is ``None`` when the
        mailbox is empty or the newest message carries no usable attachment.
        ``sender_email`` is the bare address parsed from the ``From`` header,
        or ``None`` when no message could be fetched at all.

    Raises:
        imaplib.IMAP4.error: On login or protocol failures.
        OSError: If the attachment cannot be written to ``download_folder``.
    """
    # Function-scope imports: these names are not part of the notebook's
    # import cell, and importing them here keeps this definition self-contained.
    from email.header import make_header
    from email.utils import parseaddr

    sender_email = None  # Stays None until a message has actually been parsed

    # The context manager logs out on every exit path, including the early
    # returns below and any exception raised while talking to the server.
    with imaplib.IMAP4_SSL("imap.gmail.com") as imap:
        imap.login(email_address, password)

        # Select the mailbox you want to check
        imap.select("inbox")

        # Search for all emails; an empty mailbox yields an empty id list
        status, data = imap.search(None, "ALL")
        message_ids = data[0].split() if status == "OK" and data and data[0] else []
        if not message_ids:
            return None, sender_email

        # Fetch the latest email (the last id in the search result)
        status, fetched = imap.fetch(message_ids[-1], "(RFC822)")
        if status != "OK":
            return None, sender_email

        for response in fetched:
            # Only tuple entries carry the raw message bytes
            if not isinstance(response, tuple):
                continue

            message = email.message_from_bytes(response[1])

            # Extract the clean sender address; tolerate a missing From header
            sender_email = parseaddr(str(message.get("From") or ""))[1]

            for part in message.walk():
                if part.get_content_disposition() != "attachment":
                    continue

                raw_name = part.get_filename()
                if not raw_name:
                    continue

                # Decode every RFC 2047 fragment with its declared charset
                decoded_name = str(make_header(decode_header(raw_name)))

                # The filename comes from an untrusted email: drop any
                # directory components (either separator style) so the file
                # can only be created inside download_folder.
                safe_name = os.path.basename(decoded_name.replace("\\", "/"))
                if safe_name in ("", ".", ".."):
                    continue

                # Container parts have no decodable payload
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue

                # Save the attachment to the download folder
                filepath = os.path.join(download_folder, safe_name)
                with open(filepath, "wb") as f:
                    f.write(payload)
                print(f"Downloaded attachment: {safe_name}")
                return filepath, sender_email

    return None, sender_email


In [3]:

# Function to send email with attachments
def send_email_with_attachments(to_email, subject, body, attachments, sender_email, sender_password):
    """Send a plain-text email with file attachments through Gmail SMTP.

    Builds a multipart message, attaches each file as a base64-encoded
    ``application/octet-stream`` part, then sends it via ``smtp.gmail.com:587``
    after upgrading the connection with STARTTLS and logging in.

    Args:
        to_email: Recipient address.
        subject: Subject line.
        body: Plain-text body.
        attachments: Iterable of file paths to attach; ``None`` or an empty
            iterable sends the message without attachments.
        sender_email: Address used for the ``From`` header and the SMTP login.
        sender_password: Password (app password) for the SMTP login.

    Raises:
        OSError: If an attachment cannot be read.
        smtplib.SMTPException: On SMTP login or delivery failures.
    """
    # Set up the email
    msg = MIMEMultipart()
    msg["From"] = sender_email
    msg["To"] = to_email
    msg["Subject"] = subject

    # Attach the body text
    msg.attach(MIMEText(body, "plain"))

    # Attach files
    for file_path in attachments or []:
        part = MIMEBase("application", "octet-stream")
        with open(file_path, "rb") as f:
            part.set_payload(f.read())
        encoders.encode_base64(part)
        # Pass the filename as a header parameter so the email package quotes
        # it (spaces, ';') or RFC 2231-encodes it (non-ASCII) as required;
        # interpolating it into the header value produced malformed headers.
        part.add_header(
            "Content-Disposition",
            "attachment",
            filename=os.path.basename(file_path),
        )
        msg.attach(part)

    # Send the email
    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, to_email, msg.as_string())
        print(f"Email sent to {to_email}")


In [4]:

# Main Function
def main():
    """Fetch the newest emailed attachment, process it and mail the results back.

    Steps:
        1. Read the mailbox credentials from the ``EMAIL_ADDRESS`` and
           ``EMAIL_APP_PASSWORD`` environment variables.
        2. Download the attachment of the latest inbox message into
           ``./downloads``.
        3. Extract the dimension table, store it in MongoDB and write
           ``output_formatted.xlsx``.
        4. Extract the spec sheet, store it in MongoDB and, when data was
           found, write ``spec_sheet_output.xlsx`` and build ``spec_final.pdf``
           from it.
        5. Email the generated files back to the original sender.

    Raises:
        RuntimeError: If either credential environment variable is missing.
    """
    # Email credentials come from the environment; secrets must never be
    # committed inside the notebook.
    email_address = os.environ.get("EMAIL_ADDRESS")
    password = os.environ.get("EMAIL_APP_PASSWORD")  # Gmail app password, not the raw password
    if not email_address or not password:
        raise RuntimeError(
            "Set the EMAIL_ADDRESS and EMAIL_APP_PASSWORD environment variables "
            "before running; credentials must not be hard-coded in the notebook."
        )

    # Folder to save the downloaded PDF
    download_folder = "./downloads"
    os.makedirs(download_folder, exist_ok=True)

    # Fetch the attachment and sender email
    pdf_path, sender_email = fetch_email_attachment(email_address, password, download_folder)

    if pdf_path and sender_email:
        print(f"PDF file ready for processing: {pdf_path}")
        print(f"Sender email: {sender_email}")

        # Pass the PDF to the existing code's main function
        output_excel_path = "output_formatted.xlsx"
        db_name = "PDFData_sample"
        collection_name = "DynamicDimensionsNew"

        # Extract dimensions dynamically
        dimensions, headers = extract_dimensions_with_sizes(pdf_path)

        # Process and structure dimensions
        dataframe = process_dimensions(dimensions, headers)

        # Save to MongoDB
        save_to_mongodb(dataframe, db_name, collection_name)

        # Save to Excel with formatting
        save_to_excel_formatted(dataframe, output_excel_path)

        # Files that will be mailed back; the spec PDF is added only if built
        attachments = [output_excel_path]

        # SPEC_SHEET----------------------
        spec_sheet_data = extract_spec_sheet_data(pdf_path)

        # Save to MongoDB
        save_spec_sheet_to_mongo(spec_sheet_data)

        if spec_sheet_data:
            # Save to Excel
            df = pd.DataFrame(spec_sheet_data)
            output_file = "spec_sheet_output.xlsx"
            save_to_excel(df, output_file)

            # Budget document generation-------------
            # Runs only when the spec-sheet workbook was written: the PDF is
            # generated from that workbook, and referencing it unconditionally
            # raised UnboundLocalError whenever no spec data was extracted.
            spec_pdf_file = "spec_final.pdf"
            extract_image_from_pdf(pdf_path)
            generate_pdf_from_excel_and_properties(output_file, spec_pdf_file, pdf_path)
            attachments.append(spec_pdf_file)
        else:
            print("No spec sheet data found; skipping spec sheet Excel and PDF generation.")

        print("PDF processing completed!")

        # Send email with attachments back to sender
        send_email_with_attachments(
            to_email=sender_email,
            subject="Processed Files from Your Submission",
            body="Dear User,\n\nPlease find attached the processed Excel and PDF files.\n\nBest regards,\nAutomated Processor",
            attachments=attachments,
            sender_email=email_address,
            sender_password=password,
        )
    else:
        print("No PDF attachment found or sender email not available.")

# Entry point: run the pipeline only when this code executes as the top-level
# program (``__name__`` is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


Downloaded attachment: input_doc.pdf
PDF file ready for processing: ./downloads\input_doc.pdf
Sender email: iam.rohit1789@gmail.com
Data saved to MongoDB database 'PDFData_sample', collection 'DynamicDimensionsNew'
Formatted data successfully saved to output_formatted.xlsx
8 records saved to MongoDB collection 'spec_sheet'.
Spec Sheet data successfully saved to spec_sheet_output.xlsx




PDF generated successfully: spec_final.pdf
PDF processing completed!
Email sent to iam.rohit1789@gmail.com
