In [5]:
import email
import imaplib
import os
import smtplib
from email import encoders
from email.header import decode_header
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import pandas as pd
import pdfplumber
from pymongo import MongoClient
from reportlab.pdfgen import canvas


In [None]:
# Email credentials and servers.
# Values are read from environment variables so real secrets never have to be
# typed into (or committed with) the notebook; the literals are placeholder
# fallbacks that keep the previous behaviour when nothing is exported.
EMAIL_USER = os.environ.get("EMAIL_USER", "your_email@example.com")
EMAIL_PASS = os.environ.get("EMAIL_PASS", "your_password")
IMAP_SERVER = os.environ.get("IMAP_SERVER", "imap.example.com")
SMTP_SERVER = os.environ.get("SMTP_SERVER", "smtp.example.com")
# Environment values are strings; the SMTP client is given an int port.
SMTP_PORT = int(os.environ.get("SMTP_PORT", "587"))

# MongoDB setup
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017/")
DB_NAME = "email_data_db"
COLLECTION_NAME = "budget_data"


In [None]:

def _decode_subject(raw_subject):
    """Return a printable subject line from a raw ``Subject`` header value.

    Handles a missing header (``None``) and joins every fragment returned by
    ``decode_header`` instead of only the first one.
    """
    if raw_subject is None:
        return "(no subject)"
    fragments = []
    for text, charset in decode_header(raw_subject):
        if isinstance(text, bytes):
            try:
                text = text.decode(charset or "utf-8", errors="replace")
            except LookupError:
                # The header declared a charset Python does not know.
                text = text.decode("utf-8", errors="replace")
        fragments.append(text)
    return "".join(fragments)


def read_email():
    """Download the first PDF attachment found in the inbox.

    Logs in to ``IMAP_SERVER`` with ``EMAIL_USER`` / ``EMAIL_PASS``, walks all
    inbox messages in the order the server lists them, prints each subject,
    and saves the first attachment whose name ends in ``.pdf`` (any case)
    into the local ``downloads`` directory.

    Returns:
        The path of the saved file, or ``None`` when no PDF attachment exists.

    The IMAP session is always logged out, including on the early return and
    when an exception propagates.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        mail.login(EMAIL_USER, EMAIL_PASS)
        mail.select("inbox")

        status, messages = mail.search(None, 'ALL')
        if status != "OK" or not messages or not messages[0]:
            return None

        for num in messages[0].split():
            _, fetched = mail.fetch(num, "(RFC822)")
            for response_part in fetched:
                if not isinstance(response_part, tuple):
                    continue
                # Parse the raw email
                message = email.message_from_bytes(response_part[1])
                print(f"Email Subject: {_decode_subject(message['Subject'])}")

                # Process attachments
                for part in message.walk():
                    if part.get_content_disposition() != "attachment":
                        continue
                    filename = part.get_filename()
                    if not filename:
                        continue
                    # The name comes from the sender: keep only the final
                    # path component so it cannot escape ``downloads``.
                    safe_name = os.path.basename(filename.replace("\\", "/"))
                    if not safe_name.lower().endswith(".pdf"):
                        continue
                    payload = part.get_payload(decode=True)
                    if payload is None:
                        continue
                    os.makedirs("downloads", exist_ok=True)
                    filepath = os.path.join("downloads", safe_name)
                    with open(filepath, "wb") as f:
                        f.write(payload)
                    print(f"Attachment saved: {safe_name}")
                    return filepath
        return None
    finally:
        mail.logout()


In [None]:

def extract_tabular_data_from_pdf(pdf_path):
    """Collect every table row on every page of a PDF into one DataFrame.

    Args:
        pdf_path: Path of the PDF file to open with ``pdfplumber``.

    Returns:
        A DataFrame with one row per extracted table row, in page order and
        then table order. No header row is inferred, so the columns carry
        pandas' default labels.
    """
    with pdfplumber.open(pdf_path) as document:
        all_rows = [
            table_row
            for pdf_page in document.pages
            for found_table in pdf_page.extract_tables()
            for table_row in found_table
        ]

    # Build the frame once from the accumulated rows
    frame = pd.DataFrame(all_rows)
    print("Data extracted successfully:")
    print(frame)
    return frame


In [None]:

def save_data_to_mongo(df):
    """Insert each row of ``df`` as one document into the configured collection.

    Connects to ``MONGO_URI`` and writes to ``DB_NAME`` / ``COLLECTION_NAME``.

    Args:
        df: DataFrame to store; each row becomes one document keyed by
            column label.

    An empty frame is skipped, because ``insert_many`` rejects an empty list.
    """
    if df.empty:
        print("No data to save to MongoDB.")
        return

    # MongoDB document keys must be strings, but a frame built without a
    # header has integer column labels (0, 1, ...), so stringify them first.
    data = df.rename(columns=str).to_dict(orient="records")

    client = MongoClient(MONGO_URI)
    try:
        client[DB_NAME][COLLECTION_NAME].insert_many(data)
    finally:
        # Release the connection pool even when the insert fails.
        client.close()
    print("Data saved to MongoDB.")

def save_to_excel(df, excel_path):
    """Write ``df`` to an Excel file at ``excel_path`` without the index column."""
    writer_options = {"index": False}
    df.to_excel(excel_path, **writer_options)
    print(f"Data saved to Excel: {excel_path}")


In [None]:

def generate_budget_pdf(df, pdf_path):
    """Render ``df`` as a simple text PDF, one line per row.

    Args:
        df: DataFrame whose rows are written as ``str(row.values)``.
        pdf_path: Destination path of the generated PDF.

    The title is drawn at y=800 and rows follow at 20-point steps. When the
    next row would fall below the bottom margin, a new page is started so
    long tables are not drawn off the page.
    """
    left_x = 100      # x position shared by the title and every row
    top_y = 800       # y of the first line on each page
    bottom_y = 50     # rows are never drawn below this y
    line_step = 20    # vertical distance between consecutive lines

    c = canvas.Canvas(pdf_path)
    c.drawString(left_x, top_y, "Budget Document")
    y = top_y - line_step
    for _, row in df.iterrows():
        if y < bottom_y:
            # Current page is full: finish it and continue at the top of a new one.
            c.showPage()
            y = top_y
        c.drawString(left_x, y, str(row.values))
        y -= line_step
    c.save()
    print(f"Budget PDF generated: {pdf_path}")


In [None]:

def send_email_with_attachments(to_email, subject, body, files):
    """Send a plain-text email from ``EMAIL_USER`` with files attached.

    Args:
        to_email: Recipient address placed in the ``To`` header.
        subject: Subject line.
        body: Plain-text message body.
        files: Iterable of file paths; each is attached as
            ``application/octet-stream`` under its base name.

    Connects to ``SMTP_SERVER:SMTP_PORT``, upgrades with STARTTLS and logs in
    with ``EMAIL_USER`` / ``EMAIL_PASS``. The connection is closed even when
    login or sending fails.
    """
    msg = MIMEMultipart()
    msg['From'] = EMAIL_USER
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the body. MIMEText is imported explicitly: the ``email.mime.text``
    # submodule is not loaded by ``import email`` on its own.
    msg.attach(MIMEText(body, "plain"))

    # Attach files
    for file in files:
        attachment = MIMEBase("application", "octet-stream")
        with open(file, "rb") as f:
            attachment.set_payload(f.read())
        encoders.encode_base64(attachment)
        # Passing the name as a header parameter lets the email package quote
        # or encode it, so names containing spaces survive intact.
        attachment.add_header(
            "Content-Disposition", "attachment", filename=os.path.basename(file)
        )
        msg.attach(attachment)

    # Send the email; the context manager closes the session on exit.
    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as server:
        server.starttls()
        server.login(EMAIL_USER, EMAIL_PASS)
        server.send_message(msg)
    print("Email sent successfully.")


In [None]:

if __name__ == "__main__":
    # End-to-end run: fetch a PDF from the inbox, extract its tables,
    # persist them, and mail the generated documents back.
    pdf_path = read_email()
    if pdf_path:
        df = extract_tabular_data_from_pdf(pdf_path)
        save_data_to_mongo(df)

        # Output artefacts produced from the extracted data
        excel_path, pdf_output_path = "output.xlsx", "budget.pdf"
        save_to_excel(df, excel_path)
        generate_budget_pdf(df, pdf_output_path)

        # Reply to the client with both generated files attached
        send_email_with_attachments(
            files=[excel_path, pdf_output_path],
            to_email="client_email@example.com",
            subject="Output Documents",
            body="Please find attached the extracted Excel and Budget PDF documents.",
        )