# Title: Extracting Information from Emails using Regular Expressions

In [1]:
import re
import pandas as pd

In [2]:
# Sample dataset of financial emails
sample_emails = [
    """Hello John,

    Your transaction of $1,250.75 on 2024-08-21 to account number ACC987654321 has been processed. 
    If you did not authorize this, please contact us at fraud@bankingcorp.com.

    Thank you,
    Banking Corp""",

    """Dear Lisa,

    A withdrawal of $3,500.00 was made from your account ACC123456789 on 2025-01-15.
    Contact support@securebank.com if this wasn’t you.

    Regards,
    SecureBank""",

    """Hi Mark,

    We noticed a login from a new device on 2025-03-10.
    Your account (ACC456789123) remains secure.
    Any issues? Reach us at help@mybank.org.

    Best,
    MyBank Security""",

    """Hello Negin,

    A charge of $999.99 was posted on 2024-12-02 to your card ending in 1234. 
    If this was not your purchase, notify us at billing@financesafe.ca.

    Regards,
    FinanceSafe"""
]


In [3]:
# Function to extract relevant information using regex
def extract_info(text):
    name = re.findall(r"Hello\s+([A-Z][a-z]+)|Dear\s+([A-Z][a-z]+)|Hi\s+([A-Z][a-z]+)", text)
    date = re.findall(r"\d{4}-\d{2}-\d{2}", text)
    amount = re.findall(r"\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?", text)
    account = re.findall(r"ACC\d+", text)
    email = re.findall(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", text)

    return {
        "name": next((n for group in name for n in group if n), None),
        "date": date[0] if date else None,
        "amount": amount[0] if amount else None,
        "account_number": account[0] if account else None,
        "email": email[0] if email else None,
    }

In [4]:
# Apply extraction to each email
results = [extract_info(email) for email in sample_emails]

# Convert results to a DataFrame
df = pd.DataFrame(results)

# Show the DataFrame
print("\nExtracted Information:")
print(df)

# Save to a CSV file
df.to_csv("extracted_financial_info.csv", index=False)
print("\nSaved results to 'extracted_financial_info.csv'")


Extracted Information:
    name        date     amount account_number                    email
0   John  2024-08-21  $1,250.75   ACC987654321   fraud@bankingcorp.com.
1   Lisa  2025-01-15  $3,500.00   ACC123456789   support@securebank.com
2   Mark  2025-03-10       None   ACC456789123         help@mybank.org.
3  Negin  2024-12-02    $999.99           None  billing@financesafe.ca.

Saved results to 'extracted_financial_info.csv'
