In [170]:
import pandas as pd
import re
import unicodedata
from rapidfuzz import process

# Load the product catalog
catalog = pd.read_csv(r"C:\Users\jimmy\Downloads\Product Catalog.csv")

# Lowercase product names for easier matching
catalog['Product_Name_Lower'] = catalog['Product_Name'].str.lower()
catalog['Product_Code_Lower'] = catalog['Product_Code'].str.lower()

# === Normalize text ===
def normalize_text(text):
    text = unicodedata.normalize('NFKC', text)
    text = text.replace('\u2013', '-').replace('\u2014', '-')  # dashes
    text = ' '.join(text.split())  # normalize whitespace
    return text


def normalize_catalog_text(text):
    text = unicodedata.normalize('NFKD', text)
    text = ''.join(c for c in text if not unicodedata.combining(c))
    text = text.lower()
    return text

catalog['Product_Name_Normalized'] = catalog['Product_Name'].apply(normalize_catalog_text)
catalog['Product_Code_Normalized'] = catalog['Product_Code'].str.lower()

def extract_order_from_email(email_text):
    email_text = normalize_text(email_text)

    # Split into lines *and also split sentences if bullets are inline*
    bullet_split = re.split(r'(?:\n+|(?<=\s)[*\-•–]\s+)', email_text)
    lines = []
    for part in bullet_split:
        lines.extend(part.strip().split('\n'))

    results = []

    # These patterns can now match *anywhere* in the line
    patterns = [
        r'(?i)(\d+)\s*(?:x|pcs|pieces|units)?\s+(?:of\s+)?([A-Za-zÄÖÜäöüßÉéÑñÅåØøÆæ\-\' ]+\d{2,})',
        r'(?i)([A-Za-zÄÖÜäöüßÉéÑñÅåØøÆæ\-\' ]+\d{2,})\s*[-–]\s*qty[:\s]*([0-9]+)',
        r'(?i)([A-Za-zÄÖÜäöüßÉéÑñÅåØøÆæ\-\' ]+\d{2,})\s*[-–]\s*need\s*([0-9]+)',
        r'(?i)([A-Za-zÄÖÜäöüßÉéÑñÅåØøÆæ\-\' ]+\d{2,})\s*:\s*([0-9]+)\s*(?:pcs|pieces|units)?',
        r'(?i)(\d+)\s*(?:pcs|pieces|units)?[:\s]*\s*([A-Za-zÄÖÜäöüßÉéÑñÅåØøÆæ\-\' ]+\d{2,})',
    ]

    for line in lines:
        line = line.strip()
        for pattern in patterns:
            match = re.search(pattern, line)  # <-- use re.search not match!
            if match:
                g1, g2 = match.groups()
                try:
                    qty = int(g1)
                    name = g2.strip()
                except ValueError:
                    qty = int(g2)
                    name = g1.strip()
                results.append((name, qty))
                break  # stop after first successful match

    return results


# def extract_by_proximity_to_catalog(email_text, catalog, threshold=85):
#     email_text = normalize_text(email_text)
#     tokens = re.findall(r'\b[\wÄäÖöÜüßÉéÅåÁáØøÆæÇçÑñ-]+\b', email_text)
#     lowered_tokens = [t.lower() for t in tokens]
#     results = []

#     product_names = catalog['Product_Name'].tolist()
#     product_name_lowers = [p.lower() for p in product_names]

#     for i, token in enumerate(lowered_tokens):
#         match_tuple = process.extractOne(token, product_name_lowers)
#         if match_tuple:
#             match, score = match_tuple[0], match_tuple[1]
#             if score >= threshold:
#                 try:
#                     match_index = product_name_lowers.index(match)
#                     product_name = product_names[match_index]

#                     # Look backward and forward for nearby integers
#                     window = 5
#                     quantity = None
#                     for j in range(1, window + 1):
#                         # Before
#                         if i - j >= 0 and lowered_tokens[i - j].isdigit():
#                             quantity = int(lowered_tokens[i - j])
#                             break
#                         # After
#                         if i + j < len(lowered_tokens) and lowered_tokens[i + j].isdigit():
#                             quantity = int(lowered_tokens[i + j])
#                             break

#                     if quantity is not None:
#                         results.append((product_name, quantity))
#                 except:
#                     continue

#     return results



In [171]:
def find_product_fuzzy(product_input, threshold=50):
    product_input = normalize_catalog_text(product_input)

    names = catalog['Product_Name_Normalized'].tolist()
    codes = catalog['Product_Code_Normalized'].tolist()

    match_name, score_name, _ = process.extractOne(product_input, names)
    match_code, score_code, _ = process.extractOne(product_input, codes)


    if score_name >= threshold:
        return catalog[catalog['Product_Name_Normalized'] == match_name].iloc[0]
    elif score_code >= threshold:
        return catalog[catalog['Product_Code_Normalized'] == match_code].iloc[0]
    else:
        return None


In [172]:
def process_order(email_text):
    order_items = extract_order_from_email(email_text)
    print("🔍 Extracted items from email:", order_items)
    final_order = []

    for product_input, quantity in order_items:
        while True:
            product = find_product_fuzzy(product_input)
            if product is None:
                print(f"❌ Product '{product_input}' not found in catalog.")
                retry = input("Would you like to enter a different product name/code? (yes/no): ").strip().lower()
                if retry == "yes":
                    product_input = input("Please enter the correct product name or code: ").strip()
                    continue
                else:
                    print("Skipping unrecognized product.")
                    break

            # Prompt user to fix quantity if it's invalid
            while quantity < product['Min_Order_Quantity'] or quantity > product['Available_in_Stock']:
                if quantity < product['Min_Order_Quantity']:
                    print(f"⚠️ You ordered {quantity}, but minimum for '{product['Product_Name']}' is {product['Min_Order_Quantity']}.")
                if quantity > product['Available_in_Stock']:
                    print(f"⚠️ You ordered {quantity}, but only {product['Available_in_Stock']} available for '{product['Product_Name']}'.")

                fix = input("Would you like to update the quantity? (yes/no): ").strip().lower()
                if fix == "yes":
                    quantity = int(input("Enter new quantity: "))
                else:
                    print("Skipping this item.")
                    break

            # Only add if still valid
            if product['Min_Order_Quantity'] <= quantity <= product['Available_in_Stock']:
                final_order.append((product, quantity))
            break

    return final_order


In [181]:
def summarize_order(order):
    total = 0
    lines = []
    for product, quantity in order:
        line_total = product["Price"] * quantity
        lines.append(f"{quantity} x {product['Product_Name']} @ ${product['Price']:.2f} = ${line_total:.2f}")
        total += line_total
    lines.append(f"💰 Total Price: ${total:.2f}")
    summary = "\n".join(lines)
    print("\n🧾 Final Order Summary:")
    print(summary)
    return summary

In [176]:
def extract_address_from_email(email_text):
    address_keywords = [
        r'ship to',
        r'deliver to',
        r'send to',
        r'delivery address',
        r'shipping address',
        r'deliver at',
        r'send them to',
        r'delivery location',
        r'do deliver to'
    ]

    lines = email_text.splitlines()
    candidates = []

    for i, line in enumerate(lines):
        lower_line = line.lower()
        for keyword in address_keywords:
            if keyword in lower_line:
                # Grab the line with the keyword and the next 1–2 lines in case the address spans multiple lines
                snippet = line.strip()
                if i + 1 < len(lines):
                    snippet += " " + lines[i + 1].strip()
                if i + 2 < len(lines):
                    snippet += " " + lines[i + 2].strip()
                candidates.append(snippet)
    
    # Clean up and return the best candidate
    if candidates:
        return candidates[0].strip()
    
    # Fallback: try to match addresses using a basic regex (e.g., postal-like patterns)
    fallback_match = re.search(r'\d{1,5}[\w\s\.,-]+(?:USA|Germany|Japan|Mexico|Lebanon|[A-Z]{2} \d{5})', email_text, re.IGNORECASE)
    if fallback_match:
        return fallback_match.group(0).strip()

    return None

In [179]:
import smtplib
from email.message import EmailMessage

def send_email(to_email, subject, body, from_email, app_password):
    msg = EmailMessage()
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email
    msg.set_content(body)

    try:
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
            smtp.login(from_email, app_password)
            smtp.send_message(msg)
        print("✅ Email sent successfully.")
    except Exception as e:
        print("❌ Failed to send email:", e)


In [None]:
# === Main driver ===
def main():
    email_file_path = r"C:\Users\jimmy\Downloads\sample_email_1.txt"  # Update path
    try:
        with open(email_file_path, 'r', encoding='utf-8') as f:
            email_text = f.read()
    except FileNotFoundError:
        print("❌ Could not find the email file. Check the path.")
        return

    print("📧 Analyzing email...")
    order = process_order(email_text)
    if not order:
        print("❌ No valid items confirmed in order.")
        return

    total = summarize_order(order)
    order_summary = summarize_order(order)


    # 🆕 Extract and print shipping address
    shipping_address = extract_address_from_email(email_text)
    if shipping_address:
        print(f"📍 Shipped to: {shipping_address}")
    else:
        print("📍 Shipping address not found.")

    confirm = input("\n✅ Confirm order? (yes/no): ").strip().lower()
    if confirm == "yes":
        customer_email = input("Enter customer email address: ")
        sender_email = input("Enter your Gmail address: ")
        sender_password = input("Enter your Gmail app password: ")

        subject = "Order Confirmation"
        body = f"Thank you for your order!\n\n{order_summary}\n\nShipped to: {shipping_address}"

        send_email(customer_email, subject, body, sender_email, sender_password)
        print(f"📤 Sending confirmation email to {customer_email} (mock)...")
        print("✅ Order confirmed. Email sent.")
    else:
        print("❌ Order not confirmed.")

# Run it
if __name__ == "__main__":
    main()


📧 Analyzing email...
🔍 Extracted items from email: [('Office LUNDMARK 699', 8)]
⚠️ You ordered 8, but minimum for 'Office LUNDMARK 699' is 10.

🧾 Final Order Summary:
11 x Office LUNDMARK 699 @ $590.66 = $6497.26
💰 Total Price: $6497.26

🧾 Final Order Summary:
11 x Office LUNDMARK 699 @ $590.66 = $6497.26
💰 Total Price: $6497.26
📍 Shipped to: Do deliver to: 2-11-3 Meguro, Tokyo, Japan Before: June 18, 2025
❌ Failed to send email: (534, b'5.7.9 Application-specific password required. For more information, go to\n5.7.9  https://support.google.com/mail/?p=InvalidSecondFactor 586e51a60fabf-2eab8edca7esm1603595fac.42 - gsmtp')
📤 Sending confirmation email to jimmyp2002@gmail.com (mock)...
✅ Order confirmed. Email sent.


In [26]:
!pip install rapidfuzz


Collecting rapidfuzz
  Downloading rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl.metadata (12 kB)
Downloading rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.6 MB 393.8 kB/s eta 0:00:05
   -- ------------------------------------- 0.1/1.6 MB 508.4 kB/s eta 0:00:04
   -- ------------------------------------- 0.1/1.6 MB 590.8 kB/s eta 0:00:03
   ----- ---------------------------------- 0.2/1.6 MB 1.1 MB/s eta 0:00:02
   ----- ---------------------------------- 0.2/1.6 MB 1.1 MB/s eta 0:00:02
   ---------- ----------------------------- 0.4/1.6 MB 1.5 MB/s eta 0:00:01
   ----------------- ---------------------- 0.7/1.6 MB 2.0 MB/s eta 0:00:01
   --------------------- ------------------ 0.9/1.6 MB 2.0 MB/s eta 0:00:01
   ----------------------------- ---------- 1.2/1.6 MB 2.8 MB/s eta 0:00:01
   ----------

In [2]:
print("hi")

hi
