In [12]:
def get_or_create_folder(parent_folder, folder_name):
    """
    Return the subfolder of ``parent_folder`` called ``folder_name``.

    The existing subfolders are scanned with a case-insensitive name
    comparison; the first match is returned. When nothing matches, a new
    subfolder is created via ``parent_folder.Folders.Add`` and returned.
    """
    match = next(
        (
            candidate
            for candidate in parent_folder.Folders
            if candidate.Name.lower() == folder_name.lower()
        ),
        None,
    )
    if match is not None:
        return match
    # No subfolder with that name yet: create it.
    return parent_folder.Folders.Add(folder_name)

In [None]:
import os
import time
import pickle
import win32com.client
import pandas as pd
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
from sklearn.ensemble import RandomForestClassifier

# CSV file that log_email() writes one row per processed email to.
LOG_FILE = "email_log.csv"

#  Load BERT Model & Tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
model = BertModel.from_pretrained("bert-base-multilingual-cased")
#  Ensure Outlook is connected
# NOTE(review): connect_to_outlook() is defined further down in this cell, so
# on a fresh kernel this call only succeeds if another cell that defines it
# has been run first. Consider moving the definition above this line.
outlook = connect_to_outlook()
gmail_inbox = outlook.Folders.Item(2).Folders("Inbox")  # Adjust if necessary

# Define the folders used for sorting
folder_names = ["Work", "Personal", "Spam", "Projects", "High-Priority", "Interessting-Topics", "Purchasesites", "Purchases", "Physics","DataAnalysis"]
# get_or_create_folder() returns the existing subfolder or creates it, so every
# name is guaranteed an entry here; no second lookup pass is needed.
folders = {name: get_or_create_folder(gmail_inbox, name) for name in folder_names}


def get_bert_embedding(text):
    """
    Turn ``text`` into a fixed-size vector using the module-level BERT model.

    The text is tokenized (truncated to at most 512 tokens), run through
    ``model`` without gradient tracking, and the final hidden states are
    averaged over dimension 1 (the token axis in BERT's output). The result
    is squeezed and returned as a NumPy array.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().numpy()

# Outlook Connection
def connect_to_outlook():
    """
    Connect to Outlook and return its MAPI namespace.

    If the first attempt raises, Outlook is force-closed with ``taskkill``,
    started again, and the connection is retried once after a fixed wait.

    Returns:
        The object returned by ``GetNamespace("MAPI")``.

    Raises:
        Whatever the second connection attempt raises; it is not caught here.
    """
    try:
        return win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    except Exception:
        # Only ordinary errors trigger the restart. A bare `except:` would also
        # catch KeyboardInterrupt/SystemExit and force-kill Outlook when the
        # user merely interrupts the cell.
        os.system("taskkill /IM OUTLOOK.EXE /F")
        time.sleep(5)  # give the process time to terminate
        os.system("start outlook")
        time.sleep(10)  # give Outlook time to start before reconnecting
        return win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

# Rebinds the module-level `outlook` using the connect_to_outlook() defined just above.
outlook = connect_to_outlook()
def get_gmail_inbox():
    """
    Return the "Inbox" folder of the second top-level store in Outlook.

    Reads the module-level ``outlook`` namespace. The store is selected by
    position (``Item(2)``), so the index has to be adjusted if the Gmail
    account is not the second entry.
    """
    return outlook.Folders.Item(2).Folders("Inbox")  # Adjust index if needed

# Module-level inbox handle; read by classify_and_move_emails() and retrain_on_corrections().
gmail_inbox = get_gmail_inbox()
def get_emails(folder, max_emails=10):
    """
    Collect up to ``max_emails`` items from an Outlook folder.

    Items are read by 1-based index, counting down from
    ``min(item count, max_emails)`` to 1. Items whose subject or body cannot
    be read are skipped.

    Args:
        folder: Outlook folder object exposing ``Name`` and ``Items``.
        max_emails: Upper bound on the number of items to read.

    Returns:
        A list of ``(text, mail)`` tuples, where ``text`` is
        ``"<subject> <body>"`` (with placeholders for empty fields) and
        ``mail`` is the underlying item object.
    """
    emails = []

    print(f"🔍 Fetching emails from: {folder.Name}")

    # Look the collection up once instead of on every loop iteration.
    items = folder.Items
    for i in range(min(items.Count, max_emails), 0, -1):
        try:
            mail = items.Item(i)
            subject = mail.Subject.strip() if mail.Subject else "(No Subject)"
            body = mail.Body.strip() if mail.Body else "(No Body)"
        except Exception:
            # Best effort: skip items that cannot be read. KeyboardInterrupt
            # and SystemExit are deliberately not swallowed.
            continue
        emails.append((f"{subject} {body}", mail))

    print(f" Retrieved {len(emails)} emails from {folder.Name}.")
    return emails


#  Train AI Model
# The inbox sample is kept in `all_emails`, but it is not embedded here: the
# training set below is built only from the labelled subfolders, so embedding
# these mails would be expensive work whose result is thrown away.
all_emails = get_emails(gmail_inbox, max_emails=100)
#  Dynamically assign labels based on Outlook folders
#  Define training data lists
emails_texts = []
y_train = []

#  Fetch emails from each folder
for folder_name, folder in folders.items():
    emails = get_emails(folder, max_emails=10)
    for text, _ in emails:
        emails_texts.append(get_bert_embedding(text))  # Convert email to BERT embedding
        y_train.append(folder_name)  # The folder an email sits in is its label

#  Convert lists to NumPy arrays
X_train = np.array(emails_texts)
y_train = np.array(y_train)

print(f" Training AI with {len(X_train)} samples and {len(y_train)} labels.")


from sklearn.ensemble import RandomForestClassifier
import pickle

# Train only when there is data and every sample has a label.
if len(X_train) == 0:
    # Both arrays being empty is not a mismatch; report it as what it is.
    print("⚠ Error: No training samples found; model was not trained.")
elif len(X_train) != len(y_train):
    print(f"⚠ Error: Mismatch between features ({len(X_train)}) and labels ({len(y_train)}).")
else:
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Save the model; the context manager closes the file even if dump() fails.
    with open("email_classifier_bert.pkl", "wb") as model_file:
        pickle.dump(clf, model_file)
    print(" AI model successfully trained and saved!")



#  Logging for Dashboard
def log_email(subject, sender, folder, score):
    """
    Append one row describing a processed email to the CSV at ``LOG_FILE``.

    The row is appended instead of re-reading and rewriting the whole log on
    every call. The header line is written only when the file does not exist
    yet or is empty, so an empty log file no longer makes the call fail.

    Args:
        subject: Email subject line.
        sender: Sender address.
        folder: Name of the folder the email was assigned to.
        score: Score value stored alongside the entry.
    """
    row = pd.DataFrame([{"Subject": subject, "Sender": sender, "Folder": folder, "Score": score}])
    needs_header = not os.path.exists(LOG_FILE) or os.path.getsize(LOG_FILE) == 0
    row.to_csv(LOG_FILE, mode="a", header=needs_header, index=False)




🔍 Fetching emails from: Inbox
 Retrieved 100 emails from Inbox.
🔍 Fetching emails from: Work
 Retrieved 10 emails from Work.
🔍 Fetching emails from: Personal
 Retrieved 10 emails from Personal.
🔍 Fetching emails from: Spam
 Retrieved 10 emails from Spam.
🔍 Fetching emails from: Projects
 Retrieved 10 emails from Projects.
🔍 Fetching emails from: High-Priority
 Retrieved 10 emails from High-Priority.
🔍 Fetching emails from: Interessting-Topics
 Retrieved 10 emails from Interessting-Topics.
🔍 Fetching emails from: Purchasesites
 Retrieved 10 emails from Purchasesites.
🔍 Fetching emails from: Purchases
 Retrieved 0 emails from Purchases.
🔍 Fetching emails from: Physics
 Retrieved 0 emails from Physics.
🔍 Fetching emails from: DataAnalysis
 Retrieved 0 emails from DataAnalysis.
✅ Training AI with 70 samples and 70 labels.
✅ AI model successfully trained and saved!


In [28]:
import os
import time
import win32com.client

def connect_to_outlook():
    """
    Ensures Outlook is running and connects to the MAPI namespace.

    If the first attempt raises, Outlook is force-closed, started again, and
    the connection is retried once after a fixed wait.

    Returns:
        The MAPI namespace object, or ``None`` when the retry fails as well.
    """
    try:
        outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
        return outlook
    except Exception:
        # Only ordinary errors trigger the restart. A bare `except:` would also
        # catch KeyboardInterrupt/SystemExit and force-kill Outlook when the
        # user merely interrupts the cell.
        print("⚠ Outlook connection failed. Restarting Outlook...")
        os.system("taskkill /IM OUTLOOK.EXE /F")  # Force close Outlook
        time.sleep(5)
        os.system("start outlook")  # Restart Outlook
        time.sleep(10)  # Wait for Outlook to fully load

        # Try connecting again
        try:
            outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
            print("✅ Outlook restarted and connected successfully.")
            return outlook
        except Exception as e:
            print(f"❌ Failed to reconnect to Outlook: {e}")
            return None

# Ensure connection
# connect_to_outlook() above returns None when both attempts fail; in that case
# the branch below is skipped and `gmail_inbox` keeps whatever value (if any)
# an earlier cell assigned.
outlook = connect_to_outlook()
if outlook:
    # Inbox of the second top-level store; the index is position-based.
    gmail_inbox = outlook.Folders.Item(2).Folders("Inbox")


In [36]:
def classify_and_move_emails(max_emails=10):
    """
    Classifies and moves emails inside Inbox/Foldername, not root.

    Fetches up to ``max_emails`` items from the module-level ``gmail_inbox``,
    predicts a folder name for each with the module-level ``clf``, moves the
    item into the Inbox subfolder of that name (creating the subfolder when
    the lookup fails) and logs the move via ``log_email``.

    Args:
        max_emails: Maximum number of Inbox items to process.

    Returns:
        None.
    """
    new_emails = get_emails(gmail_inbox, max_emails)

    if not new_emails:
        print("⚠ No new emails to classify.")
        return

    for text, mail in new_emails:
        email_vector = get_bert_embedding(text).reshape(1, -1)
        predicted_folder = clf.predict(email_vector)[0]  # Predict folder

        # ✅ Correct path: Move inside Inbox
        try:
            destination_folder = gmail_inbox.Folders(predicted_folder)
        except Exception:
            print(f"⚠ Warning: Folder '{predicted_folder}' not found in Inbox. Creating it...")
            destination_folder = gmail_inbox.Folders.Add(predicted_folder)

        # Read the fields needed for logging BEFORE moving: Move() hands back
        # the relocated item, and the pre-move reference should not be relied
        # on afterwards. Items without a sender address log an empty string.
        subject = mail.Subject or ""
        sender = getattr(mail, "SenderEmailAddress", "")

        # ✅ Move email to correct location
        mail.Move(destination_folder)
        log_email(subject, sender, predicted_folder, 5)  # Log for dashboard
        print(f"📧 Moved: {subject[:50]}... → Inbox/{predicted_folder}")


In [45]:
# Process up to 50 Inbox items. Relies on `clf`, `gmail_inbox`, `get_emails`,
# `get_bert_embedding` and `log_email` having been defined by earlier cells.
classify_and_move_emails(max_emails=50)

🔍 Fetching emails from: Inbox
 Retrieved 50 emails from Inbox.
📧 Moved: Smart Cabrio Gewinnspiel von TV Winner... → Inbox/High-Priority
📧 Moved: [Voiptalk - SIP 0000230]: can't  log into the acco... → Inbox/High-Priority
📧 Moved: Your new user account... → Inbox/High-Priority
📧 Moved: Re: und los... → Inbox/Personal
📧 Moved: Ihre teltarif.de Tariftabelle... → Inbox/Personal
📧 Moved: [rt.propagation.net #90361] hacking attempt... → Inbox/Interessting-Topics
📧 Moved: Re: hacking attempt on 2 different servers... → Inbox/Personal
📧 Moved: [rt.propagation.net #90361] AutoReply: hacking att... → Inbox/Interessting-Topics
📧 Moved: PayPal-Ueberweisung... → Inbox/Projects
📧 Moved: Bestätigungsnachricht... → Inbox/Personal
📧 Moved: Aenderung des PayPal-Passworts... → Inbox/Interessting-Topics
📧 Moved: Passwort vergessen... → Inbox/Personal
📧 Moved: ... → Inbox/Personal
📧 Moved: 1kHz... → Inbox/Projects
📧 Moved: beep... → Inbox/Personal
📧 Moved: Your new Firefly phone details... → Inbox/Interess

In [42]:
def retrain_on_corrections():
    """
    Retrains the AI model using emails inside Inbox/Foldername.

    For every name in the module-level ``folder_names`` the matching Inbox
    subfolder is read (up to 50 items), each item is embedded with
    ``get_bert_embedding`` and labelled with the folder name. A new random
    forest is fitted on that data, saved to ``email_classifier_bert.pkl``, and
    bound to the module-level ``clf`` so that ``classify_and_move_emails``
    uses the retrained model right away.

    Returns:
        None. When no emails are found, nothing is trained or saved.
    """
    # Without this declaration the assignment below would create a local and
    # the classifier used elsewhere in the notebook would stay the old one.
    global clf

    corrected_emails = []
    labels = []

    print("🔄 Retraining AI model based on manually sorted emails...")

    # ✅ Use only folders inside Inbox
    for folder_name in folder_names:
        try:
            folder = gmail_inbox.Folders(folder_name)  # Correct path: Inside Inbox
        except Exception:
            print(f"⚠ Warning: Folder '{folder_name}' not found in Inbox. Skipping...")
            continue  # Skip missing folders

        emails = get_emails(folder, max_emails=50)  # Fetch emails for training
        if not emails:
            continue  # Skip empty folders

        for text, _ in emails:
            corrected_emails.append(get_bert_embedding(text))  # Convert to BERT embedding
            labels.append(folder_name)  # Assign correct label

    if not corrected_emails:
        print("⚠ No manually corrected emails found. Skipping retraining.")
        return

    # ✅ Train a new AI model with corrected data
    X_train = np.array(corrected_emails)
    y_train = np.array(labels)

    new_clf = RandomForestClassifier(n_estimators=100, random_state=42)
    new_clf.fit(X_train, y_train)
    clf = new_clf  # publish only after fitting succeeded

    # ✅ Save the updated model; the context manager closes the file handle.
    with open("email_classifier_bert.pkl", "wb") as model_file:
        pickle.dump(new_clf, model_file)

    print("✅ AI model retrained using manually corrected emails!")


In [43]:
# Refit the classifier from the current contents of the Inbox subfolders in `folder_names`.
retrain_on_corrections()

🔄 Retraining AI model based on manually sorted emails...
🔍 Fetching emails from: Work
 Retrieved 50 emails from Work.
🔍 Fetching emails from: Personal
 Retrieved 50 emails from Personal.
🔍 Fetching emails from: Spam
 Retrieved 50 emails from Spam.
🔍 Fetching emails from: Projects
 Retrieved 50 emails from Projects.
🔍 Fetching emails from: High-Priority
 Retrieved 17 emails from High-Priority.
🔍 Fetching emails from: Interessting-Topics
 Retrieved 46 emails from Interessting-Topics.
🔍 Fetching emails from: Purchasesites
 Retrieved 37 emails from Purchasesites.
🔍 Fetching emails from: Purchases
 Retrieved 2 emails from Purchases.
🔍 Fetching emails from: Physics
 Retrieved 2 emails from Physics.
🔍 Fetching emails from: DataAnalysis
 Retrieved 0 emails from DataAnalysis.
✅ AI model retrained using manually corrected emails!
