In [1]:
import base64
import os

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# OAuth scopes requested during authentication (Gmail "modify" scope).
SCOPES = [
    'https://www.googleapis.com/auth/gmail.modify',
]

def authenticate_gmail():
    """Return a Gmail v1 API client built from cached or freshly obtained credentials.

    Credentials are read from ``token.json`` when that file exists. If they are
    missing or not valid, they are either refreshed (when expired and a refresh
    token is present) or obtained through the local-server OAuth flow using
    ``credentials.json``; in both cases the result is written back to
    ``token.json``.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=...)``.
    """
    token_file = 'token.json'

    # Reuse the cached token when one is present on disk.
    credentials = (
        Credentials.from_authorized_user_file(token_file, SCOPES)
        if os.path.exists(token_file)
        else None
    )

    # Fast path: cached credentials are still usable, nothing to persist.
    if credentials and credentials.valid:
        return build('gmail', 'v1', credentials=credentials)

    refreshable = credentials and credentials.expired and credentials.refresh_token
    if refreshable:
        credentials.refresh(Request())
    else:
        # No usable token: run the interactive flow on an OS-chosen port.
        oauth_flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        credentials = oauth_flow.run_local_server(port=0)

    # Persist the refreshed / newly issued credentials for the next run.
    with open(token_file, 'w') as token_handle:
        token_handle.write(credentials.to_json())

    return build('gmail', 'v1', credentials=credentials)

# Build a notebook-level Gmail client (this may trigger the OAuth flow in
# authenticate_gmail). Note that check_and_classify_emails() builds its own
# client on every run instead of reusing this one.
service = authenticate_gmail()


In [2]:
import joblib
import re
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the trained classifier; classify_email() calls model.predict() on it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
model = joblib.load('model.pkl')

# Load the fitted vectorizer; classify_email() calls vectorizer.transform() on it.
# NOTE(review): presumably the exact vectorizer fitted at training time — confirm.
vectorizer = joblib.load('tfidf_vectorizer.pkl')  # file name suggests a TF-IDF vectorizer

In [4]:
def get_unread_emails(service):
    """Return every unread message reference in the inbox.

    The Gmail ``messages.list`` endpoint is paginated: each response carries at
    most one page of results plus a ``nextPageToken`` when more are available.
    This function follows that token until it is absent, so unread messages
    beyond the first page are no longer silently skipped.

    Args:
        service: Gmail API client exposing ``users().messages().list(...)``.

    Returns:
        A list of the ``messages`` entries from all pages (dicts as returned by
        the API, each used elsewhere in this notebook via its ``'id'`` key).
        Empty list when there are no unread messages.
    """
    messages = []
    page_token = None
    while True:
        # pageToken=None on the first request asks for the first page.
        response = service.users().messages().list(
            userId='me',
            labelIds=['INBOX'],
            q="is:unread",
            pageToken=page_token,
        ).execute()
        messages.extend(response.get('messages', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return messages

def _decode_body_data(data):
    """Decode a base64url-encoded body string into text."""
    # Re-add any stripped '=' padding so the decoder accepts the input.
    padded = data + '=' * (-len(data) % 4)
    # NOTE(review): assumes UTF-8 text; undecodable bytes are replaced rather
    # than raising — confirm this is acceptable for the classifier.
    return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')


def _find_plain_text(payload):
    """Depth-first search of a message payload for the first non-empty text/plain body."""
    if payload.get('mimeType') == 'text/plain':
        data = (payload.get('body') or {}).get('data')
        if data:
            return _decode_body_data(data)
    for part in payload.get('parts') or []:
        text = _find_plain_text(part)
        if text:
            return text
    return ''


def get_email_content(service, message_id):
    """Fetch a message and return its plain-text body as decoded text.

    The ``body.data`` field of a message part is base64url-encoded; it is
    decoded here so callers receive readable text rather than the encoded
    payload. Besides top-level ``text/plain`` parts, this also handles
    single-part messages (body directly on the payload) and ``text/plain``
    parts nested inside multipart containers. Parts without inline ``data``
    are skipped instead of raising ``KeyError``.

    Args:
        service: Gmail API client exposing ``users().messages().get(...)``.
        message_id: ID of the message to fetch.

    Returns:
        The text of the first non-empty ``text/plain`` part found (depth-first),
        or ``''`` when the message has none.
    """
    message = service.users().messages().get(
        userId='me', id=message_id, format='full').execute()
    return _find_plain_text(message.get('payload') or {})


In [5]:
def preprocess_content(content):
    """Normalize raw email text before vectorization.

    Every non-word character (anything matched by ``\\W``) is replaced with a
    single space, then the result is lowercased.

    Args:
        content: Text to normalize.

    Returns:
        The normalized string; its length equals the input's unless lowercasing
        changes it.
    """
    symbols_blanked = re.sub(r'\W', ' ', content)
    return symbols_blanked.lower()

In [6]:
def classify_email(content):
    """Classify email text as ``"Spam"`` or ``"Ham"``.

    The text is normalized with ``preprocess_content``, turned into a feature
    vector by the module-level ``vectorizer``, and scored by the module-level
    ``model``.

    Args:
        content: Email text to classify.

    Returns:
        ``"Spam"`` when the first prediction is truthy, otherwise ``"Ham"``.
    """
    features = vectorizer.transform([preprocess_content(content)])
    first_prediction = model.predict(features)[0]
    # NOTE(review): this relies on truthiness, so it assumes the model emits
    # boolean / 0-1 labels. String labels (e.g. 'ham') would always be truthy
    # and every email would come back as "Spam" — confirm the label encoding.
    return "Spam" if first_prediction else "Ham"

In [7]:
def update_email_label(service, message_id, label):
    """Apply the classification result to a Gmail message.

    For ``"Spam"`` the message gets the ``SPAM`` system label and the ``INBOX``
    label is removed in the same request, so it does not remain filed in the
    inbox alongside the spam label. Any other label keeps the message in the
    inbox (``INBOX`` is added, as before).

    Args:
        service: Gmail API client exposing ``users().messages().modify(...)``.
        message_id: ID of the message to relabel.
        label: Classification result; ``"Spam"`` triggers the move to spam.
    """
    if label == "Spam":
        body = {'addLabelIds': ['SPAM'], 'removeLabelIds': ['INBOX']}
    else:
        # Keep it in the inbox (a custom label could be added here instead).
        body = {'addLabelIds': ['INBOX']}
    service.users().messages().modify(
        userId='me', id=message_id, body=body).execute()


In [8]:
import schedule
import time

In [9]:
def check_and_classify_emails():
    """Run one classification pass over the unread inbox messages.

    Builds a Gmail client, lists the unread messages, and for each one fetches
    its content, classifies it, and applies the resulting label.
    """
    gmail = authenticate_gmail()
    for unread in get_unread_emails(gmail):
        body_text = get_email_content(gmail, unread['id'])
        verdict = classify_email(body_text)
        update_email_label(gmail, unread['id'], verdict)

In [10]:
# Register check_and_classify_emails to run every 10 minutes.
# This only queues the job; it executes when schedule.run_pending() is called
# (see the polling loop at the end of the notebook).
# NOTE(review): re-running this cell presumably registers a duplicate job — confirm.
schedule.every(10).minutes.do(check_and_classify_emails)

Every 10 minutes do check_and_classify_emails() (last run: [never], next run: 2024-08-24 23:35:48)

In [23]:
# Example usage: classify a hand-written, benign-looking message.
# NOTE(review): the recorded output for this cell is "Spam". classify_email
# treats any truthy prediction as spam, so check the model's label encoding.
email_content = "Hi, I wanted to check in about our meeting tomorrow. Let me know if the time is still good for you."
label = classify_email(email_content)
print(f"The email is classified as: {label}")


The email is classified as: Spam


In [None]:
# Manually run one pass now: classify every unread inbox message as spam or
# ham without waiting for the scheduler.
check_and_classify_emails()

In [None]:
# Keep checking for new mail: poll the scheduler once per second so the
# registered job runs whenever it is due.
# This loop never exits on its own; interrupt the kernel to stop it.
while True:
    schedule.run_pending()
    time.sleep(1)