## Import Libraries

In [7]:
import textwrap
import json
import base64
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from datetime import datetime, timedelta

## Extracting mails

1. Build the Gmail API client using the credentials stored in `gmail_api_desktop_credentials.json`.
2. Retrieve the emails that match the search query and collect them in a list.

In our case, the query selects unread emails received during the last week.

In [8]:
class GmailAPI:
    """
    Thin wrapper around the Gmail API for reading message bodies.

    Typical use: create an instance, call ``connect_api()`` once, then call
    ``retrieve_mails(query)`` with a Gmail search query.
    """

    # Scope requested during authorization (read-only Gmail access).
    SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

    def __init__(self, token_file="../data/output/token.json", credentials_file="../data/input/gmail_api_desktop_credentials.json"):
        """
        Initializes the GmailAPI object.

        Args:
            token_file: Path where the authorized-user token is read from and
                written to.
            credentials_file: Path to the OAuth client-secrets JSON file.
        """
        self.token_file = token_file
        self.credentials_file = credentials_file
        self.service = None  # Set by connect_api(); None means "not connected"

    def connect_api(self):
        """
        Connects to the Gmail API and sets the service object.

        Reuses the token stored in ``token_file`` when it is still valid,
        refreshes it when it is expired and a refresh token is available, and
        otherwise runs the installed-app OAuth flow. Whenever new or refreshed
        credentials are obtained they are written back to ``token_file``.

        If building the service raises ``HttpError`` the error is printed and
        ``self.service`` is left unchanged.
        """
        creds = None
        if os.path.exists(self.token_file):
            creds = Credentials.from_authorized_user_file(self.token_file, self.SCOPES)

        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.credentials_file, self.SCOPES
                )
                creds = flow.run_local_server(port=0)

            # On a first run the output directory may not exist yet; create it
            # so that saving the token does not fail after a successful login.
            token_dir = os.path.dirname(self.token_file)
            if token_dir:
                os.makedirs(token_dir, exist_ok=True)

            with open(self.token_file, "w") as token:
                token.write(creds.to_json())

        try:
            # Call the Gmail API
            self.service = build("gmail", "v1", credentials=creds)

        except HttpError as error:
            print(f"An error occurred: {error}")

    def retrieve_mails(self, query):
        """
        Retrieves the plain-text bodies of all emails matching ``query``.

        Every result page is fetched (following ``nextPageToken``), and each
        message is searched for its first ``text/plain`` part that carries
        inline data, including single-part messages and nested multipart
        structures. Messages without such a part are skipped.

        Args:
            query: Gmail search query string, passed as ``q`` to the list call.

        Returns:
            A list of strings, one per message that had a plain-text body.
            Each entry is the decoded body followed by a single space (kept for
            compatibility with the previously saved output format). Returns an
            empty list when ``connect_api()`` has not set a service.
        """
        if not self.service:
            print("Service not connected. Call connect_api() first.")
            return []

        messages_api = self.service.users().messages()
        email_content = []
        page_token = None

        while True:
            list_kwargs = {"userId": "me", "q": query}
            if page_token:
                list_kwargs["pageToken"] = page_token
            results = messages_api.list(**list_kwargs).execute()

            for message_ref in results.get("messages", []):
                full_message = messages_api.get(
                    userId="me", id=message_ref["id"], format="full"
                ).execute()
                body_text = self._extract_plain_text(full_message.get("payload", {}))
                if body_text:
                    email_content.append(body_text + " ")

            # A missing token means the last page has been consumed.
            page_token = results.get("nextPageToken")
            if not page_token:
                break

        return email_content

    @classmethod
    def _extract_plain_text(cls, part):
        """Return the first decoded text/plain body found depth-first in ``part``, or ""."""
        if part.get("mimeType") == "text/plain":
            # Parts whose content is stored as a separate attachment have no
            # inline "data" key; skip them instead of raising KeyError.
            data = part.get("body", {}).get("data")
            if data:
                return cls._decode_body(data)

        for child in part.get("parts", []):
            text = cls._extract_plain_text(child)
            if text:
                return text
        return ""

    @staticmethod
    def _decode_body(data):
        """Decode a base64url-encoded body string to text."""
        # Restore any stripped '=' padding so the decoder accepts the input.
        padded = data + "=" * (-len(data) % 4)
        # Undecodable bytes are replaced rather than aborting the whole
        # retrieval on one message that is not valid UTF-8.
        return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")

In [9]:
gmail_api = GmailAPI()
gmail_api.connect_api()

# Cut-off for "the last week", expressed as Unix epoch seconds.
# Gmail interprets `after:YYYY/MM/DD` dates as midnight in the PST timezone, so
# a calendar date derived from UTC can shift the window by several hours; an
# epoch value is unambiguous. This also avoids the deprecated datetime.utcnow().
one_week_ago = int((datetime.now().astimezone() - timedelta(days=7)).timestamp())

# Construct the search query to retrieve unread emails received after the cut-off
query = f"is:unread after:{one_week_ago}"
email_content = gmail_api.retrieve_mails(query)

### Saving the content

In [10]:
# Number of retrieved messages for which a plain-text body was extracted
len(email_content)

32

In [11]:
# Index of the message to preview. Guarded so the cell still runs after a
# kernel restart when the mailbox returns fewer messages than this index needs.
PREVIEW_INDEX = 2

if len(email_content) > PREVIEW_INDEX:
    # Re-flow the body to 80 columns so it is readable in the notebook output.
    wrapped_text = textwrap.fill(email_content[PREVIEW_INDEX], width=80)
    print(wrapped_text)
else:
    wrapped_text = ""
    print(f"No message at index {PREVIEW_INDEX}: only {len(email_content)} retrieved.")

Kaggle                  Hi Sai Samyukta Palle,    It's Leap Day, and to
celebrate, Google Cloud Next is offering 65% off with    promo code NEXT699LEAP
today!* 🙌 The conference is happening in Las Vegas    on April 9 - 11 and is
packed with ML and AI programming, including a lot    of amazing Kaggle-focused
content.            For example:      Workshops with Kaggle Grandmasters like
David Austen (recent 1st place    winner in our LLM – Detect AI Generated Text
competition) & Ruchi Bhatia    (youngest 3x Kaggle Grandmaster)  Sessions with
prominent researchers who've launched groundbreaking Kaggle    competitions like
Rhiju Das (Stanford professor behind the Ribonanza RNA    Folding competition &
COVID-19 mRNA Vaccine Degradation Prediction    competition)  AMA with the
Kaggle team including our CEO, D. Sculley  In-person Kaggle competition with fun
prizes 🎉        Register Here        A full lineup of all the Kaggle-focused
magic at the event is coming your    way soon! We really hope

In [12]:
# File path where you want to save the data
file_path = "../data/output/data.json"

# Write the list to a JSON file
with open(file_path, "w") as file:
    json.dump(email_content, file)