In [None]:
import base64
import os
from datetime import date
from datetime import datetime
from datetime import time
from email.utils import parsedate_to_datetime
from typing import Optional

from dotenv import load_dotenv
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from weasyprint import HTML

In [None]:
# Google's Gmail API Python Quickstart


def main():
    """Quickstart demo: authorise against Gmail and print the user's label names.

    Cached credentials are read from ``token.json`` when that file exists.
    If they are missing or invalid they are either refreshed or obtained
    through the installed-app flow driven by ``credentials.json``, and the
    result is written back to ``token.json``.

    NOTE: this function reads the notebook-level ``SCOPES`` variable, so the
    cell that assigns ``SCOPES`` must have been executed first.
    """
    token_path = "token.json"

    # token.json holds the user's access and refresh tokens; it is created
    # the first time the authorisation flow completes.
    creds = None
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    needs_login = not creds or not creds.valid
    if needs_login:
        can_refresh = creds and creds.expired and creds.refresh_token
        if can_refresh:
            creds.refresh(Request())
        else:
            # No usable credentials: let the user log in through the browser.
            auth_flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES
            )
            creds = auth_flow.run_local_server(port=0)
        # Persist the credentials for the next run.
        with open(token_path, "w") as token_file:
            token_file.write(creds.to_json())

    try:
        # Call the Gmail API.
        gmail = build("gmail", "v1", credentials=creds)
        response = gmail.users().labels().list(userId="me").execute()
        found_labels = response.get("labels", [])

        if found_labels:
            print("Labels:")
            for entry in found_labels:
                print(entry["name"])
        else:
            print("No labels found.")

    except HttpError as error:
        # TODO(developer) - Handle errors from gmail API.
        print(f"An error occurred: {error}")


if __name__ == "__main__":
    main()

In [None]:
# Test: convert each HTML string to its own PDF file (out_0.pdf, out_1.pdf, ...).
# NOTE(review): `resulting_htmls` is not assigned in any visible cell -- confirm
# which cell is expected to provide it before running this one.

for index, html_item in enumerate(resulting_htmls):
    outFile = f"out_{index}.pdf"
    HTML(string=html_item).write_pdf(target=outFile)


# Equivalent terminal command. It is a shell command, not Python, so run it in
# a terminal (or prefix it with "!" in its own notebook cell):
#   weasyprint test2.html out.pdf

Working Script

In [None]:
load_dotenv(".env")
receipts_folder = os.getenv("RECEIPTS_FOLDER")
if not receipts_folder:
    # Fail early with a clear message instead of a TypeError inside os.path.join.
    raise RuntimeError("RECEIPTS_FOLDER is not set; define it in .env or the environment.")


def _header_value(headers, name):
    """Return the value of the first header named `name` (case-insensitive), or None."""
    wanted = name.lower()
    for header in headers:
        if header.get("name", "").lower() == wanted:
            return header.get("value")
    return None


def _next_free_path(directory, stem, suffix=".pdf"):
    """Return `directory/stem.pdf`, or `directory/stem_N.pdf` for the first N >= 1 not yet on disk."""
    candidate = os.path.join(directory, f"{stem}{suffix}")
    counter = 0
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(directory, f"{stem}_{counter}{suffix}")
    return candidate


# NOTE(review): `service` and `results` are not assigned at notebook scope in any
# visible cell (they only exist as locals inside functions) -- confirm which cell
# is expected to create them before this one runs.
for item in results.get("messages", []):

    item_id = item["id"]
    message = service.users().messages().get(userId="me", id=item_id, format="full").execute()

    # Assumes the HTML body is the second MIME part of the message -- TODO confirm.
    html_body = base64.urlsafe_b64decode(
        message["payload"]["parts"][1]["body"]["data"]
    ).decode("utf-8")

    # Reset per message so one unparseable date does not leak into later iterations.
    parsed_date = None
    # Look the header up by name; its position in the header list is not fixed.
    raw_date = _header_value(message["payload"].get("headers", []), "Date")
    if raw_date:
        try:
            parsed_date = parsedate_to_datetime(raw_date)
        except (TypeError, ValueError):
            parsed_date = None

    if parsed_date is None:
        file_path = _next_free_path(receipts_folder, "Gasoline - Unable to Parse Date")
    else:
        month = parsed_date.strftime("%B").upper()
        day = parsed_date.strftime("%d")

        # One sub-folder per month; created on demand.
        month_directory = os.path.join(receipts_folder, month)
        os.makedirs(month_directory, exist_ok=True)

        file_path = _next_free_path(month_directory, f"Gasoline - {month} {day}")

    HTML(string=html_body).write_pdf(target=file_path)

    # Mark the message as read only after its PDF has been written.
    service.users().messages().modify(
        userId="me", id=item_id, body={"removeLabelIds": ["UNREAD"]}
    ).execute()
    

Functions

In [None]:
def validate_credentials(SCOPES):
    """
    Load Gmail OAuth credentials, refreshing or re-authorising them when needed.

    Cached credentials are read from ``token.json`` if that file exists. When
    they are missing or not valid, they are refreshed (expired credentials
    with a refresh token) or obtained through the installed-app flow using
    ``credentials.json``; the resulting credentials are then written back to
    ``token.json``.

    Args:
        SCOPES: Scopes passed to the credential loader and to the auth flow.

    Returns:
        The credentials object, or None (after printing a message) when no
        credentials are available.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES
            )
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open("token.json", "w") as token:
            token.write(creds.to_json())

    if creds is not None:
        return creds

    print("An error ocurred while validating credentials.")
    return None
    

In [None]:
def get_email_Ids(query, service, format):
    """Run a Gmail message search and return the API response.

    Args:
        query: Search expression passed as ``q`` to ``messages().list``.
        service: Gmail API service object.
        format: Accepted for signature compatibility; not used here.

    Returns:
        The response of ``messages().list(...).execute()``, or None when the
        call raises ``HttpError`` (the error is printed).
    """
    try:
        # Build the list request for the authenticated user, then run it.
        list_request = service.users().messages().list(userId="me", q=query)
        return list_request.execute()
    except HttpError as error:
        # TODO(developer) - Handle errors from gmail API.
        print(f"An error occurred: {error}")
    return None

In [None]:
def get_html(item, service, format) -> Optional[str]:
    """Fetch one Gmail message and return its decoded body text.

    Args:
        item: Mapping containing the message ``"id"``.
        service: Gmail API service object.
        format: ``"full"`` or ``"raw"``; forwarded to ``messages().get``.

    Returns:
        For ``"full"``: the UTF-8 decoded body of ``payload.parts[1]``, falling
        back to ``payload.body`` when that lookup fails.
        For ``"raw"``: the UTF-8 decoded ``raw`` field of the message.
        None when ``format`` is not supported or no body could be decoded
        (a diagnostic is printed in both cases).
    """
    if format not in ["full", "raw"]:
        print("The only accepted formats are full or raw.")
        return None

    item_id = item["id"]

    message = service.users().messages().get(userId="me", id=item_id, format=format).execute()

    # Stays None when every decoding attempt below fails.
    html_body = None

    if format == "full":
        try:
            # Assumes the HTML body is the second MIME part -- TODO confirm.
            html_body = base64.urlsafe_b64decode(
                message["payload"]["parts"][1]["body"]["data"]
            ).decode("utf-8")
        except Exception:
            try:
                # Fallback: a message whose body sits directly on the payload.
                html_body = base64.urlsafe_b64decode(
                    message["payload"]["body"]["data"]
                ).decode("utf-8")
            except Exception as E:
                print("Error when parsing email object. Patterns tried: ")
                print('email["payload"]["parts"][1]["body"]["data"]')
                print('email["payload"]["body"]["data"]')
                print(f"Error: {E}")

    elif format == "raw":
        try:
            # NOTE(review): this decodes the entire "raw" field, which is
            # presumably the whole message rather than only an HTML part --
            # confirm callers expect that.
            html_body = base64.urlsafe_b64decode(message["raw"]).decode("utf-8")
        except Exception as E:
            print("Error when parsing email object. Pattern tried: ")
            print('email["raw"]')
            print(f"Error: {E}")

    return html_body



In [None]:
def get_html_bodies_main(SCOPES, query, format) -> list:
    """Authenticate, search Gmail and return the decoded body of every match.

    Args:
        SCOPES: Scopes forwarded to ``validate_credentials``.
        query: Gmail search expression forwarded to ``get_email_Ids``.
        format: Message format forwarded to ``get_email_Ids`` and ``get_html``.

    Returns:
        A list with one ``get_html`` result per message in the search
        response; an empty list when the search failed or matched nothing.
    """
    creds = validate_credentials(SCOPES)

    service = build("gmail", "v1", credentials=creds)

    results = get_email_Ids(query, service, format)

    # get_email_Ids returns None when the API call fails, and a response may
    # carry no "messages" key (e.g. nothing matched); both mean "no messages".
    messages = results.get("messages", []) if results else []

    return [get_html(item, service, format) for item in messages]


In [None]:
# Run configuration: scopes, search expression and message format for the fetch.
SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]
query = "from:quincy@freecodecamp.org is:unread"
# NOTE(review): this name shadows the built-in format() for the rest of the notebook.
format = "raw"

# Fetch and decode every message matching the query.
html_list = get_html_bodies_main(SCOPES=SCOPES, query=query, format=format)

In [None]:
# Display the list returned by get_html_bodies_main in the previous cell.
# NOTE(review): this renders every fetched body in full; review the output
# before sharing the notebook, since it contains email content.
html_list