### get documents attached with emails 


In [1]:
from __future__ import print_function
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import os

In [2]:
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def gmail_connect():
    
    creds = None

    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
        return build('gmail','v1',credentials = creds)

    else:
        print("file 'token.json' not found in current directory.")

    
service = gmail_connect()
    

In [3]:
def recent_emails(service, maxEmails=5):

    results = service.users().messages().list( userId = 'me' , maxResults = maxEmails).execute()

    messages = results.get('messages',[])

    for message in messages:
        msg = service.users().messages().get( userId = 'me', id=message['id']).execute()

        headers = msg['payload']['headers']
        subject = next(h['value'] for h in headers if h['name'] == 'Subject')
        sender = next(h['value'] for h in headers if h['name'] == 'From')

        print(f'subject:{subject}')
        print(f"from:{sender}")
        print(f"content:{msg.get('snippet','No snippet')}")
        print("-"*60)

    return messages,msg,headers,subject,sender

messages,msg,headers,subject,sender = recent_emails(service)

msg

subject:Bharat Coking Coal Ltd IPO - Allotment Not Successful
from:Angel One <donotreply@angelone.in>
content:Angel One Bharat Coking Coal Ltd IPO - Allotment Not Successful Dear Padsala, You have not received an allotment for the Bharat Coking Coal Ltd IPO. A refund of ₹13800.00 blocked for your IPO
------------------------------------------------------------
subject:Screener.in - Watchlist updates
from:"Screener.in" <no-reply@screener.in>
content:Latest updates NTPC Rating update Disclosure Under Regulation 30 Of The SEBI (LODR) Regulations, 2015 Disclosure Under Regulation 30 Of The SEBI (LODR) Regulations, 2015 Updates in your watchlist are
------------------------------------------------------------
subject:Build your mobile app idea in minutes
from:Replit <contact@mail.replit.com>
content:Build your mobile app idea in minutes No coding. No Xcode. Just describe what you want. Today, we&#39;re launching Mobile Apps on Replit—the fastest way to go from idea to a working app on your 

{'id': '19bbf90ec59906ad',
 'threadId': '19bbf90ec59906ad',
 'labelIds': ['UNREAD', 'CATEGORY_UPDATES', 'INBOX'],
 'snippet': 'Latest updates Coal India Filing Of Prospectus Of Bharat Coking Coal Limited (BCCL) , A Wholly Owned Subsidiary Of Coal India Limited, With The Registrar Of Companies, Jharkhand At Ranchi On January 14',
 'payload': {'partId': '',
  'mimeType': 'multipart/alternative',
  'filename': '',
  'headers': [{'name': 'Delivered-To', 'value': 'tirthpadsala1@gmail.com'},
   {'name': 'Received',
    'value': 'by 2002:a05:6918:eb12:b0:414:876:1610 with SMTP id hk18csp632935ysb;        Wed, 14 Jan 2026 18:51:45 -0800 (PST)'},
   {'name': 'X-Received',
    'value': 'by 2002:a05:6a21:6da3:b0:38d:ec17:17a with SMTP id adf61e73a8af0-38dec170b14mr2248297637.65.1768445504837;        Wed, 14 Jan 2026 18:51:44 -0800 (PST)'},
   {'name': 'ARC-Seal',
    'value': 'i=1; a=rsa-sha256; t=1768445504; cv=none;        d=google.com; s=arc-20240605;        b=WWDEGdlqZQsmGJK2upfZVu3QlPU1wIZvp

### filter emails with attachments

In [4]:
def attachment_emails(service , maxEmails=10):

    results = service.users().messages().list( userId = 'me' , q="has:attachment" , maxResults = maxEmails).execute()
    print(f"found {results.get('resultSizeEstimate' , 0)} emails with attachment..")

    messages = results.get('messages',[])

    for message in messages:
        msg = service.users().messages().get( userId = 'me', id=message['id'] , format='full').execute()

        headers = msg['payload']['headers']
        subject = next(h['value'] for h in headers if h['name'] == 'Subject')
        sender = next(h['value'] for h in headers if h['name'] == 'From')

        docAttaches = []

        def check_parts(parts):
            attachments=[]

            for part in parts:
                if 'filename' in part['body'] and part['body']['filename']:
                    filename = part['body']['filename']
                    if filename.lower().endswith(('.pdf', '.docx', '.doc')):
                        attachments.append(filename)
                
                if 'parts' in part:
                    attachments.extend(check_parts(part['parts']))
            
            return attachments

        if 'parts' in msg['payload']:
            docAttaches = check_parts(msg['payload']['parts'])

        if docAttaches:

            print("-"*60)
            print(f"from:{sender}")
            print(f"subject:{subject}")
            print(f"Documnets:{','.join(docAttaches)}")
        
    return docAttaches

docAttaches = attachment_emails(service)
docAttaches


found 201 emails with attachment..


[]

In [6]:
import os
import base64

def download_attachments(service, maxEmails=5, download_folder='email_attachments'):
    # Create download folder if it doesn't exist
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)
    
    results = service.users().messages().list(userId='me', q="has:attachment", maxResults=maxEmails).execute()
    print(f"Found {results.get('resultSizeEstimate', 0)} emails with attachments..")

    messages = results.get('messages', [])
    all_downloaded = []

    for message in messages:
        msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()

        headers = msg['payload']['headers']
        subject = next((h['value'] for h in headers if h['name'] == 'Subject'), 'No Subject')
        sender = next((h['value'] for h in headers if h['name'] == 'From'), 'Unknown')

        def check_and_download_parts(parts, msg_id):
            downloaded = []

            for part in parts:
                if part.get('filename'):
                    filename = part['filename']
                    
                    # Check if it's a document type you want
                    if filename.lower().endswith(('.pdf', '.docx', '.doc')):
                        # Get attachment data
                        if 'attachmentId' in part['body']:
                            attachment = service.users().messages().attachments().get(
                                userId='me',
                                messageId=msg_id,
                                id=part['body']['attachmentId']
                            ).execute()
                            
                            # Decode the attachment data
                            file_data = base64.urlsafe_b64decode(attachment['data'].encode('UTF-8'))
                            
                            # Save the file
                            filepath = os.path.join(download_folder, filename)
                            
                            # Handle duplicate filenames
                            counter = 1
                            base_name, extension = os.path.splitext(filename)
                            while os.path.exists(filepath):
                                filepath = os.path.join(download_folder, f"{base_name}_{counter}{extension}")
                                counter += 1
                            
                            with open(filepath, 'wb') as f:
                                f.write(file_data)
                            
                            downloaded.append({'filename': filename, 'path': filepath})
                            print(f"  ✓ Downloaded: {filename}")
                
                # Recursively check nested parts
                if 'parts' in part:
                    downloaded.extend(check_and_download_parts(part['parts'], msg_id))
            
            return downloaded

        # Process email parts
        email_attachments = []
        if 'parts' in msg['payload']:
            email_attachments = check_and_download_parts(msg['payload']['parts'], message['id'])
        
        if email_attachments:
            print("-" * 60)
            print(f"From: {sender}")
            print(f"Subject: {subject}")
            all_downloaded.extend(email_attachments)
    
    print("\n" + "=" * 60)
    print(f"Total files downloaded: {len(all_downloaded)}")
    return all_downloaded


downloaded_files = download_attachments(service, maxEmails=5)

# Access the files
for file_info in downloaded_files:
    print(f"File: {file_info['filename']} -> Saved at: {file_info['path']}")



Found 201 emails with attachments..
  ✓ Downloaded: IWTXXXXX6D_BHARATCOAL_IPO_ORDER_7335802.pdf
------------------------------------------------------------
From: nse_alerts <nse_alerts@nse.co.in>
Subject: NSE - Public issue - Bidding information
  ✓ Downloaded: 12685_Collateral Report_31-12-2025_78445990333809.pdf
------------------------------------------------------------
From: inspectioninfo@mcxindia.com
Subject: Segregation and Monitoring of Collateral at Client Level for 31/12/2025 with Angel One Limited
  ✓ Downloaded: cloud workshop.pdf
------------------------------------------------------------
From: Vivaan Benrjee <vivaanbenrjee@gmail.com>
Subject: Hi
  ✓ Downloaded: ROS_ROF_AABZ656274.pdf
------------------------------------------------------------
From: Angel One <contract.notes@angeltrade.in>
Subject: Register of Securities & Funds for week ended Jan 10 2026
  ✓ Downloaded: AABZ656274.pdf
------------------------------------------------------------
From: BSE ALERTS <info@

In [7]:
downloaded_files

[{'filename': 'IWTXXXXX6D_BHARATCOAL_IPO_ORDER_7335802.pdf',
  'path': 'email_attachments\\IWTXXXXX6D_BHARATCOAL_IPO_ORDER_7335802.pdf'},
 {'filename': '12685_Collateral Report_31-12-2025_78445990333809.pdf',
  'path': 'email_attachments\\12685_Collateral Report_31-12-2025_78445990333809.pdf'},
 {'filename': 'cloud workshop.pdf',
  'path': 'email_attachments\\cloud workshop.pdf'},
 {'filename': 'ROS_ROF_AABZ656274.pdf',
  'path': 'email_attachments\\ROS_ROF_AABZ656274.pdf'},
 {'filename': 'AABZ656274.pdf', 'path': 'email_attachments\\AABZ656274.pdf'}]