# connect with Gmail

#### from google.oauth2.credentials import Credentials
In Python's Google Auth library, google.oauth2.credentials.Credentials is a class used to hold OAuth 2.0 credentials (specifically access and refresh tokens) that authorize an application to access a user's data. It serves as a container for the tokens needed to make authenticated calls to Google APIs, such as Google Drive or Calendar, on behalf of a specific resource owner. 
Key Uses
- Token Storage: It stores the current access_token and an optional refresh_token.
- Automatic Refresh: The object can automatically refresh an expired access token if a valid refresh token, client ID, and client secret are provided.
- API Authentication: It is used to apply authentication headers (like Authorization: Bearer <token>) to HTTP requests made to Google services.
- Service Integration: It is typically passed to the googleapiclient.discovery.build function to create a service object for interacting with specific Google APIs. 
Common Methods
- from_authorized_user_info(info, scopes=None): Class method that creates a credentials instance from a dictionary containing user authorization data.
- from_authorized_user_file(filename, scopes=None): Class method that creates a credentials instance directly from a saved JSON file (often named token.json or authorized_user.json).
- refresh(request): Manually triggers a refresh of the access token using the provided transport object.
- apply(headers): Adds the necessary authentication token to a dictionary of HTTP request headers. 

#### from google_auth_oauthlib.flow import InstalledAppFlow
#### from googleapiclient.discovery import build

In the Google Python API environment, these two imports represent the two distinct phases of interacting with a Google service: authentication and execution. 
1. InstalledAppFlow (The Authentication Phase)
The InstalledAppFlow class from google_auth_oauthlib.flow is used to handle the OAuth 2.0 authorization flow specifically for applications that run on a user's local machine (desktop apps or local scripts). 
- Its Job: It manages the process of getting permission from a user. It typically opens a web browser to let the user log into their Google account and approve the permissions (scopes) your app is requesting.
- Key Function: After the user approves, InstalledAppFlow captures the authorization code and exchanges it for a Credentials object containing an access token.
- Common Method: flow.run_local_server() starts a temporary local web server to listen for the redirect from Google's authorization page. 
2. build (The Execution Phase)
The build function from googleapiclient.discovery is used to create a Service Object, which acts as a bridge between your Python code and a specific Google API. 
- Its Job: It dynamically constructs a "client" that knows all the methods, parameters, and endpoints for a specific Google service (like Google Drive, Sheets, or Calendar) based on an online "Discovery Document".
- Key Function: It takes the API name, the version, and the Credentials (obtained from the first step) to initialize a ready-to-use service.
- Example Usage: service = build('drive', 'v3', credentials=creds) creates a service object. You can then call service.files().list().execute() to list files in Google Drive. 

In [None]:
# all essential libraries 

from __future__ import print_function
from google.oauth2.credentials import Credentials

from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import os
import pickle
from google.auth.transport.requests import Request


In [None]:
# OAuth scopes requested during authorization; gmail.readonly is the
# read-only Gmail scope. gmail_connect() passes this list both when loading
# cached tokens and when starting a new authorization flow.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def gmail_connect():
    """Return an authorized Gmail API service object.

    Credentials are looked up in this order:

    1. Tokens cached in ``token.json`` by a previous run.
    2. If the cached tokens are expired but carry a refresh token, they are
       refreshed in place.
    3. Otherwise the interactive OAuth flow is started from
       ``credentials.json`` (this opens a local server for the redirect).

    Whenever credentials were refreshed or newly obtained they are written
    back to ``token.json`` so the next run can skip the interactive step.

    Returns:
        The object produced by ``build('gmail', 'v1', credentials=creds)``.
    """
    creds = None

    # Reuse tokens cached by a previous run, if any.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # refresh() needs a transport object that performs the HTTP call;
            # Request is google.auth's requests-based transport.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            # port=0 lets the OS pick any free port for the redirect listener.
            creds = flow.run_local_server(port=0)

        # Persist the (new or refreshed) tokens for subsequent runs.
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    return build('gmail', 'v1', credentials=creds)


# Build the Gmail API client once; the cells below pass this `service`
# object to their fetch functions.
service = gmail_connect()



In [None]:
def recent_emails(service, maxEmails=5):
    """List the most recent messages and print a short summary of each.

    For every listed message the full message is fetched and its subject,
    sender and snippet are printed, followed by a separator line.

    Args:
        service: Gmail API client exposing ``users().messages().list`` and
            ``users().messages().get``.
        maxEmails: Maximum number of messages to request from the listing.

    Returns:
        A tuple ``(messages, msg, headers, subject, sender)`` where
        ``messages`` is the list of message stubs from the listing and the
        remaining four values belong to the LAST message processed. They are
        all ``None`` when the listing is empty.
    """

    def header_value(headers, name, default):
        # Mail header names are case-insensitive, so compare lower-cased.
        wanted = name.lower()
        return next(
            (h['value'] for h in headers if h['name'].lower() == wanted),
            default,
        )

    results = service.users().messages().list(userId='me', maxResults=maxEmails).execute()
    messages = results.get('messages', [])

    # Pre-bind the per-message values so the return statement is valid even
    # when the mailbox listing comes back empty.
    msg = headers = subject = sender = None

    for message in messages:
        msg = service.users().messages().get(userId='me', id=message['id']).execute()

        headers = msg['payload']['headers']
        # Fall back to placeholders instead of raising when a header is absent.
        subject = header_value(headers, 'Subject', 'No Subject')
        sender = header_value(headers, 'From', 'Unknown Sender')

        print(f'subject:{subject}')
        print(f"from:{sender}")
        print(f"content:{msg.get('snippet','No snippet')}")
        print("-"*60)

    return messages, msg, headers, subject, sender

# Fetch and print the most recent emails (default maxEmails=5) and keep the
# values returned for the last message so the cells below can inspect them.
messages,msg,headers,subject,sender = recent_emails(service)


In [None]:
# Dump the raw message, its header list, subject and sender as returned by
# recent_emails(), separated by blank lines, for manual inspection.
print(f"{msg}\n\n{headers}\n\n{subject}\n\n{sender}")

In [None]:
import base64

def process_message(msg):
    """Print a summary of a Gmail message and return its decoded body.

    Prints the subject and sender (with placeholder text when the header is
    absent), a separator line, and the first 200 characters of the body.

    Args:
        msg: A message dict with a ``payload`` entry that contains a
            ``headers`` list of ``{'name': ..., 'value': ...}`` dicts.

    Returns:
        The body string produced by ``extract_body(msg['payload'])``.
    """
    headers = msg['payload']['headers']

    def header_value(name, default):
        # Mail header names are case-insensitive ("subject" == "Subject"),
        # so compare lower-cased names; the first match wins.
        wanted = name.lower()
        return next(
            (h['value'] for h in headers if h['name'].lower() == wanted),
            default,
        )

    subject = header_value('Subject', 'No Subject')
    sender = header_value('From', 'Unknown Sender')

    print(f"Subject: {subject}")
    print(f"From: {sender}")
    print("-" * 50)

    # Extract body
    body = extract_body(msg['payload'])
    print(f"Body preview: {body[:200]}...")

    return body

def extract_body(payload):
    """Return the decoded text body of a Gmail message payload.

    If the payload itself carries ``body['data']`` it is decoded and
    returned regardless of MIME type. Otherwise the ``parts`` tree is
    searched depth-first, at any nesting level, first for a ``text/plain``
    part and then for a ``text/html`` part.

    Args:
        payload: A payload / message-part dict, optionally containing
            ``mimeType``, ``body`` and ``parts`` keys.

    Returns:
        The decoded body as ``str``, or ``""`` when nothing usable is found
        or the data cannot be decoded as base64url + UTF-8.
    """

    def decode(data):
        try:
            # Restore any '=' padding that was stripped from the base64url
            # text; correctly padded input gets nothing appended.
            padded = data + '=' * (-len(data) % 4)
            return base64.urlsafe_b64decode(padded).decode('utf-8')
        except (ValueError, TypeError):
            # Malformed base64 or non-UTF-8 bytes: best effort, report
            # "no body" rather than failing the whole extraction.
            return ""

    def find(part, mime_type):
        # Depth-first search for the first non-empty body of `mime_type`.
        part_body = part.get('body') or {}
        if 'data' in part_body and part.get('mimeType') == mime_type:
            decoded = decode(part_body['data'])
            if decoded:
                return decoded
        for child in part.get('parts') or ():
            found = find(child, mime_type)
            if found:
                return found
        return ""

    # Base case: the payload carries its data directly.
    top_body = payload.get('body') or {}
    if 'data' in top_body:
        return decode(top_body['data'])

    # Multipart: prefer plain text anywhere in the tree, then fall back to HTML.
    for mime_type in ('text/plain', 'text/html'):
        for part in payload.get('parts') or ():
            found = find(part, mime_type)
            if found:
                return found

    return ""

### decode Gmail content


In [None]:
import base64

# Decode the body of `msg` when the top-level payload carries the data
# directly; if there is no 'data' key this cell prints nothing.
if 'data' in msg['payload']['body']:
    raw_data = msg['payload']['body']['data']
    # urlsafe_b64decode understands the URL-safe alphabet ('-' and '_')
    # itself; the decoded bytes are then interpreted as UTF-8 text.
    decoded_data = base64.urlsafe_b64decode(raw_data).decode('utf-8')
    print(decoded_data)  # This is the actual email body

In [None]:
# Same decoding as the previous cell, but gated on the top-level payload
# being text/plain rather than on the presence of body data.
# NOTE(review): ['data'] is indexed without a membership check — this assumes
# a text/plain payload always carries body data; confirm for empty messages.
if msg['payload']['mimeType'] == 'text/plain':
    data = msg['payload']['body']['data']
    decoded = base64.urlsafe_b64decode(data).decode('utf-8')
    print(decoded)

In [None]:
# Print the summary for `msg` and keep the decoded body returned by
# process_message() for the next cell.
body = process_message(msg)

In [None]:
# Bare expression: as the last line of a notebook cell this displays the
# value of `body` (it has no effect when run as a plain script).
body

In [None]:
import base64
from googleapiclient.discovery import build

def get_full_messages(service, max_results=5):
    """Download and process the full content of the latest emails.

    Args:
        service: Gmail API client exposing ``users().messages()``.
        max_results: Upper bound on the number of messages to list.

    Returns:
        A list with one processed dict (as produced by
        ``extract_complete_message``) per listed message, in listing order.
    """
    # Step 1: list message stubs. The listing carries only
    # {"id": ..., "threadId": ...} entries, not the content itself.
    list_request = service.users().messages().list(userId='me', maxResults=max_results)
    listing = list_request.execute()
    stubs = listing.get('messages', [])

    # Step 2: fetch each message in 'full' format (headers, body,
    # attachments metadata, MIME structure) and process it.
    full_messages = []
    for position, stub in enumerate(stubs, start=1):
        get_request = service.users().messages().get(
            userId='me',
            id=stub['id'],
            format='full',
        )
        raw_message = get_request.execute()

        processed = extract_complete_message(raw_message)
        full_messages.append(processed)

        # Progress indicator: running count plus the subject just handled.
        print(f"Email {position}: {processed['subject']}...")

    return full_messages

def extract_complete_message(msg):
    """Extract headers, body text and body HTML from a full Gmail message.

    Args:
        msg: A message dict with ``id`` and ``payload`` keys; the payload may
            contain ``headers``, ``mimeType``, ``body`` and nested ``parts``.

    Returns:
        A dict with the keys ``id``, ``subject``, ``sender``, ``date``,
        ``body_text``, ``body_html``, ``snippet`` and ``full_message`` (the
        unmodified input). Missing headers are replaced by the placeholders
        ``'No Subject'``, ``'Unknown'`` and ``'No Date'``. When the message
        has no plain-text part, ``body_text`` is the HTML with tags removed.
    """
    import re

    payload = msg['payload']
    headers = payload.get('headers', [])

    def header_value(name, default):
        # Mail header names are case-insensitive, so compare lower-cased;
        # the first matching header wins.
        wanted = name.lower()
        return next(
            (h['value'] for h in headers if h['name'].lower() == wanted),
            default,
        )

    subject = header_value('Subject', 'No Subject')
    sender = header_value('From', 'Unknown')
    date = header_value('Date', 'No Date')

    def extract_from_payload(part):
        """Recursively collect (plain_text, html) from a part and its children."""
        text = ""
        html = ""

        data = (part.get('body') or {}).get('data')
        mime_type = part.get('mimeType')
        # Only text parts contribute to the result, so skip decoding
        # anything else (e.g. inline attachment data).
        if data and mime_type in ('text/plain', 'text/html'):
            try:
                # Body data is base64url; restore any stripped '=' padding
                # before decoding, then interpret the bytes as UTF-8.
                padded = data + '=' * (-len(data) % 4)
                decoded = base64.urlsafe_b64decode(padded).decode('utf-8')
            except (ValueError, TypeError):
                # Malformed base64 or non-UTF-8 bytes: best effort, skip
                # this part instead of failing the whole message.
                decoded = ""
            if mime_type == 'text/plain':
                text = decoded
            else:
                html = decoded

        # Bodies can be nested arbitrarily deep in multipart messages.
        for child in part.get('parts') or ():
            child_text, child_html = extract_from_payload(child)
            text += child_text
            html += child_html

        return text, html

    body_text, body_html = extract_from_payload(payload)

    # If there is no plain-text body but we have HTML, derive text from it
    # with a simple tag-stripping pass.
    if not body_text and body_html:
        body_text = re.sub(r'<[^<]+?>', '', body_html)

    return {
        'id': msg['id'],
        'subject': subject,
        'sender': sender,
        'date': date,
        'body_text': body_text,
        'body_html': body_html,
        'snippet': msg.get('snippet', ''),
        'full_message': msg,  # Keep the full raw message if needed
    }


def print_full_emails(messages, max_chars=1000):
    """Print a formatted report for a list of processed emails.

    Args:
        messages: Sequence of dicts with the keys ``subject``, ``sender``,
            ``date``, ``body_text`` and ``snippet`` (the shape returned by
            ``extract_complete_message``).
        max_chars: Maximum number of body characters to print per email;
            longer bodies are cut at this length and followed by ``"..."``.
    """
    heavy_rule = '=' * 80
    light_rule = '─' * 80
    total = len(messages)

    for i, msg in enumerate(messages, 1):
        body_text = msg['body_text']

        print(f"\n{heavy_rule}")
        print(f" EMAIL {i}/{total}")
        print(f"{heavy_rule}")
        print(f"Subject: {msg['subject']}")
        print(f"From: {msg['sender']}")
        print(f"Date: {msg['date']}")
        print(f"\n{light_rule}")
        print("BODY:")
        print(f"{light_rule}")

        # Show body text (or the snippet if the body is empty).
        if body_text:
            if len(body_text) > max_chars:
                # Actually truncate: keep the first max_chars characters and
                # mark the cut with an ellipsis.
                print(body_text[:max_chars] + "...")
            else:
                print(body_text)
        else:
            print(msg['snippet'])

        # The reported length is that of the full body, not of the preview.
        print(f"\nBody length: {len(body_text)} characters")
        print(f"{heavy_rule}\n")


# Fetch and process the five most recent messages, then print the formatted
# report. Note that this rebinds the notebook-level name `messages`.
messages = get_full_messages(service, max_results=5)
print_full_emails(messages)