# Using Gmail - Google API

#### Find the e-mails in gmail

This script relies heavily on functionality from Google's Gmail API, as shown in this Python quickstart:
https://developers.google.com/gmail/api/quickstart/python

Many of the functions and methods illustrated here come from the Google Python documentation. This script doesn't really present anything new, but serves as a working example. 

I used this script to download attachments in bulk from a mobile phone app that exported its data as .csv files distributed via e-mail. 

<b>Note -- In order for this script to work correctly, you MUST complete the tutorial as written in the link above. Also, when completing that tutorial, in order to be able to switch 'folders' (aka: Labels) for messages, you must set the scope variable to end in 'gmail.modify' rather than 'gmail.readonly' in the quickstart.py script, as well as in the SCOPES variable below.</b>

In [202]:
# Import statements
from __future__ import print_function
import httplib2
import os

from apiclient import discovery
from apiclient import errors
import oauth2client
from oauth2client import client
from oauth2client import tools
import base64

import pandas as pd

### Scope variables

In [203]:
# NOTE: APPLICATION_NAME is assigned as the OAuth flow's user agent in get_credentials().
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/gmail-python-quickstart.json
# The 'gmail.modify' scope (rather than 'gmail.readonly') is needed to re-label messages.
SCOPES = 'https://www.googleapis.com/auth/gmail.modify'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Gmail API Python Quickstart'

### Function definition --- all of these functions are defined in the Gmail API documentation as samples

In [204]:
def get_credentials():
    """Gets valid user credentials from storage.

    If nothing has been stored, or if the stored credentials are invalid,
    the OAuth2 flow is completed to obtain the new credentials.

    The credentials are cached in ~/.credentials/gmail-python-quickstart.json;
    the directory is created if it does not exist yet.

    Returns:
        Credentials, the obtained credential.

    NOTE: This probably won't work if you haven't completed step 1 of the
    Google quick start link listed at the top of this notebook.
    """
    # Import the submodule explicitly: a bare `import oauth2client` does not
    # guarantee that the `oauth2client.file` attribute is available.
    from oauth2client import file as oauth2client_file

    credential_dir = os.path.join(os.path.expanduser('~'), '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir,
                                   'gmail-python-quickstart.json')

    store = oauth2client_file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME

        # Honour a module-level `flags` if one was defined; otherwise build
        # the default flags. An empty argv is parsed on purpose so that the
        # notebook kernel's own command-line arguments are not interpreted.
        flags = globals().get('flags')
        if flags is None:
            argparser = getattr(tools, 'argparser', None)
            if argparser is not None:
                flags = argparser.parse_args([])

        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials

In [205]:
def ListMessagesMatchingQuery(service, user_id, query=''):
    """List all Messages of the user's mailbox matching the query.

    Follows 'nextPageToken' until every page of results has been fetched.

    Args:
      service: Authorized Gmail API service instance.
      user_id: User's email address. The special value "me"
        can be used to indicate the authenticated user.
      query: String used to filter messages returned.
        Eg.- 'from:user@some_domain.com' for Messages from a particular sender.

    Returns:
      List of Messages that match the criteria of the query. Note that the
      returned list contains Message IDs, you must use get with the
      appropriate ID to get the details of a Message. If an HttpError occurs,
      the error is printed and the messages collected so far (possibly an
      empty list) are returned.
    """
    messages = []
    request_args = {'userId': user_id, 'q': query}
    try:
        while True:
            response = service.users().messages().list(**request_args).execute()
            # A page may legitimately come back without a 'messages' key.
            messages.extend(response.get('messages', []))

            page_token = response.get('nextPageToken')
            if not page_token:
                break
            request_args['pageToken'] = page_token
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
    return messages

In [206]:
def ListMessagesWithLabels(service, user_id, label_ids=None):
    """List all Messages of the user's mailbox with label_ids applied.

    Follows 'nextPageToken' until every page of results has been fetched.

    Args:
      service: Authorized Gmail API service instance.
      user_id: User's email address. The special value "me"
        can be used to indicate the authenticated user.
      label_ids: Only return Messages with these labelIds applied.
        Defaults to no label filter (an empty list).

    Returns:
      List of Messages that have all required Labels applied. Note that the
      returned list contains Message IDs, you must use get with the
      appropriate id to get the details of a Message. If an HttpError occurs,
      the error is printed and the messages collected so far (possibly an
      empty list) are returned.
    """
    # Avoid a shared mutable default; None means "no labels", as [] did before.
    if label_ids is None:
        label_ids = []

    messages = []
    request_args = {'userId': user_id, 'labelIds': label_ids}
    try:
        while True:
            response = service.users().messages().list(**request_args).execute()
            # A page may legitimately come back without a 'messages' key.
            messages.extend(response.get('messages', []))

            page_token = response.get('nextPageToken')
            if not page_token:
                break
            request_args['pageToken'] = page_token
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
    return messages

In [207]:
def GetAttachments(service, user_id, msg_id, store_dir):
    """Get and store the attachments of the Message with the given id.

    Every top-level part of the message that has a file name is decoded and
    written to store_dir under that file name.

    Args:
      service: Authorized Gmail API service instance.
      user_id: User's email address. The special value "me"
        can be used to indicate the authenticated user.
      msg_id: ID of Message containing attachment.
      store_dir: The directory used to store attachments.

    Returns:
      None. If an HttpError occurs, the error is printed.
    """
    try:
        message = service.users().messages().get(userId=user_id,
                                                 id=msg_id).execute()

        for part in message.get('payload', {}).get('parts', []):
            # Strip any directory components: the file name comes from the
            # e-mail and must not be able to escape store_dir.
            filename = os.path.basename(part.get('filename') or '')
            if not filename:
                continue

            body = part.get('body', {})
            if 'attachmentId' in body:
                # Fetch the attachment of *this* message for *this* user.
                attachment = service.users().messages().attachments().get(
                    userId=user_id, messageId=msg_id,
                    id=body['attachmentId']).execute()
                encoded = attachment['data']
            elif 'data' in body:
                # Small attachments may be embedded directly in the part body.
                encoded = body['data']
            else:
                continue

            raw = encoded.encode('ascii')
            # Restore any '=' padding that was stripped from the base64url text.
            raw += b'=' * (-len(raw) % 4)
            file_data = base64.urlsafe_b64decode(raw)

            # Binary mode: attachment bytes must be written unmodified.
            with open(os.path.join(store_dir, filename), 'wb') as f:
                f.write(file_data)

    except errors.HttpError as error:
        print('An error occurred: %s' % error)

In [208]:
"""Modify an existing Message's Labels.
"""

#from apiclient import errors


def ModifyMessage(service, user_id, msg_id, msg_labels):
    """Modify the Labels on the given Message.

    Args:
      service: Authorized Gmail API service instance.
      user_id: User's email address. The special value "me"
        can be used to indicate the authenticated user.
      msg_id: The id of the message required.
      msg_labels: The change in labels, e.g. the dict returned by
        CreateMsgLabels.

    Returns:
      Modified message, containing updated labelIds, id and threadId.
      If an HttpError occurs, the error is printed and None is returned.
    """
    try:
        message = service.users().messages().modify(userId=user_id, id=msg_id,
                                                    body=msg_labels).execute()

        # 'labelIds' may be absent when the message ends up with no labels.
        label_ids = message.get('labelIds', [])

        print('Message ID: %s - With Label IDs %s' % (msg_id, label_ids))
        return message
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None


def CreateMsgLabels(a, b):
  """Build the request body describing a label change.

  Args:
    a: Label IDs to add.
    b: Label IDs to remove.

  Returns:
    A dict holding b under 'removeLabelIds' and a under 'addLabelIds'.
  """
  label_update = {}
  label_update['removeLabelIds'] = b
  label_update['addLabelIds'] = a
  return label_update

### Code to save attachments from messages satisfying a search condition

In [209]:
# Authorize an httplib2 client with the credentials obtained through the
# 'get_credentials' function, then build the Gmail v1 API service object
# that the cells below pass to every helper function.
credentials = get_credentials()
http = credentials.authorize(httplib2.Http())
service = discovery.build('gmail', 'v1', http=http)

In [210]:
# Double-check the granted scope. With only 'gmail.readonly' (as opposed to at least
# 'gmail.modify'), saving the attachments will work, but re-labeling the messages won't.
credentials.retrieve_scopes(http)

{u'https://www.googleapis.com/auth/gmail.modify'}

In [211]:
# Find all messages with a particular subject. In this case, the subject set by the app has '.csv' at the end.
# The query used here should be the same as what you would enter into the Gmail search box.
messages = ListMessagesMatchingQuery(service, 'me', query='subject:*.csv')

In [212]:
# Check how many messages matched the query
len(messages)

97

In [213]:
# Inspect the first five entries; each one holds a message 'id' and its 'threadId'
messages[0:5]

[{u'id': u'152ffeb5592fab4f', u'threadId': u'152ffeb5592fab4f'},
 {u'id': u'152ffeb0f0b81b59', u'threadId': u'152ffeb0f0b81b59'},
 {u'id': u'152ffeab5a752cab', u'threadId': u'152ffeab5a752cab'},
 {u'id': u'152fc1e60a8acfd8', u'threadId': u'152fc1e60a8acfd8'},
 {u'id': u'152fc1728bbfd1bc', u'threadId': u'152fc16e279a432d'}]

In [214]:
# Put the id fields into a DataFrame (one row per message) so that the
# 'id' column can be looped over in the cells below
msg_ids = pd.DataFrame(messages)

In [215]:
# Check the first rows of msg_ids, just to make sure the DataFrame makes sense
msg_ids.head()

Unnamed: 0,id,threadId
0,152ffeb5592fab4f,152ffeb5592fab4f
1,152ffeb0f0b81b59,152ffeb0f0b81b59
2,152ffeab5a752cab,152ffeab5a752cab
3,152fc1e60a8acfd8,152fc1e60a8acfd8
4,152fc1728bbfd1bc,152fc16e279a432d


In [216]:
# Define the directory you want to use to store the attachments.
# Replace the placeholder with a real path and keep the trailing slash,
# so that a file name can be appended to it directly.
directory = '/Dir/Path/Goes/Here/'

In [217]:
# Loop through the message ids. For each id, fetch that message's
# attachments and save them to the directory defined above.
for x in msg_ids['id']:
    GetAttachments(service, 'me', x, directory)

### Change the labels on the messages to mark as read, and to move from inbox to specified folder

In [218]:
# Get the list of labels (Gmail's 'folders') within the Gmail account;
# fall back to an empty list when the response contains no 'labels' entry
results = service.users().labels().list(userId='me').execute()
labels = results.get('labels', [])

In [241]:
# Print each label as "<id>: <name>" under a small header,
# or a notice when the account returned no labels
if labels:
    print('LabelID: LabelName')
    print('------------------')
    for label in labels:
        print(label['id'] + ": " + label['name'])
else:
    print('No labels found.')

LabelID: LabelName
------------------
CATEGORY_PERSONAL: CATEGORY_PERSONAL
CATEGORY_SOCIAL: CATEGORY_SOCIAL
Label_3: PowerliftingData
IMPORTANT: IMPORTANT
CATEGORY_UPDATES: CATEGORY_UPDATES
CATEGORY_FORUMS: CATEGORY_FORUMS
CHAT: CHAT
SENT: SENT
INBOX: INBOX
TRASH: TRASH
CATEGORY_PROMOTIONS: CATEGORY_PROMOTIONS
DRAFT: DRAFT
SPAM: SPAM
STARRED: STARRED
UNREAD: UNREAD


In [195]:
# Designate the labels you wish to add to and remove from the messages.
# Removing 'INBOX' and 'UNREAD' moves the messages out of the inbox and marks them as read.
add_labels = ['Label_3'] # Note: If this is a folder you have added, use the label's id and NOT the name
remove_labels = ['INBOX', 'UNREAD']

In [196]:
# Create the label-change body for the HTTP request: a dict holding the
# label ids to add and the label ids to remove
lbl_body = CreateMsgLabels(add_labels, remove_labels)

In [None]:
# Loop through the message ids and apply the label change to each message
for x in msg_ids['id']:
    ModifyMessage(service, 'me', x, lbl_body)

### From there, load the attachment data saved earlier and analyze! 