In [None]:
import os
from email.utils import parseaddr, parsedate_to_datetime

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

In [None]:
# OAuth scope requested for the Gmail client: read-only mailbox access.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def get_service(token_path='token.json', client_secrets_path='credentials.json'):
    """Build a Gmail API v1 client, reusing cached OAuth credentials if possible.

    Args:
        token_path: File holding the cached user credentials. It is read when
            it exists and (re)written whenever credentials are refreshed or
            newly obtained.
        client_secrets_path: OAuth client-secrets file used to start the
            local-server consent flow when no usable cached credentials exist.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=creds)``.
    """
    creds = None
    if os.path.exists(token_path):
        # Load the cached token with the same scopes the consent flow requests,
        # so both branches produce credentials for identical scopes.
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Expired but refreshable: renew without user interaction.
            creds.refresh(Request())
        else:
            # No usable credentials: run the interactive consent flow.
            # port=0 is passed through to run_local_server unchanged.
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the refreshed/new credentials for the next run.
        with open(token_path, 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)


In [None]:
# Module-level Gmail client; get_and_process_messages_before reads this global.
service = get_service()

In [None]:
def get_header_val(msg, val):
  """Return the value of the first header of ``msg`` named ``val``.

  Args:
    msg: Message dict whose ``msg['payload']['headers']`` is a list of
      ``{'name': ..., 'value': ...}`` dicts.
    val: Header name to look up. Names are compared case-insensitively, so
      ``'From'`` also matches ``'from'`` or ``'FROM'``.

  Returns:
    The ``'value'`` of the first matching header.

  Raises:
    IndexError: If the message has no header with that name.
  """
  wanted = val.lower()
  for header in msg['payload']['headers']:
    if header['name'].lower() == wanted:
      return header['value']
  raise IndexError(f'message has no {val!r} header')

def get_date(msg):
  """Return the value of the message's ``Date`` header."""
  return get_header_val(msg, 'Date')

def get_from(msg):
  """Return the value of the message's ``From`` header."""
  return get_header_val(msg, 'From')

In [None]:
def month_to_string(month):
  """Map an English three-letter month abbreviation to its two-digit number.

  ``'Jan'`` maps to ``'01'`` through ``'Dec'`` to ``'12'``. The lookup is
  case-sensitive; any other key raises ``KeyError``.
  """
  abbreviations = (
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
  )
  # Position in the calendar (1-based), rendered with a leading zero.
  number_by_abbreviation = {
    abbreviation: f'{position:02d}'
    for position, abbreviation in enumerate(abbreviations, start=1)
  }
  return number_by_abbreviation[month]

def to_string(date):
  """Convert an email ``Date`` header value to a ``YYYY-MM-DD`` string.

  Parsing is delegated to ``email.utils.parsedate_to_datetime``, so an
  optional leading weekday and irregular whitespace are accepted. The
  calendar date is taken as written in the header (in the header's own UTC
  offset); it is not converted to UTC.

  Args:
    date: Header text such as ``'Mon,  9 Oct 2023 01:15:44 +0000'`` or
      ``'14 Feb 2023 13:59:46 -0500'``.

  Returns:
    The zero-padded ISO date, e.g. ``'2023-10-09'``.

  Raises:
    ValueError or TypeError: If the text cannot be parsed as a date (the
      exact type comes from ``parsedate_to_datetime`` and depends on the
      Python version).
  """
  parsed = parsedate_to_datetime(date)
  return parsed.date().isoformat()

# Smoke-check the parser on both header shapes seen here: one with a leading
# weekday (and a padded single-digit day) and one without a weekday.
to_string('Mon,  9 Oct 2023 01:15:44 +0000')
to_string('14 Feb 2023 13:59:46 -0500')


In [None]:
# Sender email address -> display name, filled in while messages are processed.
all_senders = {}
# Message id -> 1; used as a set of the message ids seen so far.
total_emails = {}

In [None]:
def get_and_process_messages_before(date, user_id='anna@annalaifer.com', max_results=200):
  """Record the sender of every message matching the Gmail query ``before:{date}``.

  All result pages are followed with the API client's ``list_next`` helper,
  so one call processes every matching message. Paging does not depend on
  the messages' own ``Date`` headers.

  Reads the module-level ``service`` client and updates two module-level
  dicts: ``total_emails`` (message id -> 1) and ``all_senders`` (email
  address -> display name; a later message from the same address overwrites
  the stored name).

  Args:
    date: Cutoff interpolated into the Gmail search operator ``before:``,
      e.g. ``'2023-02-17'``.
    user_id: Mailbox passed as ``userId`` to the API calls.
    max_results: Page size passed as ``maxResults`` to ``messages().list``.

  Returns:
    ``None`` once all pages have been processed. The falsy value is what
    stops a ``while date:`` driver loop.
  """
  query = f'before:{date}'
  print(f'fetching messages before {date} with query {query}')
  messages_api = service.users().messages()
  request = messages_api.list(userId=user_id, maxResults=max_results, q=query)

  # list_next returns None when the response carries no further page token.
  while request is not None:
    results = request.execute()
    for message in results.get('messages', []):
      total_emails[message['id']] = 1
      # Only the From header is needed, so ask for metadata instead of the
      # full message payload.
      msg = messages_api.get(
        userId=user_id,
        id=message['id'],
        format='metadata',
        metadataHeaders=['From'],
      ).execute()
      try:
        from_ = get_from(msg)
      except (IndexError, KeyError):
        # No From header in the response; one such message should not abort
        # the whole scan.
        print(f"skipping message {message['id']}: no From header")
        continue
      from_name, from_email = parseaddr(from_)
      if not from_email:
        # parseaddr could not extract an address; keep the raw header text
        # as the key so the sender is not lost.
        from_name, from_email = '', from_
      all_senders[from_email] = from_name
    print(f'found {len(all_senders)} senders in {len(total_emails)} emails')
    request = messages_api.list_next(request, results)

  return None

In [None]:
# Walk the mailbox starting from this cutoff date.
date = '2023-02-17'
while date:
  next_date = get_and_process_messages_before(date)
  # A falsy return ends the loop. An unchanged date also ends it: repeating
  # the identical query would fetch the same messages again and never finish.
  if next_date == date:
    break
  date = next_date

In [None]:
import csv

# Path of the CSV report to write.
filename = "output.csv"

# newline='' lets the csv module control line endings. An explicit UTF-8
# encoding keeps non-ASCII display names from failing on platforms whose
# default text encoding cannot represent them.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
  # Column names, in output order.
  fieldnames = ['email','name']
  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

  # Header row first, then one row per sender, sorted by email address.
  writer.writeheader()
  writer.writerows(
    {'email': email, 'name': name}
    for email, name in sorted(all_senders.items())
  )