In [5]:
from __future__ import print_function

import base64
import json
import os.path
import pickle
from datetime import datetime
from io import StringIO

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import pandas as pd

# OAuth scopes handed to InstalledAppFlow when a new token has to be requested;
# the single scope listed is Gmail's read-only one.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
# Directory holding the OAuth client file (credentials.json) and the cached
# user token (token.pickle) used by the authentication cell.
SECRETS_PATH = "./local/secrets"

In [6]:
# connect GMAIL API
# Both the read and the write below target the same cached-token file.
token_path = f'{SECRETS_PATH}/token.pickle'

# Start from the credentials cached by a previous run, if any.
creds = None
if os.path.exists(token_path):
    with open(token_path, 'rb') as token_file:
        creds = pickle.load(token_file)

# Missing or invalid credentials: refresh them when possible, otherwise run
# the interactive OAuth flow, then cache whatever we ended up with.
if not (creds and creds.valid):
    refreshable = creds and creds.expired and creds.refresh_token
    if refreshable:
        creds.refresh(Request())
    else:
        creds = InstalledAppFlow.from_client_secrets_file(
            f'{SECRETS_PATH}/credentials.json', SCOPES
        ).run_local_server(port=0)
    with open(token_path, 'wb') as token_file:
        pickle.dump(creds, token_file)

# Gmail v1 client used by the cells below.
service = build('gmail', 'v1', credentials=creds)

In [7]:
# email ids
# Collect the ids of every INBOX message matching the ARK trade-notification
# subject. messages.list is paginated, so keep following list_next() until it
# returns None; reading only the first response would silently drop matches
# once they no longer fit on one page.
request = service.users().messages().list(userId="me", labelIds=["INBOX"], q="subject: ARK Investment Management Trading Information")
messages = []
while request is not None:
    results = request.execute()
    # A page with no matches omits the "messages" key entirely.
    messages.extend(results.get("messages", []))
    request = service.users().messages().list_next(request, results)
print(len(messages))

25


In [8]:
# Turn every not-yet-processed trade email into a CSV under ./trades/raw/ and
# remember which message ids have been handled so re-runs skip them.
processed_path = "./local/processed_emails.pickle"

# NOTE(security): pickle.load can execute arbitrary code from the file it
# reads; only load a state file that this notebook wrote itself.
if os.path.exists(processed_path):
    with open(processed_path, "rb") as file:
        # Kept as a set: the file stores a set and membership tests are O(1).
        processed_emails = set(pickle.load(file))
else:
    # First run (no state file yet): nothing has been processed so far.
    processed_emails = set()

for message in messages: # TO-DO: multi-proc
    if message["id"] in processed_emails:
        continue
    try:
        msg = service.users().messages().get(userId="me", id=message["id"], format="full").execute()
        # internalDate is divided by 1000 to get epoch seconds.
        msg_time = float(msg.get("internalDate", 0)) / 1000
        msg_ts = datetime.utcfromtimestamp(int(msg_time))
        # Only bodies stored directly on the payload are handled; a message
        # without payload.body.data raises KeyError and is reported below.
        msg_data = msg["payload"]["body"]["data"]
        print(msg_ts)
        msg_html = base64.urlsafe_b64decode(msg_data).decode()

        # Wrap the markup in StringIO: passing literal HTML straight to
        # read_html is deprecated in recent pandas releases.
        dfs = pd.read_html(StringIO(msg_html), header=0, index_col=0)
        if len(dfs)>1:
            raise ValueError("found more than 1 table")
        df = dfs[0]

        df.columns = df.columns.str.lower()
        df["date"] = pd.to_datetime(df["date"])
        # One email is expected to describe exactly one trading day, because
        # that day becomes the output file name.
        if df["date"].nunique()!=1:
            raise ValueError("number of unique dates should be 1")
        trade_date = df["date"].iloc[0].strftime("%Y-%m-%d")

        df.to_csv(f"./trades/raw/{trade_date}_ARK_TRADES.csv", index=False)
        # Mark as processed only after the CSV was written successfully.
        processed_emails.add(message["id"])
    except Exception as err:
        # Best effort: report the failure and continue with the next message;
        # the id stays unprocessed so a later run retries it.
        print(f"error: {err} for message id: {message.get('id', 'N/A')}")

with open(processed_path, "wb") as file:
    pickle.dump(processed_emails, file)

2021-03-09 01:27:15


In [5]:
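# One-off reset: uncomment and run to overwrite ./local/processed_emails.pickle
# with an empty set, so that every matching email is processed again.
# with open("./local/processed_emails.pickle", "wb") as file:
#     pickle.dump(set({}), file)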