# Parsing Robinhood's order flow with Python, by Tom Mason
This notebook pulls data from the .xml reports (the "606 filings") that Robinhood publishes each quarter, in which it discloses the payments it receives from market venues for routing order flow to them. The data is reported at a very granular level (by type of security and by market venue), so this script sums all of the payments together to arrive at a single total for each month, expressed in millions of U.S. dollars. The .xml files can be found on Robinhood's website.

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Create a function that pulls all .xml files from the folder with the 606 filings in it
def get_xml_files(directory):
    """Return the paths of all ``.xml`` files directly inside *directory*.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        A list of path strings, each formed by joining *directory* with a
        file name that ends in ``.xml``. The list is sorted by file name so
        the result is the same on every run and platform.

    Raises:
        OSError: If *directory* cannot be listed (for example, it does not
            exist or is not a directory).
    """
    import os

    # os.listdir returns names in arbitrary order, so sort them to keep the
    # downstream output reproducible. os.path.join avoids producing a doubled
    # separator when *directory* already ends with one.
    return [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith(".xml")
    ]

In [2]:
# Create a function to parse the xml files
def parse_xml(file_name):
    """Sum the payments reported for each month in one XML filing.

    Every ``rMonthly`` element in the document produces one row. Within a
    month, the four ``netPmtPaidRecv...Usd`` fields of every element found
    three levels below ``rMonthly`` are added together.

    Args:
        file_name: Path of the XML file to parse.

    Returns:
        A list of ``[month, year, total]`` lists, one per ``rMonthly``
        element in document order. ``month`` and ``year`` are ints and
        ``total`` is the summed payments divided by 1,000,000.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        AttributeError: If an ``rMonthly`` element has no ``mon`` or ``year``
            child, or a third-level element lacks one of the payment fields.
        TypeError, ValueError: If one of those fields is empty or does not
            hold a number.
    """
    import xml.etree.ElementTree as et

    # Added in this order so the floating-point result is reproducible.
    payment_fields = (
        'netPmtPaidRecvMarketOrdersUsd',
        'netPmtPaidRecvMarketableLimitOrdersUsd',
        'netPmtPaidRecvNonMarketableLimitOrdersUsd',
        'netPmtPaidRecvOtherOrdersUsd',
    )

    root = et.parse(file_name).getroot()
    monthly_orders = []
    for month in root.iter('rMonthly'):
        mon = int(month.find('mon').text)
        year = int(month.find('year').text)
        orders = 0
        # Walk three levels down from the month element. Children that have
        # no grandchildren of their own (such as <mon> and <year>) simply
        # contribute nothing to the total.
        for section in month:
            for group in section:
                for venue in group:
                    for field in payment_fields:
                        orders += float(venue.find(field).text)
        # Convert to millions of U.S. dollars
        monthly_orders.append([mon, year, orders / 1000000])
    return monthly_orders

In [6]:
# Parse every filing in the folder and pool the monthly rows into one list
file_list = get_xml_files('robinhood_files')
xml = []
for file_name in file_list:
    xml.extend(parse_xml(file_name))

In [7]:
# Tabulate the pooled rows; total_payments_mil is in millions of U.S. dollars
df = pd.DataFrame(
    data=xml,
    columns=['month', 'year', 'total_payments_mil'],
)

In [9]:
# Save the table to disk; with pandas' default settings the row index is
# written as the first column of the file
df.to_csv(path_or_buf='robinhood.csv')