In [1]:
'''
Import Statements: 
(i) pandas for dataframe and data analysis
(ii) numpy for mathematics and LinAlg Operations
(iii) XML tree (xml.etree.ElementTree) to parse the XML information table (used by generate_dataframe)
(iv) sec_edgar_downloader to download SEC files
'''
import glob, os
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
# To edit the sec_edgar_downloader, navigate to: /usr/local/lib/python3.7/sec_edgar_downloader/Downloader.py
# See: https://pypi.org/project/sec-edgar-downloader/ for further documentation
from sec_edgar_downloader import Downloader

In [2]:
'''
Method to download a 13F-HR SEC Filing from a Mutual Fund or Hedge Fund and save its information table
    @param: CIK: a string representing the CIK number for the fund
    @param: saveFile: a string representing the text file to save the data
                      eg. 'output.txt'
    @param: basePath: a string path to the directory that holds sec_edgar_filings
    @return: None
'''
def download_file(CIK, saveFile, basePath):
    '''
    Download one 13F-HR filing for a fund and save its XML information table.

    Requests a single 13F-HR filing through sec_edgar_downloader, locates the
    downloaded .txt submission in basePath/sec_edgar_filings/CIK/13F-HR, and
    copies every line from the first one containing "<informationTable" up to
    and including the first one containing "</informationTable>" into saveFile
    inside that same directory.
        @param: CIK: a string representing the CIK number for the fund
        @param: saveFile: name of the file to write the extracted XML to,
                          eg. 'output.txt'
        @param: basePath: directory that holds (or will hold) sec_edgar_filings
        @return: None
        @raise: FileNotFoundError when no downloaded filing exists for the CIK
    '''
    dl = Downloader(basePath)
    dl.get('13F-HR', CIK, num_filings_to_download=1)

    filing_dir = os.path.join(basePath, "sec_edgar_filings", str(CIK), "13F-HR")
    # saveFile is written into this same directory, so a '.txt' save name from
    # an earlier run would match the pattern too; it must never be treated as
    # a filing. glob.escape keeps special characters in basePath literal.
    candidates = [
        found
        for found in glob.glob(os.path.join(glob.escape(filing_dir), "*.txt"))
        if os.path.basename(found) != saveFile
    ]

    print("Loading CIK: {}".format(str(CIK)))
    if not candidates:
        raise FileNotFoundError(
            "No 13F-HR filing found for CIK {} in {}".format(CIK, filing_dir))
    # Several filings may be present from earlier runs. Take the most recently
    # modified one (path as tie-breaker) so the choice does not depend on glob
    # order. Amend this to pick a different filing.
    path = max(candidates, key=lambda found: (os.path.getmtime(found), found))

    captured = []
    inside_table = False
    with open(path, 'r') as filing:
        for line in filing:
            if "<informationTable" in line:
                inside_table = True
            if inside_table:
                captured.append(line)
            # Stop at the first closing tag; the rest of the submission is not needed.
            if "</informationTable>" in line:
                break

    with open(os.path.join(filing_dir, saveFile), "w") as text_file:
        text_file.write("".join(captured))

In [3]:
'''
Method to generate a dataframe given a .txt or .xml save file containing 13-F-HR data
    @param: saveFile: a string representing the text or xml file that has the saved data
                      eg. 'output.txt'
    @return: a pandas dataframe with the following columns:
            (1) Company Name, (2) Class, (3) Value, (4) PutCall
'''
def generate_dataframe(saveFile):
    '''
    Build a dataframe from a saved 13F-HR XML information table.

    Each direct child of the XML root becomes one row. For every row the
    nameOfIssuer, titleOfClass, value and putCall child elements are looked up;
    a missing or empty element is recorded as NaN and 'value' is converted to int.
        @param: saveFile: path (or file object) of the text or xml file holding
                          the saved data, eg. 'output.txt'
        @return: a pandas dataframe with the following columns:
                (1) Company Name, (2) Class, (3) Value, (4) PutCall
    '''
    root = ET.parse(saveFile).getroot()
    # ElementTree spells a namespaced tag as '{uri}name'. Reuse the '{uri}' part
    # for child lookups, and use no prefix at all when the root carries no
    # namespace (otherwise every lookup would silently miss).
    if root.tag.startswith('{'):
        preTag = root.tag.split('}')[0] + '}'
    else:
        preTag = ''

    cols = {'nameOfIssuer': 'Company Name', 'titleOfClass': 'Class',
            'value': 'Value', 'putCall': 'PutCall'}
    data = {label: [] for label in cols.values()}
    for entry in root:
        for tag, label in cols.items():
            field = entry.find(preTag + tag)
            text = None if field is None else field.text
            if text is None or not text.strip():
                # Element absent or empty, e.g. putCall on a plain stock holding.
                data[label].append(np.nan)
            elif tag == 'value':
                data[label].append(int(text))
            else:
                data[label].append(text)
    return pd.DataFrame(data)

# Common CIKs
D.E. Shaw: 1009207 <br>
Citadel Advisors LLC: 1423053 <br>
Two Sigma Advisors LP: 1478735 <br>
Renaissance Technologies LLC: 1037389 <br>
Jane Street Group LLC: 1595888 <br>
Bridgewater Associates LP: 1350694 <br>
AQR Capital Management LLC: 1167557 <br>
Elliott Management Corporation: 1048445 <br>
Millennium Management LLC: 0001273087 <br>


In [4]:
'''
Method to download multiple different files for different funds. 
    @param: basePath: a path to the storage location of each funds data
    @param: CIKS: list of CIK numbers for each fund searched
    @return: None
'''
def download_files(basePath, CIKS):
    '''
    Run download_file once for every fund in CIKS, in the order given.
    Each fund's extracted data is saved under the fixed name 'output.txt'.
        @param: basePath: a path to the storage location of each fund's data
        @param: CIKS: list of CIK numbers, one per fund
        @return: None
    '''
    save_name = 'output.txt'
    for fund_cik in CIKS:
        download_file(fund_cik, save_name, basePath)

# Funds to fetch; names taken from the "Common CIKs" list.
CIKS_to_download = [
    '1009207',  # D.E. Shaw
    '1423053',  # Citadel Advisors LLC
    '1478735',  # Two Sigma Advisors LP
    '1037389',  # Renaissance Technologies LLC
    '1595888',  # Jane Street Group LLC
    '1350694',  # Bridgewater Associates LP
    '1167557',  # AQR Capital Management LLC
]
download_files("/Users/mymacbook/Desktop/SEC Parser", CIKS_to_download)

Loading CIK: 1009207
Loading CIK: 1423053
Loading CIK: 1478735
Loading CIK: 1037389
Loading CIK: 1595888
Loading CIK: 1350694
Loading CIK: 1167557


In [5]:
'''
Method to load the saved 13F-HR data of several funds given a base path and their CIKs
    @param: basePath: a path to the directory holding sec_edgar_filings, where each fund's output.txt is stored
    @param: CIKS: a list of strings, one CIK number per fund being loaded
    @return: fund_dict: a dictionary pointing each CIK to a dataframe with 
                        columns [Company Name, Class, Value, PutCall] for the given fund
'''
def loadDataFrame(basePath, CIKS):
    '''
    Parse the saved output.txt of every fund in CIKS into a dataframe.
    Prints a progress line per fund and reads
    basePath/sec_edgar_filings/<CIK>/13F-HR/output.txt through generate_dataframe.
        @param: basePath: a path to the directory holding sec_edgar_filings
        @param: CIKS: list of CIK numbers, one per fund
        @return: a dictionary pointing each CIK to the dataframe built for that fund
    '''
    relative_template = "/sec_edgar_filings/{}/13F-HR/output.txt"
    frames_by_cik = {}
    for fund_cik in CIKS:
        cik_text = str(fund_cik)
        print("Processing CIK: {}".format(cik_text))
        saved_path = basePath + relative_template.format(cik_text)
        frames_by_cik[fund_cik] = generate_dataframe(saved_path)
    return frames_by_cik

# Funds whose saved output.txt is parsed; names taken from the "Common CIKs" list.
CIKS_to_load = [
    '1009207',  # D.E. Shaw
    '1423053',  # Citadel Advisors LLC
    '1478735',  # Two Sigma Advisors LP
    '1037389',  # Renaissance Technologies LLC
    '1595888',  # Jane Street Group LLC
    '1350694',  # Bridgewater Associates LP
    '1167557',  # AQR Capital Management LLC
]
fund_dict = loadDataFrame("/Users/mymacbook/Desktop/SEC Parser", CIKS_to_load)

Processing CIK: 1009207
Processing CIK: 1423053
Processing CIK: 1478735
Processing CIK: 1037389
Processing CIK: 1595888
Processing CIK: 1350694
Processing CIK: 1167557


In [17]:
def filterOnOptions(fund_dict, CIK):
    '''
    Compare a fund's call and put option positions company by company.

    Outer-joins the fund's 'Put' rows (columns suffixed _x) with its 'Call' rows
    (columns suffixed _y) on 'Company Name', fills every missing cell with 0 and
    adds 'Diff' = Value_y - Value_x, i.e. call value minus put value.
        @param: fund_dict: a dictionary pointing each CIK to a dataframe with
                           columns [Company Name, Class, Value, PutCall]
        @param: CIK: the key in fund_dict of the fund to analyse
        @return: the joined dataframe sorted by 'Diff', largest first
    '''
    df = fund_dict[CIK]
    puts = df[df['PutCall'] == 'Put']
    calls = df[df['PutCall'] == 'Call']
    option_df = pd.merge(puts, calls, on='Company Name', how='outer')
    option_df = option_df.fillna(0)
    option_df['Diff'] = option_df['Value_y'] - option_df['Value_x']
    # sort_values returns a new frame rather than sorting in place, so the
    # sorted result itself has to be returned.
    return option_df.sort_values('Diff', ascending=False)

def filterOnStock(fund_dict, CIK):
    '''
    Return a fund's non-option holdings, largest position first.

    Keeps the rows whose 'PutCall' is neither 'Put' nor 'Call', removes the
    'PutCall' column and sorts by 'Value' in descending order.
        @param: fund_dict: a dictionary pointing each CIK to a dataframe with
                           columns [Company Name, Class, Value, PutCall]
        @param: CIK: the key in fund_dict of the fund to analyse
        @return: a dataframe with the remaining columns, sorted by 'Value' descending
    '''
    df = fund_dict[CIK]
    # A direct boolean filter selects the rows that are neither puts nor calls.
    # Self-merging the "not Put" and "not Call" subsets on every column would
    # multiply any rows that are exact duplicates of each other.
    is_option = df["PutCall"].isin(["Put", "Call"])
    stock_df = df[~is_option]
    # drop and sort_values both return new frames; chain them so that their
    # results are actually kept.
    stock_df = stock_df.drop(columns=["PutCall"])
    return stock_df.sort_values("Value", ascending=False)