Skip to content

Commit

Permalink
fixed complete-submission-extract-broken
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryansmccoy committed May 1, 2019
1 parent 488b38c commit 6462844
Show file tree
Hide file tree
Showing 13 changed files with 532 additions and 202 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -35,7 +35,7 @@ wheels/
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
# Unit tests / coverage reports
htmlcov/
.tox/
.coverage
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Expand Up @@ -8,5 +8,5 @@ python:
# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
install: pip install -U tox-travis

# Command to run tests, e.g. python setup.py test
# Command to run tests, e.g. python setup.py tests
script: tox
144 changes: 72 additions & 72 deletions README.rst

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions docs/installation.rst
Expand Up @@ -19,8 +19,6 @@ Once you have a copy of the source, you can install it with:

.. code-block:: console
$ python setup.py install
$ pip install -r requirements.txt
.. _Github repo: https://github.com/ryansmccoy/py_sec_edgar
.. _tarball: https://github.com/ryansmccoy/py_sec_edgar/tarball/master
15 changes: 6 additions & 9 deletions py_sec_edgar/broker.py
@@ -1,24 +1,21 @@
from pprint import pprint
import logging

logger = logging.getLogger(__name__)

from py_sec_edgar.download import download_filing
from py_sec_edgar.extract import extract_contents
from py_sec_edgar.utilities import prepare_filepaths
from py_sec_edgar.filing import download_filing
from py_sec_edgar.utilities import prepare_message

def broker(filing, extract_filing_contents=False):
def broker(sec_filing, extract_filing_contents=False):
"""
Manages the individual filing extraction process
"""
filing_message = prepare_message(sec_filing)

feed_item = prepare_filepaths(filing)
pprint(feed_item)
feed_item = download_filing(feed_item)
filing_message = download_filing(filing_message)

if extract_filing_contents:

feed_item = extract_contents(feed_item)
feed_item = extract_contents(filing_message)



Expand Down
62 changes: 23 additions & 39 deletions py_sec_edgar/download.py
@@ -1,49 +1,33 @@
import os
import shutil
import logging
# -*- coding: utf-8 -*-
from pprint import pprint
import os, sys
import click

logger = logging.getLogger(__name__)
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.5f' % x) # pandas
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 600)

from py_sec_edgar.proxy import ProxyRequest
import zipfile
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

def download_filing(feed_item, zip_filing=False):
"""
{'CIK': 104169,
'Company Name': 'Walmart Inc.',
'Date Filed': '2019-03-28',
'Filename': 'edgar/data/104169/0000104169-19-000016.txt',
'Form Type': '10-K',
'cik_directory': 'C:\\sec_gov\\Archives\\edgar\\data\\104169\\',
'extracted_filing_directory': 'C:\\sec_gov\\Archives\\edgar\\data\\104169\\000010416919000016',
'filing_filepath': 'C:\\sec_gov\\Archives\\edgar\\data\\104169\\0000104169-19-000016.txt',
'filing_folder': '000010416919000016',
'filing_url': 'https://www.sec.gov/Archives/edgar/data/104169/0000104169-19-000016.txt',
'filing_zip_filepath': 'C:\\sec_gov\\Archives\\edgar\\data\\104169\\0000104169-19-000016.zip',
'published': '2019-03-28',
'url': 'https://www.sec.gov/Archives/edgar/data/104169/0000104169-19-000016.txt'}
"""
if not os.path.exists(feed_item['cik_directory']):
import py_sec_edgar.feeds as py_sec_edgar_feeds
import py_sec_edgar.broker as py_sec_edgar_etl

os.makedirs(feed_item['cik_directory'])
@click.command()
@click.option('--ticker_list_filter', default=True)
@click.option('--form_list_filter', default=True)
@click.option('--save_output', default=False)
def main(ticker_list_filter, form_list_filter, save_output):

if not os.path.exists(feed_item['filing_filepath']):
py_sec_edgar_feeds.update_full_index_feed(skip_if_exists=True)

g = ProxyRequest()
df_filings_idx = py_sec_edgar_feeds.load_filings_feed(ticker_list_filter=ticker_list_filter, form_list_filter=form_list_filter)

g.GET_FILE(feed_item['filing_url'], feed_item['filing_filepath'])
for i, filing in df_filings_idx.iterrows():

# todo: celery version of download full
# consume_complete_submission_filing_txt.delay(feed_item, filepath_cik)
py_sec_edgar_etl.broker(filing)

elif os.path.exists(feed_item['filing_filepath']) or os.path.exists(feed_item['filing_zip_filepath']):
logger.info(f"\n\nFile Already exists\t {feed_item['filing_filepath']}\n\n")
else:
logger.info(f"\n\nSomething Might be wrong\t {feed_item['filing_filepath']}\n\n")
if __name__ == "__main__":

if zip_filing:

zipfile.ZipFile(feed_item['filing_zip_filepath'], mode='w', compression=zipfile.ZIP_DEFLATED).write(feed_item['filing_filepath'])
os.remove(feed_item['filing_filepath'])

return feed_item
main()
33 changes: 0 additions & 33 deletions py_sec_edgar/extract.py

This file was deleted.

0 comments on commit 6462844

Please sign in to comment.