Skip to content

Commit

Permalink
added supertype, closes #10
Browse files Browse the repository at this point in the history
  • Loading branch information
navolotsky committed Oct 24, 2018
1 parent 29b1f07 commit 2d6450f
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 2 deletions.
Binary file modified dist/web_crawler-0.1-py3-none-any.whl
Binary file not shown.
Binary file modified dist/web_crawler-0.1.tar.gz
Binary file not shown.
6 changes: 4 additions & 2 deletions web_crawler/ksrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ def get_decision_headers(pagesNumber=None, sourcePrefix='КСРФ'):
('not unique', notUniqueHeaders)
page = html.document_fromstring(get_page_html_by_num(
driver, template, i))
+if False: # debug print:
+print(f"Pages downloaded: {i-1}/{pagesNumber}")
driver.quit()
return courtSiteContent

Expand Down Expand Up @@ -156,7 +158,7 @@ def download_text(url, docID, folderName, needSaveTxtFile=False,
return pathToTXT


-def download_all_texts(courtSiteContent, folderName='Decision files'):
+def download_all_texts(courtSiteContent, folderName='Decision files', needSaveTxtFile=True):
# TO DO: check for downloading and converting
if not os.path.exists(folderName):
os.mkdir(folderName)
Expand All @@ -165,7 +167,7 @@ def download_all_texts(courtSiteContent, folderName='Decision files'):
continue
pathToTXT = download_text(
courtSiteContent[decisionID]['text_source_url'],
-decisionID, folderName)
+decisionID, folderName, needSaveTxtFile=True)
courtSiteContent[decisionID]['text_location'] = pathToTXT
return courtSiteContent

Expand Down

0 comments on commit 2d6450f

Please sign in to comment.