Skip to content

Commit

Permalink
Add proper date parsing
Browse files: browse the repository at this point in the history
  • Loading branch information
metaodi committed May 27, 2019
1 parent 2885ce2 commit 5221e27
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
beautifulsoup4
requests
dateparser
17 changes: 15 additions & 2 deletions scraper.py
Expand Up @@ -5,6 +5,7 @@
import sqlite3
import re
from urllib.parse import urljoin
import dateparser
import traceback

DATABASE_NAME = 'data.sqlite'
Expand Down Expand Up @@ -94,10 +95,22 @@ def parse_dates_page(date_page_url, conn):

c = conn.cursor()
for vote_link in vote_links:
vote_date = vote_link.text.strip()
vote_date_str = vote_link.text.strip()
print("")
print("")
print(vote_date)
print(vote_date_str)

try:
vote_datetime = dateparser.parse(
vote_date_str,
languages=['de']
)
vote_date = vote_datetime.date().isoformat()
print("Vote date: %s" % vote_date)
except (AttributeError, ValueError):
print("Couldn't parse date: %s" % vote_date_str)
vote_date = vote_date_str

vote_url = urljoin(date_page_url, vote_link['href'])
vote_page = requests.get(vote_url)
soup = BeautifulSoup(vote_page.content, 'html.parser')
Expand Down

0 comments on commit 5221e27

Please sign in to comment.