Skip to content

Commit

Permalink
facebook email: handle other datetime string formats
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Jul 9, 2019
1 parent 878f803 commit dcb6156
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 5 deletions.
17 changes: 12 additions & 5 deletions granary/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import xml.sax.saxutils

from bs4 import BeautifulSoup
import dateutil.parser
import mf2util
from oauth_dropins.webutil import util

Expand Down Expand Up @@ -1669,12 +1670,7 @@ def email_to_object(cls, html):
if not comment:
return None

# example email date/time string: 'December 14 at 12:35 PM'
published = datetime.strptime(when.get_text(strip=True), '%B %d at %I:%M %p')\
.replace(year=now_fn().year)

obj = {
'published': published.isoformat(util.T),
'author': {
'objectType': 'person',
'displayName': name,
Expand All @@ -1685,6 +1681,17 @@ def email_to_object(cls, html):
'to': [{'objectType':'group', 'alias':'@public'}],
}

# try to parse datetime string. examples seen in the wild:
# December 14 at 12:35 PM
# 5 July at 21:50
when = when.get_text(strip=True)
try:
parsed = dateutil.parser.parse(when, default=now_fn())
obj['published'] = parsed.isoformat(util.T)
except (ValueError, OverflowError):
logging.warning("Couldn't parse datetime string %r", when, exc_info=True)

# extract Facebook post ID from URL
url_parts = urllib.parse.urlparse(resp_url)
path = url_parts.path.strip('/').split('/')
url_params = urllib.parse.parse_qs(url_parts.query)
Expand Down
22 changes: 22 additions & 0 deletions granary/tests/test_facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3152,6 +3152,28 @@ def test_email_to_object_comment_different_text(self):
email = COMMENT_EMAIL_USERNAME.replace('commented on your', 'commented on')
self.assert_equals(EMAIL_COMMENT_OBJ_USERNAME, self.fb.email_to_object(email))

def test_email_to_object_comment_different_datetime_format(self):
"""A day-first, 24-hour date string gives the same object as the default one.

Swaps the fixture's 'December 14 at 12:35 PM' for '14 December at 12:35' and
expects email_to_object() to still return EMAIL_COMMENT_OBJ_USERNAME unchanged.

Original error report:
https://console.cloud.google.com/errors/CKORxvuphMyeIw
"""
# record the expected now_fn() call with a fixed return value, then switch mox
# to replay mode. neither date string includes a year, so presumably this
# fixed date is where the year comes from - confirm against email_to_object.
facebook.now_fn().AndReturn(datetime(1999, 1, 1))
self.mox.ReplayAll()

# same email as the fixture, with only the date/time string reformatted
email = COMMENT_EMAIL_USERNAME.replace('December 14 at 12:35 PM',
'14 December at 12:35')
self.assert_equals(EMAIL_COMMENT_OBJ_USERNAME, self.fb.email_to_object(email))

def test_email_to_object_comment_bad_datetime(self):
"""An unparseable date string gives an object without a 'published' field.

Swaps the fixture's 'December 14 at 12:35 PM' for 'asdf 29 qwert' and expects
email_to_object() to return EMAIL_COMMENT_OBJ_USERNAME minus its 'published'
key, ie the rest of the object is still returned.

Original error report:
https://console.cloud.google.com/errors/CKORxvuphMyeIw
"""
# record the expected now_fn() call with a fixed return value, then switch mox
# to replay mode
facebook.now_fn().AndReturn(datetime(1999, 1, 1))
self.mox.ReplayAll()

# same email as the fixture, with the date/time string replaced by junk
email = COMMENT_EMAIL_USERNAME.replace('December 14 at 12:35 PM',
'asdf 29 qwert')
# deep copy so that deleting 'published' doesn't mutate the shared fixture
expected = copy.deepcopy(EMAIL_COMMENT_OBJ_USERNAME)
del expected['published']
self.assert_equals(expected, self.fb.email_to_object(email))

def test_email_to_object_like(self):
facebook.now_fn().AndReturn(datetime(1999, 1, 1))
self.mox.ReplayAll()
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ mf2py>=1.1.2
mf2util>=0.5.0
mox3>=0.24.0
oauth-dropins>=1.14
python-dateutil
requests>=2.10.0
requests-toolbelt>=0.6.2
urllib3>=1.14
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, *args, **kwargs):
'mf2py>=1.1.2',
'mf2util>=0.5.0',
'oauth-dropins>=1.14',
'python-dateutil',
'requests-toolbelt>=0.6.2',
'requests>=2.10.0',
'urllib3>=1.14',
Expand Down

0 comments on commit dcb6156

Please sign in to comment.