Skip to content

Commit

Permalink
[Bug 990690] Filter EichGate keywords locally.
Browse files Browse the repository at this point in the history
  • Loading branch information
mythmon committed Apr 7, 2014
1 parent ffcc9e3 commit 607601e
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 9 deletions.
34 changes: 25 additions & 9 deletions kitsune/customercare/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@
log = logging.getLogger('k.twitter')


def get_word_blacklist_regex(words=None):
    r"""
    Compile a regex that finds any blacklisted word as a whole token.

    The pattern looks like r'(?<!\w)(foo|bar|baz)(?!\w)'. Lookarounds are
    used instead of r'\b' because r'\b' only matches between a word and
    a non-word character, so entries that start or end with punctuation
    (e.g. '$1000' or '#mozillagate') could never match after a space or
    at the start of the text.

    This is a function so that it isn't calculated at import time,
    and so can be tested more easily.

    :param words: iterable of literal strings to block. Defaults to
        ``settings.CC_WORD_BLACKLIST`` when omitted.
    :returns: a compiled pattern whose group 1 is the matched entry.
    """
    if words is None:
        words = settings.CC_WORD_BLACKLIST

    # Empty entries would turn the alternation into "match anything".
    escaped = [re.escape(word) for word in words if word]
    if not escaped:
        # With nothing to block, return a pattern that can never match
        # rather than one that matches the empty string everywhere.
        return re.compile(r'(?!)')

    return re.compile(r'(?<!\w)(' + '|'.join(escaped) + r')(?!\w)')


@cronjobs.register
def collect_tweets():
# Don't (ab)use the twitter API from dev and stage.
Expand All @@ -45,15 +61,8 @@ def collect_tweets():
settings.TWITTER_ACCESS_TOKEN_SECRET)

search_options = {
'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos'
' OR @firefox_es -brendan -eich -ceo -leadership -protest'
' -marriage -boycott -boycottfirefox -opposition -political'
' -gay -political -tolerance -speech -censor -censorship'
' -resign -resignation -equal -equality -intolerant'
' -intolerance -StandWithMozilla -StandWithFirefox -freedom'
' -bigot -bigots -bigoted -liberal -hypocrite -hypocrites'
' -hypocritical -harassment -beliefs -leftists -bullying'
' -bully -homophobic -homophobia'),
'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos '
'OR @firefox_es'),
'count': settings.CC_TWEETS_PERPAGE, # Items per page.
'result_type': 'recent', # Retrieve tweets by date.
}
Expand Down Expand Up @@ -172,6 +181,13 @@ def _filter_tweet(item, allow_links=False):
statsd.incr('customercare.tweet.rejected.user')
return None

# Exclude problem words
match = get_word_blacklist_regex().search(text)
if match:
bad_word = match.group(1)
statsd.incr('customercare.tweet.rejected.blacklist_word.' + bad_word)
return None

return item


Expand Down
9 changes: 9 additions & 0 deletions kitsune/customercare/tests/test_cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ def test_username_and_tweet_contain_firefox(self):
self.tweet['text'] = 'My Firefox crashes :-( Any advice?'
assert _filter_tweet(self.tweet) is not None

@override_settings(CC_WORD_BLACKLIST=['foo'])
def test_word_blacklist(self):
    """Blacklisted words reject a tweet only as whole words."""
    # A tweet containing the blacklisted word on its own is discarded.
    self.tweet['text'] = 'the word "foo" should be blocked.'
    rejected = _filter_tweet(self.tweet)
    assert rejected is None

    # The same letters inside a longer word must pass the filter.
    self.tweet['text'] = 'but "food" should not be blocked.'
    accepted = _filter_tweet(self.tweet)
    assert accepted is not None


class GetOldestTweetTestCase(TestCase):

Expand Down
24 changes: 24 additions & 0 deletions kitsune/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,30 @@ def read_only_mode(env):
CC_REPLIES_GOAL = 175 # Goal # of replies in 24 hours.
CC_TWEETS_DAYS = 7 # Limit tweets to those from the last 7 days.
CC_BANNED_USERS = ['lucasbytegenius'] # Twitter handles banned from AoA
# If any of these words show up in a tweet, it probably isn't
# actionable, so don't add it to the AoA.
#
# Entries are literal strings: the customercare cron escapes each one
# before joining them into a single regex, so characters such as '$'
# and '#' carry no special regex meaning here. That regex is compiled
# without flags, so matching is case-sensitive at that level.
# NOTE(review): presumably the tweet text is lowercased before it is
# matched, which is why every entry is lowercase -- confirm in
# _filter_tweet before adding mixed-case entries.
CC_WORD_BLACKLIST = [
'$1000',
'boycott',
'#boycottfirefox',
'brendan',
'ceo',
'civil',
'cupid',
'donation',
'eich',
'leadership',
'lgbt',
'marriage',
'#mozillagate',
'opposition',
'political',
'prop8',
'proposition',
'protest',
'rights',
]


TWITTER_COOKIE_SECURE = True
TWITTER_CONSUMER_KEY = ''
Expand Down

0 comments on commit 607601e

Please sign in to comment.