Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Tree: 4655630a65
Fetching contributors…

Cannot retrieve contributors at this time

40 lines (30 sloc) 1.1 KB
Example for using langid.py to identify the language of messages
on a twitter livestream. Optionally, it can also filter messages
and display only those in a target language(s).
Expects a Twitterstream on STDIN, such as the one provided by:
# curl -u<username> -s
Outputs "lang: message" one-per-line to STDOUT
Marco Lui, June 2012
import sys
import langid
import json
import optparse
def label_tweets(lines, classify, lang_set=None):
    """Yield ``(lang, text)`` for each non-retweeted message in *lines*.

    Args:
        lines: iterable of strings, each holding one JSON object.
        classify: callable taking the message text and returning a
            ``(lang, confidence)`` pair (e.g. ``langid.classify``).
        lang_set: optional collection of language codes; when given, only
            messages whose detected language is in it are yielded.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
    """
    for line in lines:
        # A blank line (e.g. a stream keep-alive) is not a JSON document;
        # json.loads would raise ValueError on it and end the whole run.
        if not line.strip():
            continue
        tweet = json.loads(line)
        # Objects without a 'retweet_count' key yield None here and are
        # skipped, as are messages that have been retweeted.
        if tweet.get('retweet_count') != 0:
            continue
        text = tweet.get('text')
        if not text:
            continue
        lang, _conf = classify(text)
        if lang_set is None or lang in lang_set:
            yield lang, text


if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option('-l', '--langs', dest='langs', help='comma-separated set of target ISO639 language codes (e.g en,de)')
    opts, args = parser.parse_args()

    # None means "no filtering": every detected language is printed.
    lang_set = set(opts.langs.split(",")) if opts.langs else None

    try:
        for lang, text in label_tweets(sys.stdin, langid.classify, lang_set):
            out = u"{0}: {1}".format(lang, text)
            if sys.version_info[0] < 3:
                # Python 2: emit UTF-8 bytes so non-ASCII text can be
                # written to a pipe without an implicit ASCII encode.
                out = out.encode('utf8')
            # Parenthesised single argument works as both the Python 2
            # print statement and the Python 3 print function.
            print(out)
    except (IOError, KeyboardInterrupt):
        # Terminate on broken pipe or ^C
        pass
Jump to Line
Something went wrong with that request. Please try again.