twitter-corpus-crawler-python

A dead simple, Python-based Twitter crawler for gathering corpora.

1. Fetch a given number of tweets returned by a keyword search on Twitter:

```python
from tccp import search

for tweet in search({"q": "microsoft"}, 3):
    print(tweet["contents"])
# => New Deal:Microsoft Office Professional Plus 2016Price:\u20ac9.95 Delivery:24h Just 24h left! https://t.co/UdmXlHWvcQ
# => Microsoft Office 365 \u2013 https://t.co/CJFadmm3yT
# => Visit the Snapzu "tribe" of the hour: /hashtag/Microsoft?src=hash - Feel free to submit related blog posts or media!
```
2. Fetch a given number of conversations returned by a keyword search on Twitter:

```python
from tccp import search_conversation

for conversation in search_conversation({"q": "sexy", "l": "en"}, 1):
    for tweet in conversation:
        print(tweet["author"] + ": " + tweet["contents"])
# => lyndeyhighan: i hate to brag but this is my boyfriend
# => condorsix: This the sexiest dude I know on god
# => vasquezlaziah21: Umm......con?
# => condorsix: Bro peep that man. You can't tell ur homie he a sexy dude are u really his homie?
```
3. Continue searching from the last fetched tweet, even if the previous search was terminated by an exception (see the resume sketch after this list):

```python
from tccp import search

for tweet in search({"q": "microsoft"}, 1, continue_path="last_searching.tmp"):
    print(tweet["contents"])
# => New Deal:Microsoft Office Professional Plus 2016Price:\u20ac9.95 Delivery:24h Just 24h left! https://t.co/UdmXlHWvcQ

for tweet in search({"q": "microsoft"}, 1, continue_path="last_searching.tmp"):
    print(tweet["contents"])
# => Microsoft Office 365 \u2013 https://t.co/CJFadmm3yT
```
4. Fetch tweets without keywords:

```python
from tccp import search

for tweet in search({"l": "en"}, 3):
    print(tweet["contents"])
# fetches any 3 tweets from English-language users
```
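
The `continue_path` option in item 3 lends itself to crawls that survive interruptions. Below is a minimal sketch of that pattern; the retry loop, the blanket `except Exception`, and the state file name are assumptions for illustration, not behaviour guaranteed by tccp.

```python
# Sketch: resume a crawl after an interruption (assumes search() raises on
# transient errors and that continue_path restores the saved position on rerun).
import time

from tccp import search

collected = []
while True:
    try:
        for tweet in search({"q": "microsoft"}, 1000, continue_path="microsoft.tmp"):
            collected.append(tweet["contents"])
        break  # the batch finished without an exception
    except Exception as err:  # hypothetical transient failure (network, parsing, ...)
        print("search interrupted: %s - retrying from saved state" % err)
        time.sleep(5)

print("%d tweets collected" % len(collected))
```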

Usage

```python
# search
from tccp import search

# fetch the 10 most recent search results
for tweet in search({"q": "trump"}, 10):
    # properties
    print(tweet["author"])
    print(tweet["contents"])
    print(tweet["tweet_id"])
    print(tweet["has_parent_tweet"])
    print(tweet["num_replies"])
    print(tweet["num_retweet"])
    print(tweet["num_like"])
    print(tweet["mentions"])

# fetch tweets indefinitely (no count given)
for tweet in search({"q": "attack"}): 
    print(tweet)
    break

# search only conversations
from tccp import search_conversation

# fetch the 10 most recent conversations matching a keyword
for conversation in search_conversation({"q": "North korea"}, 10):
    print("conversation length: " + str(len(conversation)))
    # a conversation is a list of tweets;
    # each tweet has the same properties as a search result
    print("the first tweet of the conversation: " + str(conversation[0]))

# fetch conversations indefinitely (no count given)
for conversation in search_conversation({"q": "please"}):
    print(conversation)
    break

# fetch conversations continuously, resuming from the last search
for conversation in search_conversation({"q": "please"}, continue_path="path_for_last_searching_file.tmp"):
    # loads the last search state from the path if it exists,
    # and keeps saving the current search state to that path
    print(conversation)
    break

for conversation in search_conversation({"q": "please"}, continue_path="path_for_last_searching_file.tmp"):
    print(conversation)

# fetch conversations without keywords
for conversation in search_conversation({"l": "en"}):
    print(conversation)
```
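
Since the crawler exists to gather corpora, the generators above are usually fed into some on-disk format. The following is a minimal sketch under a few assumptions (the file names, the JSON-lines layout, and the decision to skip replies are illustrative choices, not part of tccp); only `search`, `search_conversation`, `continue_path`, and the documented tweet keys come from the examples above.

```python
# Sketch: persist crawled tweets and conversations as corpus files.
# File names and layout are arbitrary choices for this example.
import json

from tccp import search, search_conversation

# Tweets: one JSON object per line, skipping replies via has_parent_tweet.
with open("tweets_corpus.jsonl", "w", encoding="utf-8") as out:
    for tweet in search({"q": "microsoft", "l": "en"}, 1000,
                        continue_path="microsoft_corpus.tmp"):
        if tweet["has_parent_tweet"]:
            continue  # keep only root tweets in this corpus
        out.write(json.dumps(tweet, ensure_ascii=False) + "\n")

# Conversations: blank-line-separated "author: contents" dialogue blocks.
with open("dialogue_corpus.txt", "w", encoding="utf-8") as out:
    for conversation in search_conversation({"l": "en"}, 100):
        for tweet in conversation:
            out.write(tweet["author"] + ": " + tweet["contents"] + "\n")
        out.write("\n")
```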
