In [1]:
# Collect relevant tweets through the Twitter API.
import json
import tweepy as tw

In [2]:
# IMPORTANT: enter valid access credentials in the config_twitter.py file
import config_twitter

In [3]:
# function to establish an initial API connection, respecting the rate limit
def connect_api_client():
    """Create a Tweepy API client and confirm that the credentials work.

    The four credentials are read from the ``config_twitter`` module
    (``consumer_key``, ``consumer_secret``, ``access_token`` and
    ``access_token_secret``).

    Returns:
        The ``tw.API`` client, created with ``wait_on_rate_limit=True``.

    Raises:
        RuntimeError: if ``verify_credentials()`` returns a falsy value,
            i.e. the credentials could not be verified.
        Exception: any error raised by the Tweepy calls propagates unchanged.
    """
    auth = tw.OAuthHandler(config_twitter.consumer_key, config_twitter.consumer_secret)
    auth.set_access_token(config_twitter.access_token, config_twitter.access_token_secret)
    # https://docs.tweepy.org/en/stable/getting_started.html#api
    api = tw.API(auth, wait_on_rate_limit=True)
    # returns False if credentials could not be verified
    # https://docs.tweepy.org/en/stable/api.html#API.verify_credentials
    # Called exactly once: every call is a network request against the API.
    user = api.verify_credentials()
    if not user:
        # Raise a real exception type; raising a plain string is a TypeError.
        raise RuntimeError(
            "Credentials could not be verified: Please check config_twitter.py"
        )
    print(f"Connected to Twitter API as {user.name}")
    return api

In [4]:
# create the API client that the search cell below uses
api = connect_api_client()

Connected to Twitter API as Pavel P


In [5]:
# build the search query: match any of the e-bike phrasings, exclude retweets
search_terms = ['ebike', '"electric bike"', '"electric bicycle"']
query = ' OR '.join(search_terms) + ' -filter:retweets'

In [6]:
# decide how many tweets to query (upper bound passed to the search cursor)
# TODO: increase this value later to collect a good dataset (try 2000 for instance)
ntweets = 200

In [7]:
# search and collect relevant tweets
# https://docs.tweepy.org/en/stable/cursor_tutorial.html
# https://docs.tweepy.org/en/stable/code_snippet.html
# Tweepy 4 renamed API.search to API.search_tweets; use whichever this install provides.
search_method = getattr(api, "search_tweets", None) or api.search
# count=100 requests the largest page the standard search endpoint returns, so
# collecting ntweets takes far fewer rate-limited requests than the default page size.
tweets = [
    status._json
    for status in tw.Cursor(
        search_method, q=query, lang="en", tweet_mode='extended', count=100
    ).items(ntweets)
]
len(tweets)

200

In [8]:
# example tweet content (json structure): show the raw dict of the first collected tweet
tweets[0]

{'created_at': 'Sun Jun 27 22:03:40 +0000 2021',
 'id': 1409271236014882816,
 'id_str': '1409271236014882816',
 'full_text': 'Fiido D11 review: This folding ebike has good looks, low weight, and solid\xa0range https://t.co/Sgd6WxbYW6',
 'truncated': False,
 'display_text_range': [0, 104],
 'entities': {'hashtags': [],
  'symbols': [],
  'user_mentions': [],
  'urls': [{'url': 'https://t.co/Sgd6WxbYW6',
    'expanded_url': 'https://www.blacktechdaily.com/fiido-d11-review-this-folding-ebike-has-good-looks-low-weight-and-solid-range/',
    'display_url': 'blacktechdaily.com/fiido-d11-revi…',
    'indices': [81, 104]}]},
 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
 'source': '<a href="http://publicize.wp.com/" rel="nofollow">WordPress.com</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 1218544543294947330,
  'id_str': '1218544543

In [9]:
# persist the collected tweets as pretty-printed JSON; the file name records ntweets
file_out = f"raw_tweet_data_{ntweets}.json"
with open(file_out, mode='w') as fh:
    # stream the serialised list straight into the open file
    json.dump(tweets, fh, indent=2)