# SECTION 1: Parse tweets and dump into a JSON file

In [1]:
import requests
from requests_oauthlib import OAuth1 # OAuth using requests module
import json
from beautifultable import BeautifulTable # For printing in tabular form

> The app secrets are stored in `auth/secrets.json`, which is not committed to this repo.

In [2]:
# Read the API credentials from the local secrets file into a dictionary.
with open('auth/secrets.json') as secrets_file:
    secrets = json.loads(secrets_file.read())

In [3]:
# Account whose timeline is fetched, and the page size sent with each request.
HANDLE = 'midasIIITD'
COUNT = '200'    # Maximum tweets per request

# Base timeline URL; the paging loop appends '&max_id=...' to this string.
URL = ''.join([
    'https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=', HANDLE,
    '&count=', COUNT,
    '&tweet_mode=extended',
])

# OAuth1 signer built from the four credentials held in the secrets dictionary.
auth = OAuth1(
    secrets['API_KEY'],
    secrets['API_SECRET'],
    secrets['ACCESS_TOKEN'],
    secrets['ACCESS_TOKEN_SECRET'],
)

In [4]:
# Fetch the first (newest) page of the timeline.
r = requests.get(URL, auth=auth)
print(r.status_code)
# Fail here on an HTTP error instead of storing the error body in tweet_json,
# which the following cells index into as a list of tweets.
r.raise_for_status()
tweet_json = r.json()

200


> The Twitter API returns at most 200 tweets per request, so multiple requests have to be sent to get all tweets. The maximum allowed is 900 tweets in a window of 15 minutes. After each request, the max_id parameter is set to one less than the oldest tweet ID fetched so far. The next request then fetches only tweets with an ID less than or equal to max_id, i.e. strictly older tweets, without repeating the oldest one already fetched.

In [5]:
# The last element is the oldest tweet fetched so far; one less than its ID
# is the upper bound used for the next request.
oldest_tweet = tweet_json[-1]
max_id = str(oldest_tweet['id'] - 1)

In [6]:
# Page backwards through the timeline, appending each older page to tweet_json.
for i in range(4):   # Max allowed tweets: 900. Hence iterating 4 times. 4x200 = 800
    r = requests.get(URL + '&max_id=' + max_id, auth=auth)
    print(i, r.status_code)
    # Stop on an HTTP error: `tweet_json += <dict>` would silently append the
    # dict's keys (plain strings) to the list of tweets.
    r.raise_for_status()
    page = r.json()
    if not page:
        # An empty page leaves max_id unchanged, so further requests would
        # only repeat this one.
        break
    tweet_json += page
    # Next upper bound: one less than the oldest ID in the page just received.
    max_id = str(page[-1]['id'] - 1)

0 200
1 200
2 200
3 200


In [7]:
# Covers the first request plus every page appended by the paging loop.
len(tweet_json)  # Total number of tweets fetched

296

In [8]:
# Serialise every fetched tweet to a single JSON file on disk.
with open('midas_tweets.json', 'w') as out_file:
    out_file.write(json.dumps(tweet_json))

# SECTION 2: Read JSON file and display data in tabular format

In [9]:
# Reload the saved tweets from disk so this section does not depend on the API calls.
with open('midas_tweets.json') as tweets_file:
    midas_tweets = json.loads(tweets_file.read())

# Number of tweets read back from the file.
len(midas_tweets)

296

> This function counts the number of images/media in a tweet. Whenever a tweet contains an image/media, a `media` key is available in the `entities`/`extended_entities` payload, and the function counts the number of elements it contains. If the tweet is a retweet, the function recursively adds the number of images in the original tweet.

In [10]:
def count_media(tweet):
    """Count the media items attached to a tweet, including a retweeted original.

    The media list is read from ``tweet['extended_entities']`` when that key is
    present, otherwise from ``tweet['entities']``. A missing container or a
    missing ``'media'`` key counts as zero, so sparse payloads do not raise
    ``KeyError``. If the tweet has a ``'retweeted_status'``, that nested tweet
    is counted the same way and added to the total.

    Args:
        tweet: Tweet payload as a dict (as parsed from the API JSON).

    Returns:
        int: Number of media items on the tweet plus those on the retweeted
        original, if any.

    NOTE(review): if the API repeats the original's media on the retweet
    itself, the total includes both copies -- confirm against real payloads.
    """
    if 'extended_entities' in tweet:
        # When extended_entities is present it is the only source consulted,
        # even if it has no 'media' key.
        media = tweet['extended_entities'].get('media', [])
    else:
        media = tweet.get('entities', {}).get('media', [])
    count = len(media)
    if 'retweeted_status' in tweet:
        count += count_media(tweet['retweeted_status'])
    return count
    

In [11]:
# Build a box-styled table with one row per tweet.
table = BeautifulTable()
table.set_style(BeautifulTable.STYLE_BOX_DOUBLED)
table.column_headers = ['ID', 'Text', 'Date&Time', 'Favourites', 'Retweets', 'Images']

# Tweet keys copied straight into the row, in column order; the media count
# is computed separately and appended as the last column.
direct_fields = ('id', 'full_text', 'created_at', 'favorite_count', 'retweet_count')

for tweet in midas_tweets:
    img_count = count_media(tweet)
    row = [tweet[field] for field in direct_fields]
    row.append(img_count)
    table.append_row(row)

print(table)


╔════╦═════════════════════════════════╦══════╦════════════╦══════════╦════════╗
║ ID ║              Text               ║ Date ║ Favourites ║ Retweets ║ Images ║
║    ║                                 ║ &Tim ║            ║          ║        ║
║    ║                                 ║  e   ║            ║          ║        ║
╠════╬═════════════════════════════════╬══════╬════════════╬══════════╬════════╣
║ 11 ║ @IEEEBigMM19 is also available  ║ Wed  ║     1      ║    1     ║   0    ║
║ 08 ║        on Facebook now.         ║ Mar  ║            ║          ║        ║
║ 28 ║                                 ║ 20 0 ║            ║          ║        ║
║ 18 ║                                 ║ 8:19 ║            ║          ║        ║
║ 74 ║                                 ║ :24  ║            ║          ║        ║
║ 16 ║                                 ║ +000 ║            ║          ║        ║
║ 46 ║                                 ║ 0 20 ║            ║          ║        ║
║ 58 ║                      