## Part A extracting all tweets of MIDAS IIITD from Twitter API and dumping into JSON

In [1]:
import tweepy
import csv
import json
import json

In [2]:
# load Twitter API credentials
with open('twitter_credentials.json') as cred_data:
    info = json.load(cred_data)
    consumer_key = info['CONSUMER_KEY']
    consumer_secret = info['CONSUMER_SECRET']
    access_key = info['ACCESS_KEY']
    access_secret = info['ACCESS_SECRET']

In [13]:
def get_all_tweets(screen_name):
    #Twitter only allows access to a users most recent 3240 tweets with this method

    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    #initialize a list to hold all the tweepy Tweets
    alltweets = []
    
    new_tweets = api.user_timeline(screen_name = screen_name,tweet_mode="extended")

    #save most recent tweets
    alltweets.extend(new_tweets)

    #save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:  
        # all subsiquent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name = screen_name,max_id=oldest,tweet_mode="extended")

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % (len(alltweets)))

    print("Total tweets downloaded %s" % (len(alltweets)))
    print("Writing tweet objects to JSON please wait...")
    filewrite=[]
    for status in alltweets:
        filewrite.append(status._json)
    
    with open('tweet.json', 'w', encoding='utf8') as file:        
        json.dump(filewrite,file,sort_keys = True,indent = 4)

In [14]:
get_all_tweets('midasIIITD')

...40 tweets downloaded so far
...60 tweets downloaded so far
...80 tweets downloaded so far
...100 tweets downloaded so far
...120 tweets downloaded so far
...140 tweets downloaded so far
...160 tweets downloaded so far
...180 tweets downloaded so far
...200 tweets downloaded so far
...220 tweets downloaded so far
...240 tweets downloaded so far
...260 tweets downloaded so far
...280 tweets downloaded so far
...300 tweets downloaded so far
...320 tweets downloaded so far
...334 tweets downloaded so far
...334 tweets downloaded so far
Total tweets downloaded 334
Writing tweet objects to JSON please wait...


## Part B (Loading json file and showing relevant contents only)

In [15]:
config = json.loads(open('tweet.json').read())

In [18]:
import pandas as pd

In [39]:
df=pd.DataFrame(columns=["Text","Date/Time","number of favorites/likes"," number of retweets","Number Of images in tweet"])

In [52]:
for i in range(len(config)):
    row=[config[i]['full_text'],config[i]['created_at'],config[i]['favorite_count'],config[i]['retweet_count']]
    try:
        # printing all media urls
        for media in config[i]['extended_entities']['media']:
            print(media['media_url'])
        row.append(len(config[i]['extended_entities']['media']))
    except:
        row.append(None)  # if no media is present simply append None
    
    df.loc[i]=row

http://pbs.twimg.com/media/D3e-35KW0AAzQ3E.jpg
http://pbs.twimg.com/media/D3XGmdMWAAM7bXv.png
http://pbs.twimg.com/media/D2rXC3KU8AAUXrb.jpg
http://pbs.twimg.com/media/D2gZD1IVYAE02dU.jpg
http://pbs.twimg.com/media/D2gZGVwVYAISCDj.jpg
http://pbs.twimg.com/media/D1grsHnU8AAj4IZ.png
http://pbs.twimg.com/media/D1dxxHzXgAIeNSE.jpg
http://pbs.twimg.com/media/D080rrJXgAAWIQC.jpg
http://pbs.twimg.com/media/D0vpRy0VAAAOf5g.jpg
http://pbs.twimg.com/media/D0vpS5nUcAc1eNP.jpg
http://pbs.twimg.com/media/D0vpUHaU4AEoUlc.jpg
http://pbs.twimg.com/media/D0f6jC9X4AQJsgU.jpg
http://pbs.twimg.com/media/Dz1LoMRVsAETaa-.jpg
http://pbs.twimg.com/media/Dz02vCzU8AIGAqp.jpg
http://pbs.twimg.com/media/Dz02vCyU0AAvSSq.jpg
http://pbs.twimg.com/media/DzYK_BAUwAA8sGm.jpg
http://pbs.twimg.com/media/DzToEKZX4AE7rjc.jpg
http://pbs.twimg.com/media/DzQrO_7WsAUu8Na.jpg
http://pbs.twimg.com/media/DyoyOA5VsAApBEK.png
http://pbs.twimg.com/media/DyYCwoTUYAAyze5.jpg
http://pbs.twimg.com/media/DyYCztxUUAAjzT6.jpg
http://pbs.tw

In [49]:
df

Unnamed: 0,Text,Date/Time,number of favorites/likes,number of retweets,Number Of images in tweet
0,We request all students whose interview are sc...,Sun Apr 07 11:43:24 +0000 2019,0,1,
1,"Other queries: ""none of the Tweeter Apis give ...",Sun Apr 07 06:55:19 +0000 2019,3,2,
2,"Other queries: ""do we have to make two differe...",Sun Apr 07 06:53:38 +0000 2019,4,1,
3,"Other queries: ""If using Twitter api, it does ...",Sun Apr 07 05:32:27 +0000 2019,4,1,
4,Response to some queries asked by students on ...,Sun Apr 07 05:29:40 +0000 2019,6,1,
5,RT @kdnuggets: Top 8 #Free Must-Read #Books on...,Sat Apr 06 17:11:29 +0000 2019,0,2,
6,@nupur_baghel @PennDATS Congratulation @nupur_...,Sat Apr 06 16:43:27 +0000 2019,15,3,1
7,We have emailed the task details to all candid...,Fri Apr 05 16:08:37 +0000 2019,10,1,
8,RT @rfpvjr: Our NAACL paper on polarization in...,Fri Apr 05 04:05:11 +0000 2019,0,16,
9,RT @kdnuggets: Effective Transfer Learning For...,Fri Apr 05 04:04:43 +0000 2019,0,11,1
