# Extracting Tweets from @midasIIITD handle on Twitter

### Including libraries

In [1]:
import os

import jsonlines
import pandas as pd
import tweepy
from tweepy import OAuthHandler

### Provide the consumer key, consumer secret, access token and access secret issued by the Twitter API

In [2]:
# Twitter API credentials are read from the environment so that secrets are
# never saved inside the notebook. Each value falls back to "" when the
# variable is not set, which matches the previous placeholder.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_secret = os.environ.get("TWITTER_ACCESS_SECRET", "")

### Authenticating Python application

In [3]:
# Build the OAuth handler from the application's consumer key/secret ...
auth = OAuthHandler(consumer_key, consumer_secret)
# ... then attach the user's access token/secret to the same handler.
auth.set_access_token(access_token, access_secret)

### Create API

In [4]:
# API client used for every request below. wait_on_rate_limit makes tweepy
# sleep until the rate-limit window resets instead of raising part-way
# through the full-timeline fetch.
api = tweepy.API(auth, wait_on_rate_limit=True)

### Fetching all tweets and storing in empty list

In [5]:
# Container for the fetched tweet (status) objects.
result = list()

> Loop until all tweets have been fetched

In [6]:
# Handle whose timeline is downloaded.
name = "midasIIITD"

# Cursor pages through the timeline; tweet_mode="extended" asks for the
# untruncated 'full_text' field. Rebuilding `result` with list(...) instead
# of appending keeps this cell idempotent: re-running it no longer
# duplicates every tweet.
result = list(
    tweepy.Cursor(
        api.user_timeline,
        screen_name=name,
        tweet_mode="extended",
    ).items()
)

In [19]:
# Report how many statuses were collected.
print(f"No. of tweets fetched: {len(result)}")

No. of tweets fetched: 343


### Write all elements of the list i.e. Tweet details into JSONlines file

In [8]:
# Dump the raw JSON payload of every status, one object per line.
with jsonlines.open('output.jsonl', mode='w') as jsonl_out:
    jsonl_out.write_all(status._json for status in result)

### Examining format of object

> Object

In [9]:
# Display the raw JSON dict of the first fetched tweet to see which fields exist.
result[0]._json

{'created_at': 'Wed Apr 10 04:51:26 +0000 2019',
 'id': 1115839682607239173,
 'id_str': '1115839682607239173',
 'full_text': 'RT @IIITDelhi: Applications open for MTech (CB) through JNU CEEB Admission process. Candidates can check the JNU CEEB admission process in…',
 'truncated': False,
 'display_text_range': [0, 139],
 'entities': {'hashtags': [],
  'symbols': [],
  'user_mentions': [{'screen_name': 'IIITDelhi',
    'name': 'IIIT Delhi',
    'id': 2227868629,
    'id_str': '2227868629',
    'indices': [3, 13]}],
  'urls': []},
 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 1021355762575073281,
  'id_str': '1021355762575073281',
  'name': 'MIDAS IIITD',
  'screen_name': 'midasIIITD',
  'location': 'New Delhi, India',
  'description': 'MIDAS is a group of researchers at III

> Checking whether the tweet is an original tweet or a retweet, and printing the full text accordingly

In [10]:
full_text_retweeted = result[0]._json.get("retweeted_status")

# A retweet carries the original tweet under 'retweeted_status'; use that
# tweet's text when present, otherwise this tweet's own text.
ft = str(
    result[0]._json["full_text"]
    if full_text_retweeted is None
    else full_text_retweeted["full_text"]
)

print(ft)

Applications open for MTech (CB) through JNU CEEB Admission process. Candidates can check the JNU CEEB admission process in the following links : 
JNU CEEB - https://t.co/ciq1scu9Pm  
IIITD - https://t.co/orPaYEeKBL https://t.co/l816beuIXr


> Separating the 'created_at' string into a list of its components

In [11]:
# Split the 'created_at' string on spaces; for the first tweet this yields
# [weekday, month, day, time, UTC offset, year].
dt = result[0]._json['created_at'].split(' ')
print(dt)

['Wed', 'Apr', '10', '04:51:26', '+0000', '2019']


> Extracting favorite count and retweet count

In [12]:
# Pull both engagement counters of the first tweet in one pass.
fc, rc = (result[0]._json[key] for key in ('favorite_count', 'retweet_count'))
print(f"Favorite count: {fc}")
print(f"Retweet count: {rc}")

Favorite count: 0
Retweet count: 1


> Extracting media files URLs

In [13]:
# 'extended_entities' is only present on tweets that carry media, and the
# timeline may hold fewer than six tweets. Catch exactly those two cases
# rather than swallowing every exception; 0 marks "no media information".
try:
    noi = result[5]._json['extended_entities']
except (IndexError, KeyError):
    noi = 0
print(noi)

{'media': [{'id': 1115149307798224898, 'id_str': '1115149307798224898', 'indices': [280, 303], 'media_url': 'http://pbs.twimg.com/media/D3nOYxrWAAIv5Cx.jpg', 'media_url_https': 'https://pbs.twimg.com/media/D3nOYxrWAAIv5Cx.jpg', 'url': 'https://t.co/snX2GkzvQg', 'display_url': 'pic.twitter.com/snX2GkzvQg', 'expanded_url': 'https://twitter.com/midasIIITD/status/1115149324533542912/photo/1', 'type': 'photo', 'sizes': {'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'large': {'w': 960, 'h': 1123, 'resize': 'fit'}, 'medium': {'w': 960, 'h': 1123, 'resize': 'fit'}, 'small': {'w': 581, 'h': 680, 'resize': 'fit'}}}]}


### Features extracted:
> 'full_text' contains the entire text of the Tweet  
> 'created_at' gives the date and time of the Tweet  
> 'favorite_count' gives no. of likes/favorites of the post  
> 'retweet_count' gives no. of retweets of the post  
>  'extended_entities' gives all the media available with the post

In [14]:
def _tweet_to_row(obj):
    """Flatten one tweet JSON object into a feature row.

    Parameters
    ----------
    obj : dict
        A tweet as stored in 'output.jsonl'.

    Returns
    -------
    list
        [text, date, time, favorite_count, retweet_count, media_count, retweet]
        where date is "Mon-DD-YYYY", time is "HH:MM:SS", media_count is the
        number of attached media files (or the string 'None' when there are
        none) and retweet is "Yes"/"No".

    Raises
    ------
    KeyError
        If 'full_text', 'created_at', 'favorite_count' or 'retweet_count'
        is missing from the tweet.
    """
    full_text_retweeted = obj.get("retweeted_status")
    if full_text_retweeted is not None:
        # The top-level text of a retweet is cut off ("RT @user: ...");
        # the complete text is held by the embedded original tweet.
        ft = str(full_text_retweeted["full_text"])
        rt = "Yes"
    else:
        ft = str(obj["full_text"])
        rt = "No"

    # 'created_at' looks like "Wed Apr 10 04:51:26 +0000 2019".
    dt = obj['created_at'].split(' ')
    date = "-".join((dt[1], dt[2], dt[5]))
    time = dt[3]

    # 'extended_entities' is a dict whose 'media' key holds the list of
    # attachments. Count that list: len() of the dict itself is the number
    # of keys, not the number of media files.
    media = (obj.get('extended_entities') or {}).get('media') or []
    noi = len(media) if media else 'None'

    return [ft, date, time, obj['favorite_count'], obj['retweet_count'], noi, rt]


# Read back every stored tweet and build one feature row per tweet.
with jsonlines.open('output.jsonl') as reader:
    pl = [_tweet_to_row(obj) for obj in reader]

# Number of tweets processed.
count = len(pl)

### Convert to DataFrame

In [15]:
# Turn the list of feature rows into a table (one row per tweet).
pl = pd.DataFrame(data=pl)

### Assign column names

In [16]:
# Column order must match the order in which each feature row was assembled.
pl.columns = ["Text", "Date", "Time", "Favorite_count", "Retweet_count", "count_media_files", "Retweet"]

In [17]:
# Preview the first ten rows of the table.
pl.head(10)

Unnamed: 0,Text,Date,Time,Favorite_count,Retweet_count,count_media_files,Retweet
0,Applications open for MTech (CB) through JNU C...,Apr-10-2019,04:51:26,0,1,,Yes
1,We are delighted to share that IIIT-Delhi is r...,Apr-09-2019,16:45:07,0,13,,Yes
2,"Professor Jelani Nelson founded AddisCoder, a ...",Apr-09-2019,05:04:27,0,35,,Yes
3,For anyone interested in submitting to EMNLP 2...,Apr-09-2019,05:04:11,0,16,,Yes
4,Announcing the 2019 MediaEval multimedia tasks...,Apr-08-2019,19:38:09,0,15,,Yes
5,"Many Congratulations to @midasIIITD student, S...",Apr-08-2019,07:08:12,18,2,1.0,No
6,@midasIIITD thanks all students who have appea...,Apr-08-2019,03:27:42,5,0,1.0,No
7,"@himanchalchandr Meanwhile, complete CV/NLP ta...",Apr-07-2019,14:17:29,0,0,,No
8,@sayangdipto123 Submit as per the guideline ag...,Apr-07-2019,14:17:09,0,0,,No
9,We request all students whose interview are sc...,Apr-07-2019,11:43:24,1,1,,No


### Save DataFrame to .csv file

In [18]:
# Write the table to CSV in the working directory. The row index is written
# as the first, unnamed column because to_csv defaults to index=True.
pl.to_csv("tweets_midasIIITD.csv")