# Twitter Credentials

In [None]:
# Twitter API credentials read by the authenticator below.
# Consumer (application) key pair first, then the per-user access token pair.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Importing of Libraries

In [None]:
from tweepy.streaming import StreamListener #allow us to listen firehose of tweets
from tweepy import OAuthHandler #Authentication of credentials
from tweepy import Stream 

from tweepy import API
from tweepy import Cursor

import numpy as np
import pandas as pd

from IPython.display import display


# To extract timeline tweets 

In [None]:
class TwitterClient():
    """Holds an authenticated tweepy ``API`` object.

    ``twitter_user`` is stored on the instance as given; nothing in this
    class reads it afterwards.
    """

    def __init__(self, twitter_user=None):
        # Build the OAuth handler first, then wrap it in the API object.
        authenticator = TwitterAuthenticator()
        self.auth = authenticator.authenticate_twitter_app()
        self.twitter_user = twitter_user
        self.twitter_client = API(self.auth)

    def get_twitter_client_api(self):
        """Return the ``API`` object created in the constructor."""
        return self.twitter_client


# Authentication by key credentials

In [None]:
class TwitterAuthenticator():
    """Creates the OAuth handler from the module-level credential variables."""

    def authenticate_twitter_app(self):
        """Return an ``OAuthHandler`` loaded with the consumer and access credentials."""
        handler = OAuthHandler(consumer_key, consumer_secret)
        handler.set_access_token(access_token, access_token_secret)
        return handler
        

# Class for Streaming and processing live Tweets

In [None]:
class TwitterStreamer():
    """Connects to the streaming API and forwards matching tweets to a listener."""

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        """Stream tweets matching ``hash_tag_list`` into ``fetched_tweets_filename``.

        Authenticates, creates a ``TwitterListener`` bound to the output file,
        and starts a filtered stream that tracks the given terms.
        """
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        tweet_listener = TwitterListener(fetched_tweets_filename)

        # Only tweets containing one of the tracked terms are delivered.
        Stream(credentials, tweet_listener).filter(track=hash_tag_list)

# Dealing with data and errors

This is a basic listener class that prints received tweets to stdout and appends them to a file.

In [None]:
# Prints streamed data and reports errors, if any.

class TwitterListener(StreamListener):
    """Stream listener that echoes each raw message and appends it to a file."""

    def __init__(self, fetched_tweets_filename):
        """Remember the path of the file that received data is appended to."""
        # Let the tweepy base class initialise its own state before adding ours.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print ``data`` and append it to the output file.

        Always returns True, even when printing or writing fails, so a single
        bad message does not end the stream.
        """
        try:
            print(data)
            # Explicit encoding so the result does not depend on the platform default.
            with open(self.fetched_tweets_filename, 'a', encoding='utf-8') as tf:
                tf.write(data)
        except Exception as e:
            # Catch Exception, not BaseException, so KeyboardInterrupt and
            # SystemExit still propagate and the stream can be stopped.
            print("Error_on_data: %s" % str(e))
        return True

    def on_error(self, status):
        """Return False on status 420; print any other status code.

        NOTE(review): 420 is presumably Twitter's rate-limit response, and
        returning False is expected to make tweepy disconnect - confirm
        against the tweepy streaming documentation.
        """
        if status == 420:
            return False
        print(status)
        

# Functionality for analyzing and categorizing content from tweets

To get the number of photos we use the extended_entities object, whose JSON format is described at https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object.html

In [None]:
class TweetAnalyzer():
    """Turns a collection of tweet objects into a pandas DataFrame."""

    def tweets_to_data_frame(self, tweets):
        """Build a DataFrame with one row per tweet.

        Args:
            tweets: A re-iterable sequence (it is traversed several times) of
                objects exposing ``text``, ``created_at``, ``favorite_count``,
                ``retweet_count`` and ``entities``; ``extended_entities`` is
                read when present.

        Returns:
            A DataFrame with the columns ``Tweets``, ``date``, ``likes``,
            ``retweet_count`` and ``photo``. ``photo`` holds the number of
            photos attached to the tweet, or the string "None" when the tweet
            has no photo.
        """
        df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweets'])

        df['date'] = np.array([tweet.created_at for tweet in tweets])              # date and time of the tweet
        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])         # number of likes on the tweet
        df['retweet_count'] = np.array([tweet.retweet_count for tweet in tweets])  # number of retweets

        # A count of 0 is stored as the string "None", matching the existing output format.
        df["photo"] = [self._count_photos(tweet) or "None" for tweet in tweets]
        return df

    @staticmethod
    def _count_photos(tweet):
        """Return how many media items of type 'photo' are attached to ``tweet``."""
        if 'media' not in tweet.entities:
            return 0
        # `extended_entities` may be absent on a tweet object; treat that as no media
        # instead of raising AttributeError.
        extended = getattr(tweet, 'extended_entities', None) or {}
        return sum(1 for item in extended.get('media', []) if item.get('type') == 'photo')
                    
                

# Creation of objects, main part of program

In [None]:
if __name__ == "__main__":
    # Disabled examples, kept for reference.
    #
    # Streaming tweets that match a hashtag list into a file:
    #     hash_tag_list = ["MIDAS"]
    #     fetched_tweets_filename = "tweets.txt"
    #     twitter_streamer = TwitterStreamer()
    #     twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)
    #
    # Client bound to a single account:
    #     twitter_client = TwitterClient('midasIIITD')
    #     print(twitter_client.get_user_timeline_tweets(1))

    tweet_analyzer = TweetAnalyzer()
    twitter_client = TwitterClient()
    api = twitter_client.get_twitter_client_api()

    # screen_name is the Twitter handle whose tweets are fetched; count is the
    # number of tweets requested. At most 200 tweets can be fetched this way.
    tweets = api.user_timeline(screen_name="", count=200)
    df = tweet_analyzer.tweets_to_data_frame(tweets)
    
    

In [None]:
# Show the first 10 rows of the dataframe.
df.head(10)

In [None]:
# List the attribute names available on the first fetched tweet object.
print(dir(tweets[0]))

# To get the media urls present in the tweet when there is media present in tweet

In [None]:
# Print the URL of every media item attached to a tweet, once per item.
for tweet in tweets:
    if 'media' not in tweet.entities:
        continue
    # `extended_entities` may be absent on a tweet object; treat that as no media
    # instead of raising AttributeError.
    extended = getattr(tweet, 'extended_entities', None) or {}
    for media in extended.get('media', []):
        print(media['media_url'])
            
                

# Printing the tweets that have a media entity in them

In [None]:
# Print the raw `media` entity and the media URLs of every tweet that carries
# media, then the number of such tweets.
count = 0  # tweets seen so far that have a media entity
for i, tweet in enumerate(tweets, start=1):  # i is the 1-based position of the tweet
    if 'media' not in tweet.entities:
        continue
    print(i, ": ", tweet.entities['media'])
    count += 1
    # `extended_entities` may be absent on a tweet object; treat that as no media
    # instead of raising AttributeError.
    extended = getattr(tweet, 'extended_entities', None) or {}
    for media in extended.get('media', []):
        print(media['media_url'])
print("Media Count: ", count)

# Printing the URLs of media of type photo; media files of pinned tweets are not counted

In [None]:
# Build `photo`: one entry per tweet holding its number of photos, or None when
# the tweet has no photo. Each photo URL is printed with the tweet's position.
photo = []
for i, tweet in enumerate(tweets, start=1):  # i is the 1-based position of the tweet
    count = 0
    if 'media' in tweet.entities:
        # `extended_entities` may be absent on a tweet object; treat that as no
        # media instead of raising AttributeError.
        extended = getattr(tweet, 'extended_entities', None) or {}
        for T in extended.get('media', []):
            if T['type'] == 'photo':
                print(i, ": ", T['media_url'])
                count += 1
    photo.append(count or None)
photo

# Printing the tweets in dataframe by categorizing the tweet contents in different columns

In [None]:
# Print up to the first 200 rows of the dataframe.
print(df.head(200))

# Exporting the dataframe to JSON format with index orientation

In [None]:
# Write the dataframe to output.json using the 'index' orientation.
# NOTE(review): pandas documents to_json as returning None when a path is
# given, so `export` presumably holds None - confirm before relying on it.
export = df.to_json(r'output.json',orient='index')