# Influential Tweets Analysis

Welcome to the influential Tweets analysis notebook! The analysis can be configured using the variables under the Configuration heading. The output for the project will be found under the Example heading at the bottom.

## Imports

In [1]:
import tweepy
from dotenv import load_dotenv
import os
from enum import Enum
import json

def importUsers(users_file):
    """Return the screen names listed in a users JSON file.

    The file must contain a top-level object with a ``users`` list; each
    element of that list must be an object with a ``screen_name`` key.

    Args:
        users_file: Path of the JSON file to read.

    Returns:
        A list with the ``screen_name`` of every entry, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``users`` or a ``screen_name`` key is missing.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(users_file, encoding='utf-8') as json_file:
        data = json.load(json_file)

    return [user['screen_name'] for user in data['users']]

## Configuration

In [2]:
# Path of the JSON file that lists the users to analyze
users_file = 'data/top-100.json' # other options include 'data/space-100.json' and 'data/finance-100.json'

# Screen names of the users to analyze (the 'screen_name' of every entry
# under the file's 'users' key)
users = importUsers(users_file)

# Date (inclusive) to start analyzing tweets of users
# TODO: Not implemented (the Example cell does not pass this to the tracker)
start_date = None

# Date (inclusive) to stop analyzing tweets of users
# TODO: Not implemented (the Example cell does not pass this to the tracker)
end_date = None

# Number of most recent tweets to look at for each user (Max = 200);
# passed to the tracker as its `count` argument in the Example cell
num_tweets = 20

# Minimum count a tweet needs (count >= threshold) for its URL to be
# printed in the Example cell
threshold = 3

## Tweepy Configuration

In [3]:
# Load variables from a .env file into the process environment, then read
# the Twitter API credentials. os.getenv returns None for a variable that
# is not set.
load_dotenv()
consumer_key = os.getenv('CONSUMER_KEY')
consumer_secret = os.getenv('CONSUMER_SECRET')

# Set up tweepy with Twitter API.
# `api` is the module-level client that
# InfluentialTweetTracker.findInfluentialTweets uses to fetch timelines.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

## Influential Tweets Class Definition

The code block below defines the InfluentialTweetTracker class, which contains the logic for finding the tweets that the configured users have interacted with most often. It also defines two helper Enum classes.

In [4]:
class TweetType(Enum):
    """Kind of timeline entry, as classified by InfluentialTweetTracker."""
    TWEET = 1
    RETWEET = 2
    REPLY_TWEET = 3
    QUOTE_TWEET = 4


class TweetSelectionType(Enum):
    """How InfluentialTweetTracker selects the tweets of each user."""
    BY_NUMBER = 1
    BY_DATES = 2


class InfluentialTweetTracker:
    """Count how often each tweet shows up across a set of users' timelines.

    Every timeline entry adds 1 to the count of the tweet(s) it involves:

    * retweet: the retweeted tweet
    * quote tweet: the entry itself and the quoted tweet
    * reply: the entry itself and the tweet being replied to
    * plain tweet: the entry itself

    Counts are per timeline entry, so a user who interacts with the same
    tweet several times contributes several counts.
    """

    # Number of most recent tweets requested per user in BY_DATES mode
    # before filtering by date (the notebook configuration states 200 as
    # the per-user maximum).
    MAX_TIMELINE_COUNT = 200

    def __init__(self, users, count=10, start_date=None, end_date=None, api=None):
        """Create a tracker.

        If both start_date and end_date are specified, they determine the
        range of tweets to consider. Otherwise the count parameter is used.

        Args:
            users: Iterable of screen names whose timelines are analyzed.
            count: Number of most recent tweets to fetch per user when
                selecting by number.
            start_date: Inclusive lower bound on ``tweet.created_at``.
            end_date: Inclusive upper bound on ``tweet.created_at``.
                Both bounds must be comparable with the ``created_at``
                values of the fetched tweets.
                NOTE(review): presumably datetime objects whose timezone
                awareness matches what the installed tweepy returns --
                confirm against the tweepy version in use.
            api: Optional client exposing ``user_timeline(**kwargs)``. When
                omitted, the module-level ``api`` object is looked up at
                the time findInfluentialTweets is called.
        """
        self.users = users
        # Set every attribute in both modes so none is ever missing.
        self.count = count
        self.start_date = start_date
        self.end_date = end_date
        self.api = api

        # Dictionary such that key = tweet id and value = number of times
        # the tweet was counted. Created per instance so that trackers
        # never share state.
        self.tweet_counts = {}

        if start_date is not None and end_date is not None:
            self.selection_type = TweetSelectionType.BY_DATES
        else:
            self.selection_type = TweetSelectionType.BY_NUMBER

    def __classifyTweet(self, tweet):
        """Classify a timeline entry and return its TweetType.

        Retweet is checked before quote, and quote before reply, so an
        entry that matches more than one test gets the first matching type.
        """
        if hasattr(tweet, 'retweeted_status'):
            return TweetType.RETWEET
        if hasattr(tweet, 'quoted_status'):
            return TweetType.QUOTE_TWEET
        if getattr(tweet, 'in_reply_to_status_id', None) is not None:
            return TweetType.REPLY_TWEET
        return TweetType.TWEET

    def __addTweetToMap(self, tweet_id):
        """Add 1 to the count for tweet_id, creating the entry if needed."""
        self.tweet_counts[tweet_id] = self.tweet_counts.get(tweet_id, 0) + 1

    def __interactedTweetIds(self, tweet):
        """Return the ids to count for one timeline entry, in counting order."""
        tweet_type = self.__classifyTweet(tweet)

        if tweet_type == TweetType.RETWEET:
            return [tweet.retweeted_status.id]
        if tweet_type == TweetType.QUOTE_TWEET:
            return [tweet.id, tweet.quoted_status.id]
        if tweet_type == TweetType.REPLY_TWEET:
            return [tweet.id, tweet.in_reply_to_status_id]
        return [tweet.id]

    def __fetchTweets(self, client, user):
        """Fetch the tweets of one user according to the selection type.

        In BY_DATES mode only the MAX_TIMELINE_COUNT most recent tweets are
        requested; those outside [start_date, end_date] are then dropped.
        """
        if self.selection_type == TweetSelectionType.BY_NUMBER:
            return client.user_timeline(
                screen_name=user, count=self.count, tweet_mode='extended')

        recent_tweets = client.user_timeline(
            screen_name=user, count=self.MAX_TIMELINE_COUNT, tweet_mode='extended')
        return [
            tweet for tweet in recent_tweets
            if self.start_date <= tweet.created_at <= self.end_date
        ]

    def findInfluentialTweets(self):
        """Run the analysis and return the tweet_counts dictionary.

        Counts from any earlier run are discarded first.

        Returns:
            dict mapping tweet id to the number of times it was counted.
        """
        self.tweet_counts = {}
        # Fall back to the module-level client when none was injected.
        client = self.api if self.api is not None else api

        for user in self.users:
            for tweet in self.__fetchTweets(client, user):
                for tweet_id in self.__interactedTweetIds(tweet):
                    self.__addTweetToMap(tweet_id)

        return self.tweet_counts

## Example

The cell below prints a link to every tweet whose count is at or above the threshold, using the settings specified under the Configuration heading.

In [5]:
# Run the analysis over the configured users, looking at the configured
# number of most recent tweets for each of them.
tracker = InfluentialTweetTracker(users, count = num_tweets)
influential_tweets = tracker.findInfluentialTweets()

# Print a link and the count for every tweet that reaches the threshold.
for tweet_id, interaction_count in influential_tweets.items():
    if interaction_count < threshold:
        continue
    print('https://twitter.com/any/status/%s - %s' % (tweet_id, interaction_count))

https://twitter.com/any/status/1263682103490883588 - 3
https://twitter.com/any/status/1263682100940677121 - 3
https://twitter.com/any/status/1264564734860427265 - 3
https://twitter.com/any/status/1264549210218790912 - 3
https://twitter.com/any/status/1263913627066777601 - 3
https://twitter.com/any/status/1264206707351269376 - 3
https://twitter.com/any/status/1253374355599437830 - 3
https://twitter.com/any/status/1264167544447029248 - 3
