# Twitter functions

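This notebook contains the requested Twitter functions: an interactive menu that runs one of four "top 10" queries over a tweet dataset stored as one JSON object per line.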


### Libraries to import

In [1]:
import heapq
import json
import os

### Function definition

In [2]:
# Fallback dataset location used by main() when the entered path does not exist.
# Despite the "_DIR" suffix this is the path of a file, not of a directory.
DEFAULT_DATA_DIR = os.path.join("data", "farmers-protest-tweets-2021-03-5.json")

def validate_input(input_str):
    """Return a valid menu option, re-prompting until one is entered.

    Args:
        input_str: The user's initial answer.

    Returns:
        One of ``"1"``, ``"2"``, ``"3"`` or ``"4"``: ``input_str`` itself when
        it is already valid, otherwise the first valid answer read from
        ``input``.
    """
    valid_options = ("1", "2", "3", "4")
    # Loop instead of recursing: one stack frame per bad answer would end in
    # a RecursionError after a long enough run of invalid entries.
    while input_str not in valid_options:
        input_str = input("Please enter a valid option.\n> ")
    return input_str

def top_retweeted(input_path, n=10):
    """Return the ``n`` most retweeted tweets from a JSON-lines dataset.

    Every non-blank line of the file at ``input_path`` is parsed as one JSON
    object and ranked by its ``"retweetCount"`` value.

    Args:
        input_path: Path to the dataset, one JSON-encoded tweet per line.
        n: Maximum number of tweets to return (defaults to 10).

    Returns:
        A list of at most ``n`` tweet dicts ordered from most to least
        retweeted. Tweets with equal counts keep their order in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a tweet has no ``"retweetCount"`` key.
    """
    # Explicit UTF-8: tweet text can contain emoji, which the platform
    # default encoding is not guaranteed to decode.
    with open(input_path, encoding="utf-8") as input_file:
        # Lazy generator: tweets are parsed one at a time and blank lines
        # (e.g. a trailing newline) are skipped instead of crashing the parser.
        tweets = (json.loads(line) for line in input_file if line.strip())
        # nlargest must consume the generator while the file is still open.
        return heapq.nlargest(n, tweets, key=lambda tweet: tweet["retweetCount"])

def top_users(input_path):
    """Placeholder for the "top 10 users" query; not implemented yet.

    ``input_path`` is accepted so the function can be called like the other
    menu functions, but it is currently ignored.

    Returns:
        The literal string ``"top_users"``.
    """
    placeholder = "top_users"
    return placeholder

def top_days(input_path):
    """Placeholder for the "top 10 days" query; not implemented yet.

    ``input_path`` is accepted so the function can be called like the other
    menu functions, but it is currently ignored.

    Returns:
        The literal string ``"top_days"``.
    """
    placeholder = "top_days"
    return placeholder

def top_hashtags(input_path):
    """Placeholder for the "top 10 hashtags" query; not implemented yet.

    ``input_path`` is accepted so the function can be called like the other
    menu functions, but it is currently ignored.

    Returns:
        The literal string ``"top_hashtags"``.
    """
    placeholder = "top_hashtags"
    return placeholder

def main():
    """Run the interactive menu: ask for a dataset, pick a query, print its result.

    The user is first asked for a dataset path. When that path does not exist
    (which includes an empty answer), ``DEFAULT_DATA_DIR`` is used instead and
    a notice is printed. The user then chooses one of four options; the
    answer is passed through ``validate_input`` and the matching function is
    called with the dataset path. Its return value is printed.
    """
    dataset_path = input("Enter dataset path (leave empty to use default value): ")
    path_is_usable = os.path.exists(dataset_path)
    if not path_is_usable:
        dataset_path = DEFAULT_DATA_DIR
        print(f"Default value will be used ({dataset_path})")

    menu_text = """Available functions:
        [1] Top 10 most retweeted tweets
        [2] Top 10 users that tweeted the most
        [3] Top 10 days where people tweeted the most
        [4] Top 10 most used hashtags"""

    # Menu key -> function implementing that option.
    handlers = {
        "1": top_retweeted,
        "2": top_users,
        "3": top_days,
        "4": top_hashtags
    }

    first_answer = input(f"{menu_text}\n> ")
    choice = validate_input(first_answer)
    handler = handlers[choice]
    outcome = handler(dataset_path)
    print(f"Execution result:\n {outcome}")

### Main execution

In [3]:
# Interactive cell: main() reads the dataset path and the menu choice via input().
main()

Enter dataset path (leave empty to use default value): 
Default value will be used (data/farmers-protest-tweets-2021-03-5.json)
Available functions:
        [1] Top 10 most retweeted tweets
        [2] Top 10 users that tweeted the most
        [3] Top 10 days where people tweeted the most
        [4] Top 10 most used hashtags
> 1
Execution result:
 [{'url': 'https://twitter.com/AmandaCerny/status/1359013362881994752', 'date': '2021-02-09T05:36:49+00:00', 'content': 'To all of my influencer/celeb friends- read up on this   #FarmersProtest 👀', 'renderedContent': 'To all of my influencer/celeb friends- read up on this   #FarmersProtest 👀', 'id': 1359013362881994752, 'user': {'username': 'AmandaCerny', 'displayname': 'Amanda Cerny', 'id': 104856942, 'description': '🤍🤎🖤❤️🧡💛💚💙💜 Not a financial advisor. Comedy is life!', 'rawDescription': '🤍🤎🖤❤️🧡💛💚💙💜 Not a financial advisor. Comedy is life!', 'descriptionUrls': [], 'verified': True, 'created': '2010-01-14T16:24:48+00:00', 'followersCount': 1