In [135]:
from textblob import TextBlob
import pandas as pd
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
import requests

bearer_token = ""

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\saphy\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


### Task 1 : Derive the sentiment of each tweet using Python module (no need to create your Algorithm)

In [80]:
dataset_name = "Europe"
pd.options.display.max_colwidth = 200 #Increase the size of the output

# Remove unwanted symbols
def clean_tweet(tweet):
    return tweet.replace("#", "").replace("@", "").replace("RT", "").replace("\n", "")

# Import the tweets dataframe
tweets_df = pd.read_csv("data/" + dataset_name + "/tweets.csv")

analyser = SentimentIntensityAnalyzer()

# Assig a sentiment variable to the tweets dataframe
tweets_df['sentiment'] = tweets_df['text'].apply(lambda x: analyser.polarity_scores(clean_tweet(x))['compound'])
tweets_df[['sentiment', 'text']].head(20)


Unnamed: 0,sentiment,text
0,0.7783,"#Europe #jetfuel even Europe bouncing back&gt;\n\nRegional air traffic was back to 85.5% of pre-pandemic levels this week, according to network manager Eurocontrol, with fuel uplift now showing a ..."
1,0.0,Prime Minister of #Estonia. #Sweden #NATO #Europe https://t.co/Ah3euQrPLN
2,0.296,Breaking: Sweden will submit an application to join NATO\n\n#nato #sweden #Finland #europe #UkraineRussianWar #NATO #balticsea https://t.co/Ydj8jU98yg
3,0.8316,Eurovision’s biggest surprise? UK seems to be popular in Europe #bitcoin #Eurovisions #biggest #surprise #popular #Europe https://t.co/uGCJLgkWnu
4,0.0,"Moi je ne suis pas trop sûre que les sanctions n'isolent pas plutôt l'#Europe? Quelques grandes puissances européennes vont peut-être s'en sortir mais d'autres, moins sûr. Par contre la poignée d'..."
5,0.0,All this #fascist of the Nordic Resistance are also just defending their values and countries ... Right?\nNo surprise they attended #Azov Camps over the last yrs and now fight together with the fa...
6,0.0,Outside Roman Colosseum \n\n#colosseum #gladiator #rome #ancientrome #Rome #VaticanCity #travel #europe #vacation #italy #traveller #placestovisit @ Roman Colosseum https://t.co/I13cWhzRwP
7,-0.1027,"A “perfect winter storm” may be forming in #Europe, as the continent seeks to limit Russian gas flows, analysts at #RystadEnergy said in a press release this week. They added there might be not en..."
8,0.3182,"@KM88334862 Top 6 Uganda Day Tours, Trips and Short Excursions\nhttps://t.co/5sZbjxjwCe #daytrip #Exclusive short excursions #UnitedStates #Ukraine #Finland #France #Albania #Denmark #Switzerland ..."
9,0.0,#international #world #global #earth #europe #rwc2022 #RetinaWcongress


### Task 2 : Top 10 hash tags and users based on their number of tweets in your data set.

In [130]:
# Add a key to a dictionary if it doesn't exist yet
def addToDict(hashtags, hash):
    if (hash in hashtags):
        hashtags[hash] += 1
    else:
        hashtags[hash] = 1

# Parse a tweet and add the hashtags to the dictionary
def parse_tweet_for_hashtags(hashtags, tweet):
    
    # List of ending characters
    invalid_chars = [',', '.', '!', '?', ':', ';', '"', '#', '\'', '\\', '/', '|', '@', '$', '%', '^', '&', '*', '(', ')', '_', '+', '=', '{', '}', '[', ']', '<', '>', '~', '`', ' ', '\n', '\t', '\xa0']
    
    parsing_hashtag = False # State value to know if we are parsing a hashtag or not
    hashtag = "" # The hashtag we are parsing
    for letter in tweet:

        # Begin parsing a hashtag
        if letter == "#":
            parsing_hashtag = True
            hashtag += letter

        # End parsing a hashtag
        elif parsing_hashtag and letter in invalid_chars:
            parsing_hashtag = False
            addToDict(hashtags, hashtag)
            hashtag = ""

        # Continue parsing a hashtag
        elif parsing_hashtag:
            hashtag += letter

        # Not parsing a hashtag
        else:
            continue
    # Add the last hashtag, if it exists
    if parsing_hashtag:
        addToDict(hashtags, hashtag)

# Fill a dictionary with the hashtags and their counts
def get_hashtags(datasets):

    hashtags = {} # Create a dictionary to store the hashtags and their counts
    for dataset in datasets:
        tweets_df = pd.read_csv("data/" + dataset + "/tweets.csv")
        for tweet in tweets_df['text']:
            parse_tweet_for_hashtags(hashtags, tweet)
    return hashtags

# Fill a dictionary with the number of tweets per author
def get_author_ids(datasets):
    authors = {} # Create a dictionary to store the authors and their counts
    for dataset in datasets:
        tweets_df = pd.read_csv("data/" + dataset + "/tweets.csv")
        for author in tweets_df['author_id']:
            addToDict(authors, author)
    return authors

def author_by_id(id, datasets):
    for dataset in datasets:
        authors_df = pd.read_csv("data/" + dataset + "/authors.csv")
        author = authors_df[authors_df['id'] == id]
        if len(author) > 0:
            return author
            
    return None

# Sort the hashtags by their counts and return the top 10
def top10(dict):
    sorted_dict = sorted(dict.items(), key=lambda x: x[1], reverse=True)
    return sorted_dict[:10]

def print_top_10_hashtags(datasets):
    hashtags = get_hashtags(datasets)
    top = top10(hashtags)
    print("Top 10 hashtags:")
    for i in range(10):
        print(1 + i, ":", top[i][0], ":", top[i][1])

def print_top_10_authors(datasets):
    authors_id = get_author_ids(datasets)
    top = top10(authors_id)
    print("Top 10 authors:")
    for i in range(10):
        author = author_by_id(top[i][0], datasets)
        print(1 + i, " : ", author['name'].values[0], " (@", author['username'].values[0], ") : ", top[i][1], sep="")
    


# Get the hashtags and their counts
datasets = ["Europe"]

print_top_10_hashtags(datasets)

print("\n")

print_top_10_authors(datasets)






Top 10 hashtags:
1 : #Europe : 40
2 : #europe : 10
3 : #Ukraine : 10
4 : #travel : 8
5 : #UnitedStates : 8
6 : #Germany : 6
7 : #USA : 6
8 : #America : 6
9 : #Sweden : 5
10 : #Finland : 5


Top 10 authors:
1 : Kabira Gorilla Trekking Safaris (@gorillakabira) : 5
2 : Valurank (@valurank) : 3
3 : The Traveling Types (@type_traveling) : 2
4 : AAGGAGAT (@AAGGAGAT1) : 2
5 : عبدالرحمن الطعيس الاسهم والعملات 🇺🇸 (@red6660d) : 2
6 : Mcryptoz (@McryptozCom) : 2
7 : Tracy (𝕮𝖍𝖎) (@chigrl) : 1
8 : Shane Woodford (@WoodfordinDK) : 1
9 : Lars (@Norrtillsoder) : 1
10 : Car (@carwaxo) : 1


### Task 3 : Get the followers of a given twitter user from your acquired data set.

In [148]:
def user_id_by_username(username, datasets):

    # Remove the @ symbol
    username = username.replace("@", "")

    # Look for the user in the datasets
    for dataset in datasets:
        authors_df = pd.read_csv("data/" + dataset + "/authors.csv")
        author = authors_df[authors_df['username'] == username]
        if len(author) > 0:
            return author['id'].values[0]
    return None

def get_followers(user_id, max_results):
    #url = "https://api.twitter.com/2/users/" + str(user_id) + "/followers?max_results=" + str(max_results)
    url = "https://api.twitter.com/2/users/2244994945/followers?user.fields=id,created_at,name,username,protected,verified,profile_image_url,location,url,description,pinned_tweet_id,public_metrics"
    res = requests.get(url, headers={"Authorization": "Bearer " + bearer_token})

    # Check if there is an error
    if (res.json().get("errors")):
        print("Error:", res.json()["errors"])
        return None

    # Convert the response to a dataframe
    followers_df = pd.DataFrame(res.json()["data"])

    # Save the dataframe to a csv file
    followers_df.to_csv("data/followers/" + str(user_id) + ".csv", index=False)

In [150]:
datasets = ["Europe"]
id = user_id_by_username("@gorillakabira", datasets)
get_followers(id, 10)

followers_df = pd.read_csv("data/followers/" + str(id) + ".csv")
followers_df

Unnamed: 0,name,created_at,username,id
0,HighLevelsLeaderships🌍Directeur 🇫🇷,2016-12-25T19:59:51.000Z,industry140,813112037635223554
1,AarojassajoraA,2021-07-29T09:46:25.000Z,ManuelAarnRoja2,1420682034729656327
2,جاسم,2019-04-28T00:38:53.000Z,2W9r9yDMq0epYYt,1122299109854019585
3,Idowu Adesoji,2022-04-28T12:21:02.000Z,SojiIdowu06,1519652428219568134
4,Hut Ho van,2022-05-15T17:22:31.000Z,hutakak,1525889297907593218
...,...,...,...,...
95,सांक?,2022-02-27T15:41:17.000Z,sanka01811857,1497959993772892161
96,thanh dan,2022-02-28T08:46:01.000Z,thanhdandan12,1498217873206288384
97,Tien Dat,2021-12-16T16:34:12.000Z,TienDat0901,1471518978023432199
98,Tiến Nguyễn,2021-12-16T16:21:20.000Z,Tiennguyen2231,1471515743707566093
