### Task 4: Given a Twitter user, obtain the profiles and tweets of all of the user's followers and display them.

#### Imports

In [7]:
# Imports
import pandas as pd # Pandas for dataframes
from IPython.display import display # Display dataframes in Jupyter in conditional statements

#### Settings

In [8]:
# Widen the column display so that tweet text is not truncated in the rendered dataframes
pd.set_option("display.max_colwidth", 200)

#### Utility functions

In [9]:
def user_id_by_username(username, datasets):
    """Look up the id of the author whose username matches `username`.

    Every "@" character is stripped from `username` before the comparison.
    The datasets are scanned in the given order; for each one the file
    data/<dataset>/authors.csv is read and its 'username' column is compared
    (exact match) with the cleaned username.

    Returns the 'id' value of the first matching row of the first dataset
    that contains the username, or None when no dataset contains it.
    """
    handle = username.replace("@", "")

    for dataset_name in datasets:
        # Authors known for this dataset
        authors = pd.read_csv("data/" + dataset_name + "/authors.csv")

        # Rows whose username is exactly the requested handle
        matches = authors[authors['username'] == handle]

        # Nothing here: move on to the next dataset
        if matches.empty:
            continue

        return matches['id'].values[0]

    # The username was not found in any dataset
    return None

#### Main function

In [10]:
def get_followers_and_their_tweets(user_id, datasets):
    """Load the followers of a user and the tweets those followers wrote.

    The followers are read from data/followers/<user_id>.csv. For each
    dataset, data/<dataset>/tweets.csv is read and only the rows whose
    'author_id' appears in the followers' 'id' column are kept. An
    'author_name' column holding the follower's 'name' is added to them.

    Parameters:
        user_id: id used to build the followers file name (converted with str()).
        datasets: iterable of dataset directory names found under data/.

    Returns:
        (followers_df, tweets_df). tweets_df keeps the row index of each
        source tweets.csv. When the followers file does not exist, a message
        is printed and (None, None) is returned. When `datasets` is empty,
        tweets_df is an empty DataFrame.
    """
    # Try to load the followers dataframe from the csv file
    try:
        followers_df = pd.read_csv("data/followers/" + str(user_id) + ".csv")

    # If the file does not exist, there is nothing to return
    except FileNotFoundError:
        print('The user is not in the dataset')
        return None, None

    # Follower id -> follower name lookup. If an id appears several times,
    # the first occurrence wins (a Series used with .map needs a unique index).
    name_by_id = followers_df.drop_duplicates(subset='id').set_index('id')['name']

    # Collect the per-dataset results and concatenate once at the end
    tweets_parts = []
    for dataset in datasets:

        # Load the tweets of this dataset
        dataset_tweets = pd.read_csv("data/" + dataset + "/tweets.csv")

        # Keep only the tweets written by the user's followers; copy so that
        # adding a column does not write into a view of the loaded frame
        written_by_follower = dataset_tweets['author_id'].isin(followers_df['id'])
        follower_tweets = dataset_tweets[written_by_follower].copy()

        # Add the name of the author. The column is created even when no
        # tweet matched, so callers can always select 'author_name'.
        follower_tweets['author_name'] = follower_tweets['author_id'].map(name_by_id)

        tweets_parts.append(follower_tweets)

    # pd.concat raises on an empty list, so handle the "no datasets" case
    tweets_df = pd.concat(tweets_parts) if tweets_parts else pd.DataFrame()

    # Return the followers dataframe and the tweets dataframe
    return followers_df, tweets_df

#### Example usage

In [11]:
# Datasets to search and the account whose followers we want to inspect
datasets = ["Paris"]

# Named user_id rather than id so the builtin id() is not shadowed in the kernel
user_id = user_id_by_username("@ParisAntan", datasets)

followers_df, tweets_df = get_followers_and_their_tweets(user_id, datasets)

# get_followers_and_their_tweets returns (None, None), after printing a message,
# when there is no followers file for the user; only display when data was loaded
if followers_df is not None:
    print("Followers:")
    display(followers_df.head())
    print("Tweets:")
    display(tweets_df[["author_name", "text"]])


Followers:


Unnamed: 0,url,username,created_at,description,profile_image_url,protected,verified,name,public_metrics,id,location,pinned_tweet_id
0,,romamcallister,2022-06-12T22:43:33.000Z,a live moodboard for the ppl who get my vibe,https://pbs.twimg.com/profile_images/1536122850322415617/I3FGPrcg_normal.jpg,False,False,dani,"{'followers_count': 0, 'following_count': 150, 'tweet_count': 29, 'listed_count': 0}",1536116954854920192,,
1,,Sissi785,2020-08-05T11:28:05.000Z,,https://pbs.twimg.com/profile_images/1355627462986788873/_54_P0O5_normal.jpg,True,False,Marie le Doucen Perro,"{'followers_count': 5, 'following_count': 340, 'tweet_count': 781, 'listed_count': 0}",1290972481872855040,"Côtes-d'Armor, Bretagne",
2,,BikeTignolles,2019-03-21T23:18:34.000Z,Les Batignolles -Amboise. Vélociste en herbe,https://pbs.twimg.com/profile_images/1108915535373369346/5xKX0iEn_normal.jpg,False,False,BikeTignolles,"{'followers_count': 53, 'following_count': 441, 'tweet_count': 45, 'listed_count': 0}",1108870543095279616,"Paris, France",
3,https://t.co/3GCUq71RKp,SHAParis8_17,2022-06-11T09:00:41.000Z,"Fondée en 1898 pour faire connaître l'histoire, les sites, les monuments et les personnages célèbres des 2 arrondissements et défendre leur patrimoine culturel.",https://pbs.twimg.com/profile_images/1535549220895264770/5sMGVCgk_normal.jpg,False,False,Société Historique et Archéologique Paris 8e & 17e,"{'followers_count': 6, 'following_count': 46, 'tweet_count': 8, 'listed_count': 0}",1535547331956842499,Paris,
4,,tnnblstn,2013-10-20T14:40:39.000Z,Tacere è la nostra virtù.,https://pbs.twimg.com/profile_images/1395662185859325952/cxOHtPe__normal.jpg,False,False,Étienne™,"{'followers_count': 53, 'following_count': 474, 'tweet_count': 2236, 'listed_count': 0}",1975358845,Paris,1.166284e+18


Tweets:


Unnamed: 0,author_name,text
160,Patrick BERTAUX 🇨🇵😷,📍 Rue Saint-Rustique - #Paris 18éme \n📌 Quartier #Montmartre près de la Place du Tertre \n🚍 #Ligne40_RATP - Place du Tertre-Norvins* \n🚇 Lamarck-Caulaincourt (12) \n* Sous-réserve : il me semble q...
567,NOTRE-DAME de Paris Cathedral DAILY,Painting by Stanislas Lépine (Public Domain)\n\n#NotreDame #NotreDamedeParis #NotreDameCathedral #Paris #France https://t.co/NcYt6PzIhM
668,Patrick BERTAUX 🇨🇵😷,"📍 Boulevard Pasteur - Boulevard de Vaugirard \n📌 À #Paris 15éme \n🚇 Pasteur (6 &amp; 12) \n🚍 Lignes : 39, 88, 91 &amp; 95 Armorique https://t.co/FmGuUtOPPl https://t.co/4f53R67b1S"
1101,ange gabriel 📷,#Notre_Dame de #Paris https://t.co/s8rTcQbVD8
1289,Paris Chicago,#Paris🥰 https://t.co/Vc8gUq2DLK
1292,Paris Chicago,#Paris🥰 https://t.co/kKL49f8kll
1293,Paris Chicago,#Paris🥰 https://t.co/jkdY1tFPFZ
1295,Paris Chicago,#Paris🥰 https://t.co/JUo2by8LRZ
1296,Paris Chicago,#Paris🥰 https://t.co/XSdab9SNxh
1297,Paris Chicago,#Paris🥰 https://t.co/amphxxJkAl
