## Import libraries

In [74]:
from pytwitter import Api
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import math
from pytwitter.models import User;
from pytwitter.models import Tweet;


In [263]:
api = Api(bearer_token="AAAAAAAAAAAAAAAAAAAAAMhqlAEAAAAA4Pqzn354Z5nlkP5lKaW98vzlVlA%3D7GIA03xacVKdFYTFg7qmgvWTZThpa2FFd4SNPUqP7uPK7Xjue5")

public_tweets = api.search_tweets(query="bolsonaro lang:pt has:hashtags -is:retweet", expansions=["referenced_tweets.id.author_id","in_reply_to_user_id","attachments.media_keys","author_id","entities.mentions.username"], 
                                  user_fields=["created_at","entities","id","location","name","pinned_tweet_id","profile_image_url","protected","public_metrics","url","username","verified"],
                                  tweet_fields=["attachments","author_id","context_annotations","created_at","entities","geo","in_reply_to_user_id","lang","public_metrics","reply_settings","source"], max_results=20)

# Analysis

## User's influence and reputation Analysis

In [68]:
def get_sentiment_score(text):
    "float: The sentiment score between -1.0 (negative) and 1.0 (positive)"
    blob = TextBlob(text)
    return blob.sentiment.polarity

### Influence Calculus

In [84]:
def calculate_influence(user: User):
    follower_count = user.public_metrics.followers_count

    # Get user's tweet count and average engagement rate
    tweets = api.get_timelines(user.id, max_results=100, tweet_fields=["attachments","author_id","context_annotations","created_at","entities","geo","in_reply_to_user_id","lang","public_metrics","reply_settings","source"])
    tweet_count = len(tweets.data)
    total_engagement = 0
    
    for tweet in tweets.data:
        total_engagement += tweet.public_metrics.like_count + tweet.public_metrics.retweet_count + tweet.public_metrics.quote_count + tweet.public_metrics.reply_count
        
    if tweet_count > 0:
        avg_engagement_rate = total_engagement / (tweet_count * follower_count) if total_engagement > 0 and tweet_count > 0 and follower_count > 0 else 0
    else:
        avg_engagement_rate = 0

    # Calculate influence score
    influence_score = math.log(follower_count + 1, 10) * (avg_engagement_rate + 1)
    
    return influence_score

### Reputation Calculus

In [80]:
def calculate_reputation(user: User):
    # Get user's recent mentions and replies
    mentions = api.search_tweets(query=f"@{user.username}", max_results=100)
    replies = api.search_tweets(query=f"to:{user.username}", max_results=100)

    # Calculate reputation score based on sentiment analysis of mentions and replies
    positive_sentiments = 0
    negative_sentiments = 0
    
    for mention in mentions.data:
        if mention.author_id != user.id:
            sentiment = get_sentiment_score(mention.text)
            if sentiment > 0:
                positive_sentiments += 1
            elif sentiment < 0:
                negative_sentiments += 1
                
    for reply in replies.data:
        if reply.author_id != user.id:
            sentiment = get_sentiment_score(reply.text)
            if sentiment > 0:
                positive_sentiments += 1
            elif sentiment < 0:
                negative_sentiments += 1
                
    if (positive_sentiments + negative_sentiments) > 0:
        reputation_score = positive_sentiments / (positive_sentiments + negative_sentiments)
    else:
        reputation_score = 0

    # Return influence and reputation scores
    return reputation_score

## Network Analysis

## Text Analysis

### Text Cleaning

In [None]:
import re
import string
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

def remove_urls(text):
    """Remove URLs from a text"""
    return re.sub(r"http\S+", "", text)

def remove_mentions(text):
    """Remove mentions from a text"""
    return re.sub(r"@\S+", "", text)

def remove_hashtags(text):
    """Remove hashtags from a text"""
    return re.sub(r"#\S+", "", text)

def remove_punctuation(text):
    """Remove punctuation from a text"""
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)

def tokenize(text):
    """Tokenize a text"""
    return word_tokenize(text, language='portuguese')

def remove_stopwords(tokens):
    """Remove stopwords from a list of tokens"""
    stop_words = set(stopwords.words('portuguese'))
    return [token for token in tokens if not token in stop_words]

def lemmatize(tokens):
    """Lemmatize a list of tokens"""
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for token in tokens:
        lemma = lemmatizer.lemmatize(token, wordnet.VERB)
        if lemma == token:
            lemma = lemmatizer.lemmatize(token, wordnet.NOUN)
        if lemma == token:
            lemma = lemmatizer.lemmatize(token, wordnet.ADJ)
        if lemma == token:
            lemma = lemmatizer.lemmatize(token, wordnet.ADV)
        lemmas.append(lemma)
    return lemmas

def synonymize(tokens):
    """Synonymize a list of tokens"""
    synonyms = []
    for token in tokens:
        synsets = wordnet.synsets(token, lang='por')
        if synsets:
            synset = synsets[0]
            for lemma in synset.lemmas(lang='por'):
                synonym = lemma.name()
                if synonym not in synonyms and synonym != token:
                    synonyms.append(synonym)
    return synonyms

def preprocess_tweet(text):
    """Preprocess a Brazilian Portuguese tweet"""
    text = text.lower()
    text = remove_urls(text)
    text = remove_mentions(text)
    text = remove_hashtags(text)
    text = remove_punctuation(text)
    tokens = tokenize(text)
    tokens = remove_stopwords(tokens)
    tokens = lemmatize(tokens)
    tokens.extend(synonymize(tokens))
    return tokens

### Tweet Social Capital

In [230]:
import re
import datetime

def get_tweet_social_capital(tweet: Tweet):
    # Get tweet text
    text = tweet.text
    
    # Get number of likes and retweets
    likes = tweet.public_metrics.like_count
    retweets = tweet.public_metrics.retweet_count
    replies = tweet.public_metrics.reply_count
    words = len(text.split())
    hashtags = len(re.findall(r'#(\w+)', text))
    
    # Get tweet creation time
    created_at = tweet.created_at
    created_at = datetime.datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")
    now = datetime.datetime.utcnow()
    age = (now - created_at).total_seconds() / 3600 # tweet age in hours
    
    # Calculate recency score
    recency_score = math.exp(-0.1 * age)
    
    # Get URLs in tweet
    urls = re.findall('https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', text)
    num_urls = len(urls)
    
    # Get number of emojis in tweet
    emojis = re.findall(r'[^\w\s,]', text)
    num_emojis = len(emojis)
    
    # Get number of photos and videos in tweet
    num_medias = 0
    
    if tweet.attachments is not None and tweet.attachments.media_keys is not None:
        for attachment in tweet.attachments.media_keys:
            num_medias += 1
    
    # Calculate media score
    media_score = 0.5 * num_medias
    
    # Calculate URL score
    url_score = 0.5 * num_urls
    
    # Calculate emoji score
    emoji_score = 0.5 * num_emojis
    
    # Calculate engagement score
    engagement_score = likes + retweets + replies
    
    # Calculate social capital
    social_capital = recency_score * (media_score + url_score + emoji_score + engagement_score + words + hashtags)
    
    return {'tweet': tweet, 'score': social_capital }

# Social Capital Calculus

In [264]:
teste = {}
for user in public_tweets.includes.users:
    teste[user.id] = [calculate_influence(user), calculate_reputation(user)]
    

In [271]:
ranking = {}
usersTeste = {}
for tweet in public_tweets.data:
    for bla in public_tweets.includes.users:
        if(bla.id == tweet.author_id):
            usersTeste[bla.username] = sum(teste[tweet.author_id])
            
    ranking[tweet.id] = get_tweet_social_capital(tweet)
    ranking[tweet.id]['score'] += sum(teste[tweet.author_id]) 
    
    
rankedUsers = dict(sorted(usersTeste.items(), key=lambda item: item[1], reverse=True))
print(rankedUsers)
    
    

{'le40747700': 24.753100092618098, 'lucineianiro57': 6.31146906070576, 'maykonwilian2': 6.279055666699698, 'jornalistavitor': 5.278702875974146, 'GuimaraesLiih': 4.811849016425214, 'betobraga14': 4.2705576070370315, 'Civuca5': 4.1008624434477765, 'flordelicadezas': 3.8037166841769916, 'DiCavaletti': 3.4505540974923665, 'pripstuita': 3.3978202526278927, 'guimarques1985': 3.3658975931999717, 'JaceNordestino': 3.23587311613621, 'DesculpasDoPT': 3.122870922864435, 'PoliticaBR5': 3.0935918820472077, 'Xavierhoz07': 2.0614270026106114, 'garcia15_stela': 1.3507329164653912, 'TaniaDur1': 1.0604019886144498, 'JoseAiltonGLim2': 0.7024648543576988, 'Vandersonjrs': 0.0}


In [272]:
ranked = dict(sorted(ranking.items(), key=lambda item: item[1]['score'], reverse=True))
for tweet in ranked: 
    print(ranking[tweet]['score'], ranking[tweet]['tweet'].text)

54.31735854610308 O bandido do Bolsonaro, depois de assassinar 7 pessoas, dentre elas uma criança de 12 anos que corria para salvar sua vida, atirou para matar policiais. Quem já sabia que essa gente armada iria se voltar contra as forças de segurança? Satisfeitos? #policiamilitar #exercito https://t.co/trAW2KD5Ym
53.75598989219125 O Palácio do Planalto busca aliados dentro dos partidos que estiveram ao lado do ex-presidente Jair Bolsonaro (PL) na eleição do ano passado.

O objetivo é ampliar a base no congresso, mesmo sem contar com o apoio formal dessas legendas. 
#JornalDaCultura #JC https://t.co/mz2gHncAFc
52.96137450697559 crime da sinuca em #Sinop: Os bolsominions q assassinaram 7 pessoas após perder na sinuca tinham passagens por roubo, violência doméstica e porte ilegal de armas.
Mesmo assim o governo Bolsonaro deu ARMAS LEGALIZADAS aos 2 criminosos.#BrasilUrgente #bolsonaro #CidadeAlerta #BBB23 https://t.co/bkUeuA0HND
48.2397036236754 Repeteco mas atual:
De 2019 a 2022, sob o 

# Results

## Influence vs Interactions

## Reputation vs Interactions

## Histogram Interactions

## Evaluation Metrics

In [49]:
def analyze_social_capital(tweets):
    public_tweets = api.search_tweets(query="stf lang:pt has:hashtags is:retweet", expansions=["referenced_tweets.id.author_id","in_reply_to_user_id","attachments.media_keys","author_id","entities.mentions.username"],
    user_fields=["created_at","entities","id","location","name","pinned_tweet_id","profile_image_url","protected","public_metrics","url","username","verified"],
    tweet_fields=["attachments","author_id","context_annotations","created_at","entities","geo","in_reply_to_user_id","lang","public_metrics","reply_settings","source"], max_results=100)

    api.get_followers()

    public_tweets.includes.tweets

#analyze_social_capital(public_tweets.data)


In [None]:

def social_capital_impact(tweet, social_connections, interactions, texts, resources, user_popularity, user_influence):
  # extract the user, mentions, and hashtags from the tweet
  user = tweet['user']
  mentions = tweet['mentions']
  hashtags = tweet['hashtags']
  
  # compute the number of connections the user has in the social network
  user_connections = np.sum(social_connections[user])
  
  # compute the number of connections between the user and the mentions
  mention_connections = 0
  for mention in mentions:
    mention_connections += social_connections[user][mention]
  
  # compute the number of connections between the user and the hashtags
  hashtag_connections = 0
  for hashtag in hashtags:
    hashtag_connections += social_connections[user][hashtag]
  
  # compute the sum of the resources associated with the tweet
  resource_sum = 0
  for resource in resources:
    if resource in tweet['resources']:
      resource_sum += resources[resource]
  
  # compute the number of interactions for the tweet
  interaction_sum = interactions[tweet['tweet_id']]
  
  # compute the text similarity between the tweet and the other texts
  vectorizer = TfidfVectorizer()
  tweet_text = [tweet['text']]
  tweet_vector = vectorizer.fit_transform(tweet_text)
  other_vectors = vectorizer.transform(texts)
  similarity = cosine_similarity(tweet_vector, other_vectors)
  
  # compute the user popularity and influence factors
  popularity_factor = user_popularity[user]
  influence_factor = user_influence[user]
  
  # compute the social capital impact as the sum of mention connections, hashtag connections, resource sum, interaction sum,
  # text similarity, popularity factor, and influence factor divided by the number of user connections
  impact = (mention_connections + hashtag_connections + resource_sum + interaction_sum + similarity + popularity_factor + influence_factor) / user_connections
  
  return impact
