In [12]:
import pandas as pd
from ntscraper import Nitter

scraper = Nitter(log_level= 0, skip_instance_check=False)
language = ""

Testing instances: 100%|██████████| 16/16 [00:23<00:00,  1.45s/it]


In [13]:
def get_tweets(query: str, mode: str = "term", num_tweets: int = 100, lang = 'es'):
    '''
    Main function for web scraping tweets.
    Args:
        query: Words that are going to be searched
        mode: Searching query as a "term", "hashtag" or a "user" tweet
        num_tweets: Number of tweets to be scraped
        lang: Language of the tweets to be scraped
    '''
    global language
    language = lang
    tweets = scraper.get_tweets(terms= query, mode= mode, number= num_tweets, language= lang)
    return tweets

def tweets_to_df(tweets):
    '''
        Process the dictionary data from get_tweets function
        Args:
            tweets: Json type data collected through get_tweets

        Output:
            Pandas DataFrame with profle_id, text columns
    '''
    final_tweets = []
    for tweet in tweets['tweets']:
        data = [tweet['user']['profile_id'], tweet['text']]
        final_tweets.append(data)
    df = pd.DataFrame(final_tweets)
    df.columns = ["profile_id", "text"]
    return df


In [14]:
tweets = get_tweets(query="estoy cansado de", lang="es", num_tweets= 10)
df = tweets_to_df(tweets)

In [19]:
import re
from nltk.corpus import stopwords
import nltk
import emoji

nltk.download('stopwords')
stopword_en = nltk.corpus.stopwords.words('english')
stopword_es = nltk.corpus.stopwords.words('spanish')
stop_words = stopword_en + stopword_es

def clean_tweet(tweet):
    tweet = tweet.lower()
    tweet = re.sub(r'http\S+', '', tweet)  # Deletes URLs
    tweet = re.sub(r'@\w+', '', tweet)  # Deletes mentions
    tweet = re.sub(r'#\w+', '', tweet)  # Deletes hashtags
    tweet = re.sub(r'\d+', '', tweet)  # Deletes numbers
    tweet = ' '.join([word for word in tweet.split() if word not in stop_words])
    tweet = emoji.replace_emoji(tweet, replace= '') # Deletes emojis
    return tweet

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Pablo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [56]:
from pysentimiento import create_analyzer
from langdetect import detect

def get_language(txt: str):
    return detect(txt)
    
def get_sentiment(tweet):
    analyzer = create_analyzer(task="sentiment", lang= language)
    return analyzer.predict(tweet).probas

for tweet in df['text']:
    cleaned_tweet = clean_tweet(tweet)
    sentiment = get_sentiment(tweet)
    df["POS"] = sentiment["POS"]
    df["NEU"] = sentiment["NEU"]
    df["NEG"] = sentiment["NEG"]
    print(f'\nTweet: {cleaned_tweet}\nSentiment: {sentiment}\n --------------------------------')



Tweet: bastante cansado lloros jóvenes creen generación "boomer" consiguió tener arte magia. abuelo, viajaba cada año, salía fiesta, ahorraba, comió fuera, hipotecó trabajaba lunes sábado. ¿tú haces?
Sentiment: {'NEG': 0.9614485502243042, 'NEU': 0.03541366755962372, 'POS': 0.0031378325074911118}
 --------------------------------

Tweet: creemos mejores, cansado fingir no.
Sentiment: {'NEG': 0.805160403251648, 'NEU': 0.17599305510520935, 'POS': 0.01884656772017479}
 --------------------------------

Tweet: encanta, cuanto rápido vamos hacia destrucción terminamos. cansado esperar, ojalá empiecen caer bombas. mundo solución sobra % población.
Sentiment: {'NEG': 0.9797772765159607, 'NEU': 0.01690642721951008, 'POS': 0.0033162208274006844}
 --------------------------------

Tweet: xenófobo, cansado financiar delincuentes blancos, negros, moros cristianos homófobo, cansado financiar gays lesbianas viciosos solo hecho ser gays lesbianas viciosos. ppfobo, cansado partido dice derechas sueñe 

In [57]:
df

Unnamed: 0,profile_id,text,POS,NEU,NEG
0,1611743513976119297,Estoy bastante cansado de los lloros y de los ...,0.003221,0.087439,0.909339
1,1814166107931947008,"No nos creemos mejores, lo somos y ya estoy ca...",0.003221,0.087439,0.909339
2,1703765257825488896,"Me encanta, cuanto más rápido vamos hacia la d...",0.003221,0.087439,0.909339
3,1321828570700939265,"No soy xenófobo, pero estoy cansado de financi...",0.003221,0.087439,0.909339
4,2167792297,"es que es una pasada, yo no lo quiero probar n...",0.003221,0.087439,0.909339
5,1475015802776653824,"no puedo más, mi mente no puede más, estoy can...",0.003221,0.087439,0.909339
6,1598688872954822657,Estoy cansado de ver gente sola sentada en los...,0.003221,0.087439,0.909339
7,1826372026312900608,El diario de todas las aficiones,0.003221,0.087439,0.909339
8,1821555742798733312,"El NIÑO Morgan Freeman, anuncia. Su retirada: ...",0.003221,0.087439,0.909339
9,1631669984869974018,Nacio cansada. Esta igual que aquel que dijo e...,0.003221,0.087439,0.909339
