## Libraries imported

In [1]:
import re

import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


## Test tweet

In [107]:
# Sample tweet used to try the pipeline on a single input. It ends with the
# folded-hands emoji plus a skin-tone modifier (U+1F64F U+1F3FB).
tweet = "@Twitter I hope you have a nice day 🙏🏻"

## Function to preprocess the tweet

In [2]:
# preprocess tweet

# A whitespace-delimited token that starts with "@" and has at least one more
# character (a lone "@" is left untouched).
_MENTION_RE = re.compile(r'(?<!\S)@\S+')
# A whitespace-delimited token that starts with "http".
_URL_RE = re.compile(r'(?<!\S)http\S*')


def preprocess_tweet(tweet):
    """Replace user mentions and links in a tweet with generic placeholders.

    Every whitespace-delimited token that begins with ``@`` (and is longer
    than one character) becomes ``'@user'``; every token that begins with
    ``http`` becomes ``'http'``. All other text, including the original
    whitespace (spaces, newlines, tabs), is returned unchanged.

    Tokens are delimited by any whitespace, not only by the space character,
    so a mention or link at the start of a new line is recognised and the
    text that follows a newline is never swallowed by the replacement.
    A mention wrapped in punctuation, e.g. ``"(@name)"``, does not start
    with ``@`` and is therefore left as is.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The tweet with mentions and links replaced by placeholders.
    """
    tweet_proc = _MENTION_RE.sub('@user', tweet)
    tweet_proc = _URL_RE.sub('http', tweet_proc)
    return tweet_proc

In [108]:
# Normalise the sample tweet (mentions -> '@user', links -> 'http') before scoring it.
tweet_proc = preprocess_tweet(tweet)

## Loading the model and tokenizer

In [3]:
# Hugging Face checkpoint shared by the tokenizer and the classifier.
# NOTE(review): the tweets scored further down appear to be in Spanish;
# confirm that this checkpoint supports that language.
roberta = 'cardiffnlp/twitter-roberta-base-sentiment'

# Tokenizer and sequence-classification model, both loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(roberta)
model = AutoModelForSequenceClassification.from_pretrained(roberta)

# Human-readable class names; position i is paired with the model's i-th score.
# Presumably this order matches the checkpoint's class indices; verify against the model card.
labels = ['Negative', 'Neutral', 'Positive']

## Function that passes the processed tweet to the model and returns the predicted label

In [4]:
# Sentiment analysis

def sentiment_analysis(tweet_proc):
    """Return the most likely sentiment label for a preprocessed tweet.

    The text is tokenized with the module-level ``tokenizer``, scored by the
    module-level ``model``, and the scores are turned into probabilities
    with ``softmax``.

    Parameters
    ----------
    tweet_proc : str
        Tweet text to classify.

    Returns
    -------
    str
        The entry of the module-level ``labels`` list at the position of the
        highest probability (the first such position on ties).
    """
    encoded_tweet = tokenizer(tweet_proc, return_tensors='pt')

    # Inference only: disabling gradient tracking avoids building an autograd
    # graph on every call, which matters when this is applied to many rows.
    with torch.no_grad():
        output = model(**encoded_tweet)

    # output[0] holds the scores for the batch; [0] selects the single tweet in it.
    scores = softmax(output[0][0].numpy())

    # Pick the winning class once, after all scores are known.
    return labels[int(scores.argmax())]

In [109]:
# Classify the preprocessed sample tweet.
sentiment = sentiment_analysis(tweet_proc)

In [110]:
# Show the label predicted for the sample tweet.
sentiment

'Positive'

## Testing the model with a dataframe

In [5]:
import pandas as pd

In [6]:
# Load the tweets to classify; the file is read from the current working directory
# and must contain a `tweet` column, which the following cells use.
df = pd.read_csv('tweets_sismos.csv') 

In [12]:
# Single-column frame (`tweet`) holding the preprocessed text of every tweet in `df`.
df_proc = df['tweet'].apply(preprocess_tweet).to_frame()

In [13]:
# Display the preprocessed tweets.
df_proc

Unnamed: 0,tweet
0,#Sismografo de la region del #Bíobío registran...
1,Todo México al enterarse del fallecimiento de ...
2,"#Cultura | 🐶Murió Frida, famosa perra rescatis..."
3,@user @user Jajajaja\nLa que generó junto con ...
4,"Muere Frida, perrita rescatista del sismo de 2..."
...,...
495,"#ÚLTIMAHORA SEMAR confirma muerte de Frida, la..."
496,También estubo en el terremoto del 2016 en Ecu...
497,#ÚltimaHora🚨 Fallece perrita rescatista #Frida...
498,La Secretaría de Marina (@SEMAR_mx) informó es...


In [16]:
# Add a `sentiment` column to `df` with the label predicted for each preprocessed tweet.
df['sentiment'] = df_proc['tweet'].apply(sentiment_analysis)

In [19]:
# Display the frame, which now includes the `sentiment` column.
df

Unnamed: 0,date,user,tweet,sentiment
0,2022-11-15 23:59:51+00:00,ChileAlertaApp,#Sismografo de la region del #Bíobío registran...,Neutral
1,2022-11-15 23:59:44+00:00,Titanio_Records,Todo México al enterarse del fallecimiento de ...,Negative
2,2022-11-15 23:59:28+00:00,ElNuevoSiglo,"#Cultura | 🐶Murió Frida, famosa perra rescatis...",Neutral
3,2022-11-15 23:58:55+00:00,Anabarent04,@PonchoGutz @DeniseMaerker Jajajaja\nLa que ge...,Neutral
4,2022-11-15 23:58:43+00:00,gmldigital,"Muere Frida, perrita rescatista del sismo de 2...",Neutral
...,...,...,...,...
495,2022-11-15 22:34:44+00:00,luisamiermx,"#ÚLTIMAHORA SEMAR confirma muerte de Frida, la...",Neutral
496,2022-11-15 22:34:44+00:00,Carlosmont88,También estubo en el terremoto del 2016 en Ecu...,Neutral
497,2022-11-15 22:34:12+00:00,ElChisteDelSofa,#ÚltimaHora🚨 Fallece perrita rescatista #Frida...,Neutral
498,2022-11-15 22:34:11+00:00,AnitaLomeli,La Secretaría de Marina (@SEMAR_mx) informó es...,Neutral


In [18]:
# Number of tweets assigned to each sentiment label.
df['sentiment'].value_counts()

Neutral     456
Positive     24
Negative     20
Name: sentiment, dtype: int64