# Import libraries: we will use the transformers pipeline and a BERT model fine-tuned for tweets
## Pandas used for df manipulation

In [None]:
from transformers import pipeline
import pandas as pd
# Build the inference pipeline once, loading the sentiment model from the 🤗 Hub
sentiment_analysis = pipeline(
    task="sentiment-analysis",
    model="finiteautomata/bertweet-base-sentiment-analysis",
)

### Let's wrap the sentiment analysis in a function so it can be applied to a dataframe column (note: `apply` calls it once per row; it is not vectorized or parallelized)

In [None]:
# Let's run the sentiment analysis on each tweet
# NOTE(review): `tweets` is not referenced by any of the visible cells; kept in
# case another cell relies on it.
tweets = []


def sentiment_analyzer(element):
    """Run the sentiment pipeline on a single value.

    Args:
        element: The value to classify (one cell of the tweet column).

    Returns:
        Whatever ``sentiment_analysis(element)`` returns, or the string
        ``'error'`` if the call raised an exception.
    """
    try:
        return sentiment_analysis(element)
    except Exception:
        # Deliberately best-effort: the full run takes a long time, so a row
        # that fails is marked instead of aborting everything. Catching
        # Exception (not a bare except) lets KeyboardInterrupt / SystemExit
        # propagate, so the run can still be stopped manually.
        return 'error'


## Let's read in our dataframe
### We don't need all of the data from the dataframe, just the tweets and dates for downstream analysis. Let's keep the preprocessed content as well in case we need it later.

In [None]:
# Only these columns are loaded from the CSV.
cols = ['date', 'rawContent', 'renderedContent', 'lang', 'preprocessedContent']

df = pd.read_csv(
    'data/filtered_df.csv',
    usecols=cols,
    # Read the text columns as str; 'date' is left to pandas' type inference.
    # No entry for 'id': that column is not in `cols`, so it is never loaded.
    dtype={
        'rawContent': 'str',
        'renderedContent': 'str',
        'lang': 'str',
        'preprocessedContent': 'str',
    },
)
df.head()

## Now let's apply the sentiment analysis to the dataframe. We use the rendered content because it includes emojis, and this sentiment analyzer can use emojis!

In [None]:
# Score every rendered tweet; the analyzer is passed directly, so it is called once per row
tweet_sentiments = df['renderedContent'].apply(sentiment_analyzer)
df['sentiment_analysis'] = tweet_sentiments
df['sentiment_analysis']

## It looks like each result is a dictionary wrapped in an unneeded single-element list; let's get rid of the list

In [None]:
def _unwrap_prediction(result):
    """Return the first item of a non-empty list; pass any other value through.

    Rows that failed analysis hold the string 'error' rather than a list, and
    indexing that with [0] would silently turn it into 'e'. Passing non-list
    values through also makes this cell safe to re-run on already-unwrapped
    dictionaries.
    """
    if isinstance(result, list) and result:
        return result[0]
    return result


df['sentiment_analysis'] = df['sentiment_analysis'].apply(_unwrap_prediction)

## Let's take a peek and save the data for the ITSA analysis

In [None]:
# Preview the first five rows, including the new sentiment column
df.head(n=5)

In [27]:
# Number of rows in the dataframe
df.shape[0]


1648284

In [28]:
# Write the dataframe, sentiment column included, to disk for the ITSA analysis
output_path = 'data/filtered_with_sentiment_df.csv'
df.to_csv(output_path)