In [1]:
import pandas as pd
import requests
import json
import ast
import yaml
import urllib
from datetime import datetime, timedelta, timezone
from google.colab import drive
import re
from textblob import TextBlob

!pip install flair
import flair

!pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Collecting flair
[?25l  Downloading https://files.pythonhosted.org/packages/f0/3a/1b46a0220d6176b22bcb9336619d1731301bc2c75fa926a9ef953e6e4d58/flair-0.8.0.post1-py3-none-any.whl (284kB)
[K     |████████████████████████████████| 286kB 5.7MB/s 
Collecting mpld3==0.3
[?25l  Downloading https://files.pythonhosted.org/packages/91/95/a52d3a83d0a29ba0d6898f6727e9858fe7a43f6c2ce81a5fe7e05f0f4912/mpld3-0.3.tar.gz (788kB)
[K     |████████████████████████████████| 798kB 44.2MB/s 
[?25hCollecting sentencepiece==0.1.95
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 42.6MB/s 
[?25hCollecting ftfy
[?25l  Downloading https://files.pythonhosted.org/packages/04/06/e5c80e2e0f979628d47345efba51f7ba386fe95963b11c594209085f5a9b/ftfy-5.9.tar.gz (66kB)
[K     |████████████████████████████████| 71kB 6.0MB/s 
[?

In [2]:
# Build the two analyzer objects that the prediction helpers below read as globals.
# VADER: rule-based analyzer from the vaderSentiment package.
vader_analyzer = SentimentIntensityAnalyzer()
# flair: pre-trained 'en-sentiment' text classifier (downloaded on first use, see cell output).
sentiment_model = flair.models.TextClassifier.load('en-sentiment')

2021-04-06 20:51:29,026 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpig5h5tzm


100%|██████████| 265512723/265512723 [00:11<00:00, 24093414.88B/s]

2021-04-06 20:51:40,433 copying /tmp/tmpig5h5tzm to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2021-04-06 20:51:41,443 removing temp file /tmp/tmpig5h5tzm
2021-04-06 20:51:42,341 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




In [3]:
# Load tweets we extracted from running the fetch_and_save_tweets notebook and that are stored on github

df_tweets = pd.read_csv('https://raw.githubusercontent.com/ml-group8/assignment/main/elec_vehicle_tweets.csv')

# Keep only the timestamp and tweets.
# .copy() makes the subset an independent frame, so the column assignments done
# in later cells (df_tweets['text'] = ..., df_tweets['tweet'] = ...) do not
# trigger pandas' SettingWithCopyWarning.
df_tweets = df_tweets[['created_at','text']].copy()

df_tweets.head()

Unnamed: 0,created_at,text
0,2021-03-20T15:59:46.000Z,@QuestMalloy I agree. I was looking to buy an ...
1,2021-03-20T15:59:30.000Z,RT @JessiSheron: this line read is so funny!??...
2,2021-03-20T15:59:09.000Z,RT @JessiSheron: this line read is so funny!??...
3,2021-03-20T15:59:03.000Z,@wef Germany electric car and 60% US states ma...
4,2021-03-20T15:58:49.000Z,@Greenpeace Germany electric car and 60% US st...


In [None]:
# Mount Google Drive under /content/gdrive so results can be saved there.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is already mounted - confirm against the Colab docs.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
cd /content/gdrive/MyDrive

/content/gdrive/MyDrive


In [None]:
# Force tweets to string so the regex-based cleaning below always receives str values.
# NOTE(review): missing values presumably become the literal text 'nan' here - confirm whether that is intended.
df_tweets['text'] = df_tweets['text'].astype(str)

# Process the tweets
# Pre-compiled patterns used by clean_tweet.
whitespace = re.compile(r"\s+")
# 'https?' matches both http:// and https:// links (the 's' is optional).
web_address = re.compile(r"(?i)https?:\/\/[a-z0-9.~_\-\/]+")
user = re.compile(r"(?i)@[a-z0-9_]+")

def clean_tweet(tweet):
  """Return `tweet` with links and @mentions removed and whitespace normalised.

  Args:
    tweet: the raw tweet text (str).

  Returns:
    The cleaned text: every match of `web_address` and `user` is deleted, each
    run of whitespace is collapsed to a single space, and leading/trailing
    whitespace is stripped. May be an empty string if nothing else remains.
  """
  tweet = web_address.sub('', tweet)
  tweet = user.sub('', tweet)
  # Collapse whitespace last so the gaps left by removed links/mentions
  # do not survive as doubled or leading spaces.
  tweet = whitespace.sub(' ', tweet)
  return tweet.strip()

def predict_flare_sentiment(tweet):
  """Classify `tweet` with the global flair `sentiment_model`.

  Args:
    tweet: text to classify.

  Returns:
    A `(value, score)` tuple taken from the first label that the model
    attached to the sentence.
  """
  flair_sentence = flair.data.Sentence(tweet)
  # predict() annotates the sentence object in place; the result is read from its labels.
  sentiment_model.predict(flair_sentence)
  top_label = flair_sentence.labels[0]
  return (top_label.value, top_label.score)

def predict_textblob_sentiment(tweet):
  """Label `tweet` as POSITIVE or NEGATIVE using TextBlob.

  Args:
    tweet: text to analyse.

  Returns:
    A `(sentiment, subjectivity)` tuple. `sentiment` is 'POSITIVE' when the
    TextBlob polarity is above 0.05, otherwise 'NEGATIVE'.
  """
  # Polarity is a float in [-1, 1]: 1 means a positive statement, -1 a negative one.
  # Subjectivity runs from 0.0 (very objective) to 1.0 (very subjective).
  blob_sentiment = TextBlob(tweet).sentiment

  if blob_sentiment.polarity > 0.05:
    label = 'POSITIVE'
  else:
    label = 'NEGATIVE'
  return (label, blob_sentiment.subjectivity)

def predict_vader_sentiment(tweet):
  """Label `tweet` as POSITIVE or NEGATIVE using the global `vader_analyzer`.

  The analyzer returns a dict with a 'compound' key holding the compounded score:
    positive sentiment: compound score >= 0.05
    neutral sentiment: (compound score > -0.05) and (compound score < 0.05)
    negative sentiment: compound score <= -0.05
  Neutral scores are reported as 'NEGATIVE' here, since only two labels are used.

  Args:
    tweet: text to analyse.

  Returns:
    A `(sentiment, compound_score)` tuple. Vader does not appear to provide a
    confidence value, so the compound score itself is returned.
  """
  compound_score = vader_analyzer.polarity_scores(tweet)['compound']
  # '>=' (not '>') so a score of exactly 0.05 counts as positive, matching the thresholds above.
  sentiment = 'POSITIVE' if compound_score >= 0.05 else 'NEGATIVE'
  return (sentiment, compound_score)

# Some analyzers will return neutral results and we will consider those as negative assuming no buzz about a stock's domain is bad news for its price
def assign_sentiment_and_score(df):
  """Run all three analyzers on every tweet and store the results in `df`.

  Args:
    df: DataFrame with a 'tweet' column of cleaned tweet text. It is modified
      in place; nothing is returned.

  Adds these columns (names keep the existing 'flare' spelling):
    flare_sentiment, flare_confidence       - from predict_flare_sentiment
    txtblob_sentiment, txtblob_subjectivity - from predict_textblob_sentiment
    vader_sentiment, vader_compound_score   - from predict_vader_sentiment
  """
  column_names = [
    'flare_sentiment', 'flare_confidence',
    'txtblob_sentiment', 'txtblob_subjectivity',
    'vader_sentiment', 'vader_compound_score',
  ]

  # Collect one result tuple per tweet, in row order. Whole columns are then
  # assigned at once, which avoids six scalar writes per row and also works
  # when the index contains duplicate labels.
  results = []
  for tweet in df['tweet']:
    flare_analysis = predict_flare_sentiment(tweet)
    txtblob_analysis = predict_textblob_sentiment(tweet)
    vader_analysis = predict_vader_sentiment(tweet)
    results.append((
      flare_analysis[0], flare_analysis[1],
      txtblob_analysis[0], txtblob_analysis[1],
      vader_analysis[0], vader_analysis[1],
    ))

  for position, name in enumerate(column_names):
    # Plain lists are assigned positionally, so the frame's index does not matter.
    df[name] = [result[position] for result in results]


# Clean every raw tweet into a new 'tweet' column (clean_tweet is passed directly; no lambda wrapper needed)
df_tweets['tweet'] = df_tweets['text'].map(clean_tweet)

#We can drop the original tweet text now that the tweets have been cleaned
del df_tweets['text'] # We have the cleaned tweets

# Assign sentiment and save the dataframe to CSV on the google drive
# (the relative path resolves against the working directory set by the earlier cd cell)
print('Sentiment analysis starting at : ' + str(datetime.now()))
assign_sentiment_and_score(df_tweets)
df_tweets.to_csv('df_tweets_sentiment_3_analyzers.csv')
print('Sentiment analysis completed at : ' + str(datetime.now()))

# Preview the scored tweets. The original last line referenced the undefined
# name df_tweet_copy, which would raise NameError after the analysis finished.
df_tweets.head()


Sentiment analysis starting at : 2021-04-06 20:53:12.698285
