<a href="https://colab.research.google.com/github/ml-group8/assignment/blob/main/sentiment_analyze_tweets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import requests
import json
import ast
import yaml
import urllib
from datetime import datetime, timedelta, timezone
from google.colab import drive
import re
from textblob import TextBlob

!pip install flair
import flair

!pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Collecting flair
[?25l  Downloading https://files.pythonhosted.org/packages/f0/3a/1b46a0220d6176b22bcb9336619d1731301bc2c75fa926a9ef953e6e4d58/flair-0.8.0.post1-py3-none-any.whl (284kB)
[K     |█▏                              | 10kB 16.3MB/s eta 0:00:01[K     |██▎                             | 20kB 21.9MB/s eta 0:00:01[K     |███▌                            | 30kB 11.8MB/s eta 0:00:01[K     |████▋                           | 40kB 9.4MB/s eta 0:00:01[K     |█████▊                          | 51kB 7.9MB/s eta 0:00:01[K     |███████                         | 61kB 7.9MB/s eta 0:00:01[K     |████████                        | 71kB 7.9MB/s eta 0:00:01[K     |█████████▏                      | 81kB 8.6MB/s eta 0:00:01[K     |██████████▍                     | 92kB 8.7MB/s eta 0:00:01[K     |███████████▌                    | 102kB 8.9MB/s eta 0:00:01[K     |████████████▋                   | 112kB 8.9MB/s eta 0:00:01[K     |█████████████▉                  | 122kB 8.9MB/s et

In [3]:
# Initialize sentiment analyzers
sentiment_model = flair.models.TextClassifier.load('en-sentiment')
vader_analyzer = SentimentIntensityAnalyzer()

2021-04-05 19:05:53,488 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmprpna0isr


100%|██████████| 265512723/265512723 [00:15<00:00, 16941128.01B/s]

2021-04-05 19:06:09,681 copying /tmp/tmprpna0isr to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2021-04-05 19:06:10,139 removing temp file /tmp/tmprpna0isr
2021-04-05 19:06:10,176 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




In [4]:
# Load tweets we extracted from running the fetch_and_save_tweets notebook and that are stored on github

df_tweets = pd.read_csv('https://raw.githubusercontent.com/ml-group8/assignment/main/elec_vehicle_tweets.csv')

# Keep only the timestamp and tweets
df_tweets = df_tweets[['created_at','text']]

df_tweets.head()

Unnamed: 0,created_at,text
0,2021-03-20T15:59:46.000Z,@QuestMalloy I agree. I was looking to buy an ...
1,2021-03-20T15:59:30.000Z,RT @JessiSheron: this line read is so funny!??...
2,2021-03-20T15:59:09.000Z,RT @JessiSheron: this line read is so funny!??...
3,2021-03-20T15:59:03.000Z,@wef Germany electric car and 60% US states ma...
4,2021-03-20T15:58:49.000Z,@Greenpeace Germany electric car and 60% US st...


In [None]:
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
cd /content/gdrive/MyDrive

/content/gdrive/MyDrive


In [None]:
# Force tweets to string
df_tweets['text'] = df_tweets['text'].astype(str)

# Process the tweets 
whitespace = re.compile(r"\s+")
web_address = re.compile(r"(?i)http(s):\/\/[a-z0-9.~_\-\/]+")
tesla = re.compile(r"(?i)@Tesla(?=\b)")
user = re.compile(r"(?i)@[a-z0-9_]+")

def clean_tweet(tweet):
  tweet = whitespace.sub(' ', tweet)
  tweet = web_address.sub('', tweet)
  tweet = tesla.sub('Tesla', tweet)
  tweet = user.sub('', tweet)
  return tweet

def predict_flare_sentiment(tweet):
  sentence = flair.data.Sentence(tweet)
  sentiment_model.predict(sentence)
  label = sentence.labels[0]
  return (label.value, label.score)

def predict_textblob_sentiment(tweet):
  analysis = TextBlob(tweet).sentiment
  #Polarity is float which lies in the range of [-1,1] where 1 means positive statement and -1 means a negative statement
  #For subjectivity, 0.0 is very objective and 1.0 is very subjective

  sentiment = 'POSITIVE' if analysis.polarity > 0.05 else 'NEGATIVE'
  return (sentiment, analysis.subjectivity)

def predict_vader_sentiment(tweet):
  #The analyzer return a dict which contains a compound key whose value is the compounded score
    #positive sentiment: compound score >= 0.05
    #neutral sentiment: (compound score > -0.05) and (compound score < 0.05)
    #negative sentiment: compound score <= -0.05
  # Vader does not appear to provide a confidence value
  compound_score = vader_analyzer.polarity_scores(tweet)['compound']
  return ('POSITIVE' if compound_score > 0.05 else 'NEGATIVE', compound_score)

# Some analyzers will return neutral results and we will consider those as negative assuming no buzz about a stock's domain is bad news for its price
def assign_sentiment_and_score(df):
  for idx, row in df.iterrows():
    tweet = df.at[idx,'tweet']

    flare_analysis = predict_flare_sentiment(tweet)
    df.at[idx,'flare_sentiment'] = flare_analysis[0]
    df.at[idx,'flare_confidence'] = flare_analysis[1]

    txtblob_analysis = predict_textblob_sentiment(tweet)
    df.at[idx,'txtblob_sentiment'] = txtblob_analysis[0]
    df.at[idx,'txtblob_subjectivity'] = txtblob_analysis[1]

    vader_analysis = predict_vader_sentiment(tweet)
    df.at[idx,'vader_sentiment'] = vader_analysis[0]
    df.at[idx,'vader_compound_score'] = vader_analysis[1]


df_tweets['tweet'] = df_tweets['text'].map(lambda text: clean_tweet(text))

#We can drop the original tweet text now that the tweets have been cleaned
del df_tweets['text'] # We have the cleaned tweets

# Assign sentiment and save the dataframe to CSV on the google drive
print('Sentiment analysis starting at : ' + str(datetime.now()))
assign_sentiment_and_score(df_tweets)
df_tweets.to_csv('df_tweets_sentiment_3_analyzers.csv')
print('Sentiment analysis completed at : ' + str(datetime.now()))
df_tweet_copy


Sentiment analysis starting at : 2021-04-03 21:27:10.321174
Sentiment analysis completed at : 2021-04-03 21:49:48.565586


Unnamed: 0,created_at,text,tweet,flare_sentiment,flare_confidence,txtblob_sentiment,txtblob_subjectivity,vader_sentiment,vader_compound_score
0,2021-03-20T15:59:46.000Z,@QuestMalloy I agree. I was looking to buy an ...,I agree. I was looking to buy an electric car...,NEGATIVE,0.999987,NEGATIVE,0.495,NEGATIVE,-0.6478
1,2021-03-20T15:59:30.000Z,RT @JessiSheron: this line read is so funny!??...,"RT : this line read is so funny!??!?! ""WHERESM...",NEGATIVE,0.999818,POSITIVE,1.0,POSITIVE,0.7826
2,2021-03-20T15:59:09.000Z,RT @JessiSheron: this line read is so funny!??...,"RT : this line read is so funny!??!?! ""WHERESM...",NEGATIVE,0.99984,POSITIVE,1.0,POSITIVE,0.7826
3,2021-03-20T15:59:03.000Z,@wef Germany electric car and 60% US states ma...,Germany electric car and 60% US states margin...,NEGATIVE,0.975843,POSITIVE,0.2,POSITIVE,0.4576
4,2021-03-20T15:58:49.000Z,@Greenpeace Germany electric car and 60% US st...,Germany electric car and 60% US states margin...,NEGATIVE,0.975843,POSITIVE,0.2,POSITIVE,0.4576
5,2021-03-20T15:58:47.000Z,RT @jotygill11: Part 4: The ⚡️ Electric 🚗 Cup ...,RT : Part 4: The ⚡️ Electric 🚗 Cup Race winner...,POSITIVE,0.934792,NEGATIVE,0.0,NEGATIVE,0.0
6,2021-03-20T15:58:32.000Z,@Greenpeace Germany electric car and 60% US st...,Germany electric car and 60% US states margin...,NEGATIVE,0.975843,POSITIVE,0.2,POSITIVE,0.4576
7,2021-03-20T15:58:24.000Z,RT @IsSaddleThereIs: @grantshapps The UK in 20...,RT : The UK in 2021: a place where a city has...,POSITIVE,0.739877,POSITIVE,0.4,POSITIVE,0.5859
8,2021-03-20T15:57:27.000Z,RT @pookleblinky: Reminder that the first elec...,RT : Reminder that the first electric cars wer...,POSITIVE,0.930591,POSITIVE,0.111111,POSITIVE,0.1531
9,2021-03-20T15:56:53.000Z,RT @NYDailyNews: Russian national admits in fe...,RT : Russian national admits in federal court ...,NEGATIVE,0.998351,NEGATIVE,0.0,POSITIVE,0.296
