# Analyze sentiment in tweets mentioning "Elon Musk"

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import json

In [3]:
import re

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the NLTK resources this notebook needs only when they are missing, so a
# fresh environment survives Restart & Run All instead of raising LookupError.
for _resource_path, _package in [
    ("sentiment/vader_lexicon.zip", "vader_lexicon"),
    ("corpora/words", "words"),
]:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package, quiet=True)

# VADER analyzer used below to score each tweet.
sid = SentimentIntensityAnalyzer()

# English vocabulary; `cleaner` keeps only alphabetic tokens found in this set.
words = set(nltk.corpus.words.words())

In [4]:
# Make the altair_stiles theme available under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" theme
# registered on the previous line — confirm this is intended.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [5]:
# Show up to 1,000 columns and 1,000 rows before pandas truncates a display.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [6]:
# Current date as a YYYY-MM-DD string.
today = format(pd.to_datetime("today"), "%Y-%m-%d")

---

In [7]:
# Processed tweet export. The two ID columns are read as strings so the long
# numeric IDs are not converted to numbers.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this file exists.
TWEETS_JSON = "/Users/stiles/twarc2/elon_musk_search_announcement_hours_processed.json"
src = pd.read_json(TWEETS_JSON, dtype={"conversation_id": str, "id": str})

In [8]:
# Work on a copy so the frame loaded into `src` is left untouched.
df = src.copy()

---

## Latest five tweets

In [9]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
0,1518711447311724545,1518711447311724545,RT @TitaniaMcGrath: Twitter is a private compa...,"{'mentions': [{'start': 3, 'end': 18, 'usernam...",3302,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True
1,1518711447114510336,1518711447114510336,RT @DoliaEstevez: 👉🏽@Twitter confirma su venta...,"{'mentions': [{'start': 3, 'end': 16, 'usernam...",100,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True
2,1518677066325053441,1518711446988763136,@EonAnglin @CrypticNoHoes @elonmusk And that's...,"{'mentions': [{'start': 0, 'end': 10, 'usernam...",0,1,8,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,False
3,1518711446942724097,1518711446942724097,RT @CaptAmazo: Friendly reminder that Elon Mus...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",49499,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True
4,1518711446905016320,1518711446905016320,RT @SenWarren: This deal is dangerous for our ...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",20989,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True


---

#### Most liked tweet? 

In [10]:
# Row(s) whose like count equals the maximum in the frame.
df.loc[df["like_count"].eq(df["like_count"].max())]

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
997537,1518658761979842560,1518658761979842560,Elon Musk told the United Nations he would giv...,,116099,10451,518623,7987,2022-04-25 11:30:38,2022-05-01 11:30:38,2022-04-25,False


#### Most replies?

In [11]:
# Row(s) whose reply count equals the maximum in the frame.
df.loc[df["reply_count"].eq(df["reply_count"].max())]

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
139861,1518702084048179200,1518702084048179200,This deal is dangerous for our democracy. Bill...,,20991,59318,141340,9599,2022-04-25 14:22:47,2022-05-01 14:22:47,2022-04-25,False


#### Most quoted?

In [12]:
# Row(s) whose quote count equals the maximum in the frame.
df.loc[df["quote_count"].eq(df["quote_count"].max())]

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
839094,1518668560675098633,1518668560675098633,🚨 Entre os planos de Elon Musk para o 'novo' T...,,1811,2343,51785,30309,2022-04-25 12:09:35,2022-05-01 12:09:35,2022-04-25,False


----

## Sentiment

#### First, test the scorer on a single tweet (row 2)

In [13]:
# Score the raw text of the row labelled 2 and show VADER's compound value.
sentence = df.loc[2, "text"]
sid.polarity_scores(sentence)["compound"]

0.7845

In [14]:
def cleaner(tweet):
    """Strip Twitter noise from a tweet and keep only dictionary words.

    Steps, in order:
      1. Remove @mentions (letters, digits and underscores after the ``@``).
      2. Remove links and any leftover token that starts with ``@``.
      3. Collapse whitespace, drop ``#`` signs (the hashtag text stays) and
         turn underscores into spaces.
      4. Tokenize and keep alphabetic tokens found in the module-level
         ``words`` vocabulary, plus every non-alphabetic token (numbers,
         punctuation, emoji).

    Args:
        tweet: Raw tweet text.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Handles may contain underscores; matching only [A-Za-z0-9] used to leave
    # fragments behind (e.g. "_Amazo" from "@Capt_Amazo").
    tweet = re.sub(r"@[A-Za-z0-9_]+", "", tweet)

    # Remove http(s) links, "www." links and any remaining "@..." token.
    # "www" must be followed by a dot so ordinary words containing "www"
    # (e.g. "awwww") are not cut apart.
    tweet = re.sub(r"(?:@|https?://|www\.)\S+", "", tweet)

    # Normalize whitespace, then drop the hashtag sign but keep its text and
    # split snake_case tags into separate words.
    tweet = " ".join(tweet.split())
    tweet = tweet.replace("#", "").replace("_", " ")

    # Keep dictionary words; non-alphabetic tokens pass through untouched.
    kept_tokens = [
        token
        for token in nltk.wordpunct_tokenize(tweet)
        if not token.isalpha() or token.lower() in words
    ]
    return " ".join(kept_tokens)


# Cleaned copy of every tweet's text (values are cast to str first).
df["text_clean"] = df["text"].astype(str).map(cleaner)

In [15]:
# VADER compound score for each cleaned tweet, in row order.
score_list = [
    sid.polarity_scores(str(clean_text))["compound"]
    for clean_text in df["text_clean"]
]

In [16]:
# Attach the scores row by row. A bare pd.Series(score_list) gets a fresh
# 0..n-1 index and is matched to df by index label, so the scores land on the
# wrong rows (or become NaN) whenever df's index is not exactly 0..n-1 in
# order — for example when this cell is re-run after df has been sorted.
# Reusing df.index keeps the assignment positional and raises on a length
# mismatch instead of failing silently.
df["sentiment"] = pd.Series(score_list, index=df.index)


def sentiment_category(sentiment, threshold=0.0):
    """Turn a compound sentiment score into a label.

    Args:
        sentiment: Compound score for one tweet.
        threshold: Half-width of the neutral band around zero. Scores strictly
            above ``threshold`` are positive, scores strictly below
            ``-threshold`` are negative, everything in between is neutral.
            The default of 0.0 labels only an exact 0 as neutral.

    Returns:
        "positive", "negative" or "neutral"; ``None`` when the score is
        missing (``None`` or NaN).
    """
    # A NaN score fails both comparisons below and would otherwise be counted
    # as "negative"; report it as missing instead. NaN is the only value that
    # is not equal to itself.
    if sentiment is None or sentiment != sentiment:
        return None
    if sentiment > threshold:
        return "positive"
    if sentiment < -threshold:
        return "negative"
    return "neutral"


# Label every row from its sentiment score.
df["sentiment_category"] = df["sentiment"].map(sentiment_category)

In [17]:
# Spot-check the new text_clean, sentiment and sentiment_category columns.
df.head()

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet,text_clean,sentiment,sentiment_category
0,1518711447311724545,1518711447311724545,RT @TitaniaMcGrath: Twitter is a private compa...,"{'mentions': [{'start': 3, 'end': 18, 'usernam...",3302,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True,: Twitter is a private company and can do what...,0.0,neutral
1,1518711447114510336,1518711447114510336,RT @DoliaEstevez: 👉🏽@Twitter confirma su venta...,"{'mentions': [{'start': 3, 'end': 16, 'usernam...",100,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True,: 👉🏽 a Musk 44 mil de . las tome …,0.0,neutral
2,1518677066325053441,1518711446988763136,@EonAnglin @CrypticNoHoes @elonmusk And that's...,"{'mentions': [{'start': 0, 'end': 10, 'usernam...",0,1,8,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,False,And that ' s your opinion that you ' re to on ...,0.7003,positive
3,1518711446942724097,1518711446942724097,RT @CaptAmazo: Friendly reminder that Elon Mus...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",49499,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True,: Friendly reminder that Musk told the UN that...,0.296,positive
4,1518711446905016320,1518711446905016320,RT @SenWarren: This deal is dangerous for our ...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",20989,0,0,0,2022-04-25 14:59:59,2022-05-01 14:59:59,2022-04-25,True,: This deal is dangerous for our democracy . l...,0.2023,positive


In [22]:
# Share of each sentiment label among rows where `retweet` is False, as
# rounded percentages.
(
    df.loc[df["retweet"].eq(False), "sentiment_category"]
    .value_counts(normalize=True)
    .mul(100)
    .round()
)

neutral     57.0
positive    27.0
negative    16.0
Name: sentiment_category, dtype: float64

In [19]:
# Order rows from lowest to highest sentiment score. The index labels travel
# with their rows, so label order no longer matches row position afterwards.
df = df.sort_values(by="sentiment")