# Sentiment Analysis of Tweets

## Import libraries

In [1]:
# pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 4.4 MB/s eta 0:00:01
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# One analyser instance, reused by every scoring cell in this notebook.
analyser = SentimentIntensityAnalyzer()

In [66]:
# Smoke test: score a short sentence to confirm the analyser is loaded and
# returns the neg / neu / pos / compound dictionary.

analyser.polarity_scores("The weather is nice.")

{'neg': 0.0, 'neu': 0.517, 'pos': 0.483, 'compound': 0.4215}

In [21]:
import pandas as pd
import numpy as np
import re

## read tweets into a dataframe

In [29]:
# Load the combined tweet export (CSV) into a DataFrame and preview it.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory so the notebook runs on other machines.
tweets_csv_path = r'/Users/katherina/data_science_tweets_neutrality/twitter_combined_tweet_data.csv'

twitter_combined_tweet_data = pd.read_csv(tweets_csv_path)
twitter_combined_tweet_data.head()

Unnamed: 0,tweet_id,tweet_text,tweet_author_id,tweet_context_annotations,tweet_created_at,tweet_entities,tweet_in_reply_to_user_id,tweet_lang,tweet_retweet_count,tweet_reply_count,tweet_like_count,tweet_quote_count,tweet_referenced_tweets
0,1553892491962843136,Smokers and vapers are more likely to have a s...,28785486,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",2022-07-31 23:56:54+00:00,"{'urls': [{'start': 111, 'end': 134, 'url': 'h...",,en,59,43,144,16,
1,1553886764930138122,Gun manufacturers have made more than $1 billi...,28785486,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",2022-07-31 23:34:09+00:00,"{'urls': [{'start': 214, 'end': 237, 'url': 'h...",,en,82,388,187,65,
2,1553880276702560256,Misinformation and stigma may be holding back ...,28785486,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",2022-07-31 23:08:22+00:00,"{'urls': [{'start': 69, 'end': 92, 'url': 'htt...",,en,24,39,46,3,
3,1553872564820361222,Rising concern over the impact of a potential ...,28785486,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",2022-07-31 22:37:43+00:00,"{'annotations': [{'start': 90, 'end': 96, 'pro...",,en,23,28,58,7,
4,1553865859977908225,"At least 28 people have been confirmed dead, b...",28785486,"[{'domain': {'id': '10', 'name': 'Person', 'de...",2022-07-31 22:11:05+00:00,"{'annotations': [{'start': 98, 'end': 109, 'pr...",,en,83,38,204,6,


## test the analyser

In [34]:
# Work on a copy of the frame loaded above. The previous right-hand side
# (`twitter_users_tweets_2022_july_data`) is never defined in this notebook and
# raised NameError on a fresh kernel. Copying keeps the raw frame unmodified
# when derived columns are added to `tweet_data` later on.
tweet_data = twitter_combined_tweet_data.copy()

In [35]:
# Pull the text of the row labelled 0 to have one real tweet to experiment with.
sample_row = 0
tweet_text = tweet_data['tweet_text'][sample_row]
tweet_text

'Smokers and vapers are more likely to have a severe case of COVID-19 or die of the disease, a new study finds. https://t.co/G9D1rpnGBf'

In [11]:
# Score the unprocessed tweet text held in `tweet_text`.
analyser.polarity_scores(tweet_text)

{'neg': 0.236, 'neu': 0.764, 'pos': 0.0, 'compound': -0.7579}

In [12]:
# Probe: input consisting only of an emoticon.
analyser.polarity_scores(":)")

{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4588}

In [58]:
# Probe: plain lowercase phrase, the baseline for the variants in the next cells.
analyser.polarity_scores("That is good")

{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}

In [59]:
# Probe: same phrase with the sentiment word in all caps.
analyser.polarity_scores("That is GOOD")

{'neg': 0.0, 'neu': 0.355, 'pos': 0.645, 'compound': 0.5622}

In [60]:
# Probe: capitalised first letter plus a trailing full stop.
analyser.polarity_scores("That is Good.")

{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}

In [61]:
# Probe: exclamation mark instead of a full stop.
analyser.polarity_scores("That is Good!")

{'neg': 0.0, 'neu': 0.385, 'pos': 0.615, 'compound': 0.4926}

In [62]:
# Probe: trailing ellipsis.
analyser.polarity_scores("That is good...")

{'neg': 0.0, 'neu': 0.408, 'pos': 0.592, 'compound': 0.4404}

In [64]:
# Probe: a digit token inserted before the sentiment word.
analyser.polarity_scores("That is 1 good.")

{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}

## text preprocessing

In [43]:
#cleaning the tweets
def remove_pattern(input_txt, pattern):
    """Return ``input_txt`` with every match of the regex ``pattern`` deleted.

    Args:
        input_txt: Text to clean.
        pattern: Regular expression (string or compiled pattern) whose
            matches are removed.

    Returns:
        A new string in which all non-overlapping matches of ``pattern``
        have been replaced by the empty string.
    """
    # Substitute with the pattern itself. Feeding each *matched text* back
    # into re.sub() would reinterpret characters such as '.', '(' or '+' as
    # regex syntax and delete the wrong text, nothing at all, or raise
    # re.error; it also breaks for patterns that contain capture groups.
    return re.sub(pattern, '', input_txt)
def clean_tweets(tweets, strip_non_letters=False):
    """Strip Twitter-specific noise from one tweet or an array-like of tweets.

    Retweet prefixes (``RT @xxx:``), user handles (``@xxx``) and URLs are
    always removed.

    Args:
        tweets: A string or an array-like of strings.
        strip_non_letters: When True, every character other than ASCII
            letters and ``#`` is additionally replaced by a space. The
            default (False) leaves punctuation, digits and emoticons in
            place, which is what this function effectively did before: the
            former ``np.core.defchararray.replace(tweets, "[^a-zA-Z]", " ")``
            call performs literal substring replacement, not regex
            matching, so it never matched anything in real tweets.

    Returns:
        A numpy array of cleaned strings, as produced by ``np.vectorize``.
    """
    strip = np.vectorize(remove_pattern)

    # Raw strings keep "\w" from being parsed as an (invalid) string escape.

    # remove retweet prefixes (RT @xxx:)
    tweets = strip(tweets, r"RT @[\w]*:")

    # remove twitter handles (@xxx)
    tweets = strip(tweets, r"@[\w]*")

    # remove URL links (httpxxx)
    tweets = strip(tweets, r"https?://[A-Za-z0-9./]*")

    if strip_non_letters:
        # remove special characters, numbers, punctuation (except for #);
        # done with a real regex substitution, one space per removed character
        tweets = np.vectorize(
            lambda text: re.sub(r"[^a-zA-Z#]", " ", text)
        )(tweets)

    return tweets

In [36]:
# Clean every tweet and store the result in a new column next to the raw text.
cleaned_texts = clean_tweets(tweet_data['tweet_text'])
tweet_data['cleaned_tweet_text'] = cleaned_texts

tweet_data['cleaned_tweet_text'].head()

0    Smokers and vapers are more likely to have a s...
1    Gun manufacturers have made more than $1 billi...
2    Misinformation and stigma may be holding back ...
3    Rising concern over the impact of a potential ...
4    At least 28 people have been confirmed dead, b...
Name: cleaned_tweet_text, dtype: object

In [53]:
# Compare VADER scores for the raw and the cleaned text of the row labelled 0.
row = 0
tweet_text = tweet_data['tweet_text'][row]
cleaned_tweet_text = tweet_data['cleaned_tweet_text'][row]

original_scores = analyser.polarity_scores(tweet_text)
cleaned_scores = analyser.polarity_scores(cleaned_tweet_text)

print(
    "original_tweet_text:", tweet_text,
    "\nscores", original_scores,
    "\n\ncleaned_tweet_text:", cleaned_tweet_text,
    "\nscores", cleaned_scores,
)

original_tweet_text: Smokers and vapers are more likely to have a severe case of COVID-19 or die of the disease, a new study finds. https://t.co/G9D1rpnGBf 
scores {'neg': 0.236, 'neu': 0.764, 'pos': 0.0, 'compound': -0.7579} 

cleaned_tweet_text: Smokers and vapers are more likely to have a severe case of COVID-19 or die of the disease, a new study finds.  
scores {'neg': 0.245, 'neu': 0.755, 'pos': 0.0, 'compound': -0.7579}


In [52]:
# Compare VADER scores for the raw and the cleaned text of the row labelled 1.
row = 1
tweet_text = tweet_data['tweet_text'][row]
cleaned_tweet_text = tweet_data['cleaned_tweet_text'][row]

original_scores = analyser.polarity_scores(tweet_text)
cleaned_scores = analyser.polarity_scores(cleaned_tweet_text)

print(
    "original_tweet_text:", tweet_text,
    "\nscores", original_scores,
    "\n\ncleaned_tweet_text:", cleaned_tweet_text,
    "\nscores", cleaned_scores,
)

original_tweet_text: Gun manufacturers have made more than $1 billion from selling AR-15-style guns over the past decade, and for two companies those revenues have tripled over the past three years, according to a House investigation. https://t.co/bhfdFrxtH3 
scores {'neg': 0.066, 'neu': 0.934, 'pos': 0.0, 'compound': -0.34} 

cleaned_tweet_text: Gun manufacturers have made more than $1 billion from selling AR-15-style guns over the past decade, and for two companies those revenues have tripled over the past three years, according to a House investigation.  
scores {'neg': 0.068, 'neu': 0.932, 'pos': 0.0, 'compound': -0.34}


In [41]:
# Score every cleaned tweet once and collect one record per tweet.
# Pairing the two columns with zip() avoids label-based lookups such as
# tweet_data['tweet_id'][i], which only work with a default 0..n-1 index.
tweet_polarity_scores = []

for tweet_id, cleaned_text in zip(tweet_data['tweet_id'],
                                  tweet_data['cleaned_tweet_text']):
    # polarity_scores() returns all four values in one dict; call it a
    # single time per tweet instead of once per key.
    scores = analyser.polarity_scores(cleaned_text)

    tweet_polarity_scores.append({"tweet_id": tweet_id,
                                  "scores_compound": scores["compound"],
                                  "scores_positive": scores["pos"],
                                  "scores_negative": scores["neg"],
                                  "scores_neutral": scores["neu"]
                                  })

# A list of dicts maps directly onto rows; column order follows the dict keys.
tweet_sentiments_score = pd.DataFrame(tweet_polarity_scores)
tweet_sentiments_score.head()

Unnamed: 0,tweet_id,scores_compound,scores_positive,scores_negative,scores_neutral
0,1553892491962843136,-0.7579,0.0,0.245,0.755
1,1553886764930138122,-0.34,0.0,0.068,0.932
2,1553880276702560256,-0.3182,0.0,0.204,0.796
3,1553872564820361222,0.0,0.0,0.0,1.0
4,1553865859977908225,-0.6808,0.099,0.18,0.721
