In [None]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import spacy
import re
from pprint import pprint
import nltk
#nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

from collections import Counter
from wordcloud import WordCloud


import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

%matplotlib inline

In [None]:
# Load the topic-modelling results that the rest of the notebook works on.
# NOTE(review): unpickling can execute arbitrary code, so only load a pickle
# file that comes from a trusted source.
df = pd.read_pickle('path/to/topic_modelling_sklearn_results.pkl')
# Preview the first 15 rows
df.head(15)

In [None]:
# Pre-Processing
# Remove links
def remove_links(tweet):
    """Return ``tweet`` with every ``http...`` token removed.

    A token starts at the literal text ``http`` and extends up to (not
    including) the next whitespace character.
    """
    link_pattern = re.compile(r"http\S+")
    return link_pattern.sub("", tweet)
# Strip http links from the raw tweets. Series.apply is used for the element-wise
# call because np.vectorize raises a ValueError when the column is empty.
df['tweet_text_p'] = df['Tweet'].apply(remove_links)
def remove_links(tweet):
    """Return ``tweet`` with scheme-less twitter.com links removed.

    Removes every token that starts at the literal ``twitter.com`` and runs up
    to the next whitespace (e.g. the tail of ``pic.twitter.com/abc``).

    Note: this re-uses the name ``remove_links`` and therefore replaces the
    ``http`` variant defined earlier in this cell from this point on.
    """
    # The dot is escaped so only a literal "twitter.com" matches; an unescaped
    # "." would also match text such as "twitterXcom...".
    return re.sub(r"twitter\.com\S+", "", tweet)
# Strip twitter.com links from the already link-stripped text. Series.apply is used
# because np.vectorize raises a ValueError when the column is empty.
df['tweet_text_p'] = df['tweet_text_p'].apply(remove_links)

# Remove Twitter Handlers (@Users)
def remove_users(tweet, pattern1, pattern2):
    """Return ``tweet`` with everything matching ``pattern1``, then ``pattern2``, removed.

    tweet: the text to clean.
    pattern1, pattern2: regular expressions, applied in this order.

    The patterns are substituted directly. Feeding each *matched string* back
    into ``re.sub`` as a new regex is avoided because a short match such as
    "@al" would also delete the start of "@alice", and a match containing regex
    metacharacters would be misinterpreted or raise ``re.error``.
    """
    for pattern in (pattern1, pattern2):
        tweet = re.sub(pattern, '', tweet)
    return tweet
# Remove "@ user" first, then "@user". The patterns are raw strings so that "\w" is
# not parsed as an (invalid) string escape; Series.apply also works on an empty column,
# where np.vectorize raises a ValueError.
df['tweet_text_p'] = df['tweet_text_p'].apply(remove_users, args=(r"@ [\w]*", r"@[\w]*"))

# Remove Hashtag Symbol
# Only the '#' character is stripped; the hashtag words themselves are kept because they add value to the sentiment.
def remove_hashtags(tweet, pattern1):
    """Return ``tweet`` with every match of the regex ``pattern1`` removed.

    tweet: the text to clean.
    pattern1: regular expression describing what to strip.

    The pattern is substituted directly rather than re-using each matched
    string as a new regex, which would misbehave (or raise ``re.error``) when
    a match contains regex metacharacters such as "(" or "+".
    """
    return re.sub(pattern1, '', tweet)
# Strip the '#' character from every tweet. Series.apply is used because
# np.vectorize raises a ValueError when the column is empty.
df['tweet_text_p'] = df['tweet_text_p'].apply(remove_hashtags, args=("#",))

# Do not Remove Punctuation

# Removing Punctuation has no significant impact in most cases 
# In some cases reduces the neutrality because of words in parentheses
# df['pre_processed_1'] = df['pre_processed'].str.replace("[^a-zA-Z#]", " ")

# Remove Duplicates
# Keep the first occurrence of each cleaned text (reassigned instead of inplace=True
# so the step reads as a plain transformation).
df = df.drop_duplicates(subset=['tweet_text_p'], keep='first')
# Drop rows whose text contains "Retweeted". The explicit copy makes the result an
# independent frame, so columns added to it in later cells are not written to a slice.
df = df[~df['tweet_text_p'].str.contains("Retweeted")].copy()
df.shape



In [None]:
# VADER sentiment analyzer instance used by the scoring function defined below
vader_analyzer = SentimentIntensityAnalyzer()

# Empty accumulators, one list per VADER score
negative, neutral, positive, compound = [], [], [], []

def sentiment_scores(df, negative, neutral, positive, compound):
    """Append the VADER scores of every text in ``df['tweet_text_p']`` to the given lists.

    df: frame providing the ``tweet_text_p`` column.
    negative, neutral, positive, compound: lists that receive, per text, the
        ``'neg'``, ``'neu'``, ``'pos'`` and ``'compound'`` entries of the
        analyzer's result.

    The lists are extended in place; nothing is returned. Uses the
    module-level ``vader_analyzer``.
    """
    destinations = (
        ('neg', negative),
        ('neu', neutral),
        ('pos', positive),
        ('compound', compound),
    )
    for text in df['tweet_text_p']:
        scores = vader_analyzer.polarity_scores(text)
        for key, bucket in destinations:
            bucket.append(scores[key])



In [None]:
# Score every tweet.
# Empty the accumulators first: sentiment_scores appends to them, so re-running
# this cell would otherwise add a second set of scores and the lists would no
# longer match the number of rows in df.
for _scores in (negative, neutral, positive, compound):
    _scores.clear()
sentiment_scores(df, negative, neutral, positive, compound)

# Attach the four scores as columns
df["negative"] = negative
df["neutral"] = neutral
df["positive"] = positive
df["compound"] = compound

# Fill the overall sentiment with encoding:
# (-1)Negative, (0)Neutral, (1)Positive
# compound >= 0.05 is positive, compound <= -0.05 is negative, anything else neutral.
sentiment = [
    1 if score >= 0.05 else -1 if score <= -0.05 else 0
    for score in df['compound']
]
df['sentiment'] = sentiment

# Tweets per sentiment class. Reindexing over all three labels makes a class
# with no tweets count as 0 instead of raising KeyError on lookup.
sentiment_counts = df['sentiment'].value_counts().reindex([-1, 0, 1], fill_value=0)
neg_tweets = sentiment_counts.loc[-1]
neu_tweets = sentiment_counts.loc[0]
pos_tweets = sentiment_counts.loc[1]

In [None]:
def _topic_sentiment_counts(frame, topic):
    """Return ``[negative, neutral, positive]`` tweet counts for one dominant topic.

    frame: frame with ``dominant_topic`` and ``sentiment`` columns, where
        sentiment is encoded as -1 / 0 / 1.
    topic: value of ``dominant_topic`` to select.

    A sentiment class with no tweets in the topic is reported as 0 instead of
    raising KeyError.
    """
    counts = frame.loc[frame['dominant_topic'] == topic, 'sentiment'].value_counts()
    return counts.reindex([-1, 0, 1], fill_value=0).tolist()


# Sentiment counts per topic, in the order negative, neutral, positive
data0 = _topic_sentiment_counts(df, 0)
data1 = _topic_sentiment_counts(df, 1)
data2 = _topic_sentiment_counts(df, 2)
data3 = _topic_sentiment_counts(df, 3)

In [None]:
# Number of tweets by sentiment per Topic
# Pie Chart

# Wedge labels (in data order) and the offset applied to each wedge
categories = ['Negative', 'Neutral', 'Positive']
explode = [0.05] * 3

# Canvas for the first pie chart; equal aspect keeps the pie circular
fig, ax = plt.subplots(
    figsize=(15, 6),
    dpi=80,
    subplot_kw={"aspect": "equal"},
)

def func(pct, allvals):
    """Format a pie-wedge label as ``"<pct>% (<count> )"``.

    pct: the wedge's share in percent, as handed to an ``autopct`` callback.
    allvals: the values the pie was drawn from; their sum is the total count.

    Returns the percentage with one decimal followed by the absolute count
    reconstructed from it.
    """
    # Round instead of truncating: pct is a float, so pct/100*total can land just
    # below the true integer (e.g. 1.9999) and int() alone would report one less.
    absolute = int(np.round(pct / 100. * np.sum(allvals)))
    return "{:.1f}% ({:d} )".format(pct, absolute)

# Sentiment split for topic 0; each wedge is labelled by func
wedges, texts, autotexts = ax.pie(
    data0,
    autopct=lambda pct: func(pct, data0),
    textprops=dict(color="w"),
    colors=['#e55039', '#3c6382', '#78e08f'],
    startangle=140,
    explode=explode,
)

# Decoration
ax.legend(wedges, categories, title="Sentiment", loc="center left", bbox_to_anchor=(1, 0.2, 0.5, 1))
plt.setp(autotexts, size=10, weight=700)
# Name the topic in the title so the saved per-topic charts can be told apart
ax.set_title("Number of Tweets by Sentiment (Topic 0)", fontsize=12, fontweight="bold")
# Save through the figure object rather than pyplot's implicit current figure
fig.savefig('path/to/SentPlot0.png')


In [None]:
# Sentiment split for topic 1 (reuses func, categories and explode from the first pie cell)
fig, ax = plt.subplots(figsize=(10, 6), subplot_kw=dict(aspect="equal"), dpi=80)

wedges, texts, autotexts = ax.pie(
    data1,
    autopct=lambda pct: func(pct, data1),
    textprops=dict(color="w"),
    colors=['#e55039', '#3c6382', '#78e08f'],
    startangle=140,
    explode=explode,
)

# Decoration
ax.legend(wedges, categories, title="Sentiment", loc="center left", bbox_to_anchor=(1, 0.2, 0.5, 1))
plt.setp(autotexts, size=10, weight=700)
# Name the topic in the title so the saved per-topic charts can be told apart
ax.set_title("Number of Tweets by Sentiment (Topic 1)", fontsize=12, fontweight="bold")
# Save through the figure object rather than pyplot's implicit current figure
fig.savefig('path/to/SentPlot1.png')

In [None]:
# Sentiment split for topic 2 (reuses func, categories and explode from the first pie cell)
fig, ax = plt.subplots(figsize=(10, 6), subplot_kw=dict(aspect="equal"), dpi=80)

wedges, texts, autotexts = ax.pie(
    data2,
    autopct=lambda pct: func(pct, data2),
    textprops=dict(color="w"),
    colors=['#e55039', '#3c6382', '#78e08f'],
    startangle=140,
    explode=explode,
)

# Decoration
ax.legend(wedges, categories, title="Sentiment", loc="center left", bbox_to_anchor=(1, 0.2, 0.5, 1))
plt.setp(autotexts, size=10, weight=700)
# Name the topic in the title so the saved per-topic charts can be told apart
ax.set_title("Number of Tweets by Sentiment (Topic 2)", fontsize=12, fontweight="bold")
# Save through the figure object rather than pyplot's implicit current figure
fig.savefig('path/to/SentPlot2.png')

In [None]:
# Sentiment split for topic 3 (reuses func, categories and explode from the first pie cell)
fig, ax = plt.subplots(figsize=(10, 6), subplot_kw=dict(aspect="equal"), dpi=80)

wedges, texts, autotexts = ax.pie(
    data3,
    autopct=lambda pct: func(pct, data3),
    textprops=dict(color="w"),
    colors=['#e55039', '#3c6382', '#78e08f'],
    startangle=140,
    explode=explode,
)

# Decoration
ax.legend(wedges, categories, title="Sentiment", loc="center left", bbox_to_anchor=(1, 0.2, 0.5, 1))
plt.setp(autotexts, size=10, weight=700)
# Name the topic in the title so the saved per-topic charts can be told apart
ax.set_title("Number of Tweets by Sentiment (Topic 3)", fontsize=12, fontweight="bold")
# Save through the figure object rather than pyplot's implicit current figure
fig.savefig('path/to/SentPlot3.png')