# Twitter Sentiment Analysis


1. Case study: The much-anticipated Netflix series Wednesday was released on November 23, 2022, and went on to break numerous records. A sentiment analysis was carried out to evaluate how viewers around the globe perceived it.

2. Data gathering: 75,000 tweets were scraped from Twitter using the Python library snscrape, covering the period from June 2022, when there was much anticipation, to January 2023.

3. Data wrangling: This was done to fill in the missing data and make all necessary adjustments to the raw scraped data.

4. Data preprocessing: This involves the necessary steps before carrying out the sentiment analysis: removing stop words, tags, and URL links, tokenizing the words, and lemmatizing them with the use of various Python libraries.

5. Sentiment analysis: A polarity score was first obtained with the TextBlob library, and was then used to set up the conditions for the sentiments.

6. Data visualization: Only limited visualization can be done in Python, so Power BI was chosen to visualize the scraped data and draw insights from it.

In [None]:
#the necessary libraries needed

import pandas as pd
import snscrape.modules.twitter as sntwitter
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import string

import nltk
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer

import textblob
from textblob import TextBlob

from wordcloud import WordCloud
from emot.emo_unicode import UNICODE_EMOJI

from wordcloud import ImageColorGenerator
from PIL import Image

import warnings
%matplotlib inline

In [None]:
# Build the search query and scrape the matching tweets into a DataFrame.

# Maximum number of tweets to collect.
tweet_limit = 75000

# Query for the series, restricted by the since:/until: date filters and lang:en.
search = '#WednesdayNetflix OR wednesday netflix OR Wednesday Netflix since:2022-06-01 until:2023-01-10 lang:en'

tweets_list = []

for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search).get_items()):
    # i is zero-based, so stopping once i reaches tweet_limit keeps exactly
    # tweet_limit tweets (the previous `i > 75000` check kept one extra).
    if i >= tweet_limit:
        break
    tweets_list.append([
        tweet.date,
        tweet.user.username,
        tweet.sourceLabel,
        tweet.id,
        tweet.rawContent,
        tweet.user.location,
        tweet.likeCount,
        tweet.retweetCount,
    ])

# One row per tweet; column order matches the fields appended above.
tweets_df = pd.DataFrame(
    tweets_list,
    columns=['Datetime', 'Usernames', 'Source', 'Tweet Id', 'Tweet', 'Location', 'Number_of_likes', 'Number_of_retweet'],
)

In [None]:
# Save the scraped tweets to CSV (without the DataFrame index) before any cleaning
tweets_df.to_csv('WednesdayAnalysis.csv', index=False)

# Preview the first rows
tweets_df.head()

In [None]:
# Replace every cell equal to the empty string with a missing value
tweets_df = tweets_df.mask(tweets_df == '')

In [None]:
# Count the rows whose Location is missing
tweets_df.Location.isna().sum()

In [None]:
# Label every missing Location as 'Unknown', then preview the result
tweets_df['Location'] = tweets_df['Location'].fillna('Unknown')
tweets_df.head()

In [None]:
# Lower-case cast and character names to look for in each tweet
wednesday_casts = [
    'wednesday addams',
    'jenna',
    'morticia',
    'valerie',
    'donovan',
    'hunter',
    'tyler',
    'percy',
    'xavier',
    'bianca',
    'yoko',
    'marilyn',
    'eugene',
    'thing',
    'gomez',
    'larissa',
]

In [None]:
# Define function to extract casts from each Tweet
def GetCasts(tweet):
    """Return the cast/character names mentioned in a tweet.

    The tweet is lower-cased and tokenized with ``word_tokenize``; every
    occurrence of a name from ``wednesday_casts`` is collected in the order
    it appears. Names made of several words (e.g. 'wednesday addams') are
    matched against consecutive tokens, since a single token never contains
    a space and such entries could otherwise never match.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The matched names joined by single spaces ('' when none are found).
    """
    tweet_tokens = word_tokenize(tweet.lower())

    # Split each name into its words so it can be compared with a run of
    # tokens. Longest names are tried first so a multi-word name wins over a
    # shorter one starting at the same token; blank entries are dropped.
    cast_phrases = sorted(
        (tuple(name.lower().split()) for name in wednesday_casts if name.split()),
        key=len,
        reverse=True,
    )

    found = []
    position = 0
    while position < len(tweet_tokens):
        for phrase in cast_phrases:
            if tuple(tweet_tokens[position:position + len(phrase)]) == phrase:
                found.append(" ".join(phrase))
                # Skip past the tokens consumed by this name
                position += len(phrase)
                break
        else:
            # No name starts at this token
            position += 1
    return " ".join(found)

In [None]:
# Store the names found in each tweet in a new 'wednesday_casts' column, then preview
tweets_df['wednesday_casts'] = tweets_df['Tweet'].apply(GetCasts)
tweets_df.head()

In [None]:
# Load NLTK's English stop-word list; ProcessedTweets uses it to filter tokens
stop_words = list(stopwords.words('english'))


In [None]:
# Keys of the emot UNICODE_EMOJI mapping, used to filter tokens in ProcessedTweets
emoji = list(UNICODE_EMOJI)

In [None]:
# PreProcess the tweets to be ready for sentiment analysis
def ProcessedTweets(tweet):
    """Clean a raw tweet and return it as a space-joined string of lemmas.

    Steps: lower-case the text, strip URLs, @mentions, #hashtags and digits,
    tokenize, drop tokens found in ``stop_words`` or ``emoji``, drop tokens
    made up only of punctuation, lemmatize what is left and join with spaces.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned, lemmatized tweet text.
    """
    tweet = tweet.lower()

    # Cleaning and removing URLs
    tweet = re.sub(r"http\S+|www\S+|https\S+", '', tweet, flags = re.MULTILINE)

    # Cleaning and removing @mentions, #hashtags and digits
    tweet = re.sub(r'\@\w+|\#\w+|\d+', '', tweet)

    # Cleaning and removing the stop words and emoji from the tweet text
    tweet_tokens = word_tokenize(tweet)
    filtered_words = [w for w in tweet_tokens if w not in stop_words]
    filtered_words = [w for w in filtered_words if w not in emoji]

    # Cleaning and removing punctuation. string.punctuation is a str, so
    # `w in string.punctuation` is a substring test and lets multi-character
    # tokens such as "..." or the tokenizer's "``" / "''" quote tokens
    # through; test each character against a set instead.
    punctuation_chars = set(string.punctuation)
    unpunctuated_words = [
        w for w in filtered_words
        if not all(ch in punctuation_chars for ch in w)
    ]

    # Reduce each remaining word to its lemma and rebuild the text
    lemmatizer = WordNetLemmatizer()
    lemma_words = [lemmatizer.lemmatize(w) for w in unpunctuated_words]
    return " ".join(lemma_words)

In [None]:
# Generate a new column holding the processed version of each tweet
tweets_df['Processed Tweets'] = tweets_df['Tweet'].apply(ProcessedTweets)


In [None]:
# Preview the first rows, now including the 'Processed Tweets' column
tweets_df.head()

In [None]:
# Join every processed tweet into one long string, separated by single spaces
tweets_string = " ".join(tweets_df['Processed Tweets'].tolist())

Sentiment Analysis


In [None]:
# Polarity score of a tweet, computed with TextBlob
def Polarity(tweet):
    """Return the TextBlob sentiment polarity of ``tweet``."""
    sentiment = TextBlob(tweet).sentiment
    return sentiment.polarity

# Map a polarity score to its sentiment label
def SentimentTextBlob(polarity):
    """Return 'Negative' below zero, 'Neutral' at exactly zero, else 'Positive'."""
    if polarity < 0:
        return 'Negative'
    return 'Neutral' if polarity == 0 else 'Positive'


In [None]:
# Use the functions above to add 'Polarity' and 'Sentiments' columns

tweets_df['Polarity']= tweets_df['Processed Tweets'].apply(Polarity)
tweets_df['Sentiments']= tweets_df['Polarity'].apply(SentimentTextBlob)
# Show how many tweets fall under each sentiment label
tweets_df.Sentiments.value_counts()


In [None]:
# Preview the first 10 rows
tweets_df.head(10)

In [None]:
# Show the most talked-about words as a word cloud.
# Some stop words were still evident here but were removed during visualization in Power BI.

# Configure the Twitter word cloud, then build it from the joined tweet text
word_cloud = WordCloud(
    collocations=False,
    max_words=200,
    background_color='black',
    width=7000,
    height=5000,
)
word_cloud = word_cloud.generate(tweets_string)

# Render the generated word cloud without axes
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:


# Draw a second word cloud that takes its shape and colours from a mask image.

# NOTE(review): absolute path on the author's machine; point it at a local copy of the image.
mask_path = '/Users/TEDZ/Downloads/mytopkid.com-wednesday-addams-clipart-7-600x844.png'

# Read the pixels inside a context manager so the image file is closed as
# soon as the array has been built.
with Image.open(mask_path) as mask_image:
    thing_mask = np.array(mask_image)

# Grab the mask colors
colors = ImageColorGenerator(thing_mask)

# Instantiate the wordcloud using the color_func argument
wordcloud = WordCloud(mask=thing_mask,
                  background_color='white',
                  color_func=colors).generate(tweets_string)

# Plot the wordcloud
plt.figure(figsize=(10,8))
plt.imshow(wordcloud)
plt.axis('off')
# NOTE(review): 'title' looks like a placeholder; confirm the intended heading
plt.title('title')
plt.show()

In [None]:
# Save the processed data as a CSV file and the first word cloud as an image

# index=False matches the earlier 'WednesdayAnalysis.csv' export and keeps the
# DataFrame index from being written as an extra unnamed column.
tweets_df.to_csv('WednesdayNetflix.csv', index=False)
word_cloud.to_file("wordcloud.png")