In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.probability import FreqDist
import nltk

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

from datetime import datetime
from wordcloud import WordCloud

In [2]:
# Load the pre-processed tweets.
# The first column of the file carries no header and otherwise comes back as a
# data column named "Unnamed: 0"; it appears to be a saved row index, so it is
# read back as the DataFrame index instead of as a feature column.
cdf = pd.read_csv('cdf.csv', index_col=0)

In [3]:
# Preview the loaded frame; the rendered output shows the first and last rows.
cdf

Unnamed: 0,MONTH_INT,MONTH_STR,USERNAME,TEXT,TWEET_PROCESSED,TWEET_CLEANED,PM
0,7,Jul,DGAll41,Brexit minister Stephen Barclay tells Michel B...,"['brexit', 'minister', 'stephen', 'barclay', '...",brexit minister stephen barclay tells michel b...,none
1,7,Jul,BioMickWatson,This isn't a superhero movie where the bad guy...,"['superhero', 'movie', 'gets', 'good', 'share'...",superhero movie gets good share common cause b...,none
2,7,Jul,Timlagor,Labour has to promise #2ndRef. No one wants a ...,"['labour', 'promise', '2ndref', 'wants', 'soft...",labour promise 2ndref wants soft brexit many p...,none
3,7,Jul,MySynthDreams,What's this talk about #brexit ? All the bars ...,"['talk', 'brexit', 'bars', 'budapest', 'full',...",talk brexit bars budapest full brit,none
4,7,Jul,JoseSaylor,Ann Widdecombe Says She Has 'No Regrets' Compa...,"['widdecombe', 'says', 'regrets', 'comparing',...",widdecombe says regrets comparing brexit slave...,none
...,...,...,...,...,...,...,...
305299,2,Feb,AgataGostynska,For the European Parliament’s tactics in the t...,"['european', 'parliament', 'tactics', 'trade',...",european parliament tactics trade negotiations...,none
305300,2,Feb,news4321,Man from newspaper that has spent the last thr...,"['newspaper', 'spent', 'last', 'three', 'years...",newspaper spent last three years telling world...,none
305301,2,Feb,Will83064416,"I didn’t vote mate, I support my club no matte...","['vote', 'mate', 'support', 'club', 'matter', ...",vote mate support club matter also statistic v...,none
305302,2,Feb,WilliamHayesWo1,True that Remainers consider ourselves a brigh...,"['true', 'remainers', 'consider', 'bright', 'b...",true remainers consider bright bunch done noth...,none


### Checking top words

In [None]:
# Build a table of the 50 most frequent tokens across all tweets.
#
# The tokens are taken from TWEET_CLEANED (space-separated text) rather than
# TWEET_PROCESSED: after read_csv the latter is the *string* form of a list,
# so iterating over it would yield single characters, not words.
# NOTE(review): TWEET_CLEANED appears to hold the same tokens as
# TWEET_PROCESSED joined by spaces - confirm against the preprocessing step.
# Missing values are skipped because this cell runs before the dropna cell.
token_lists = cdf['TWEET_CLEANED'].dropna().astype(str).str.split()
words = [w for tokens in token_lists for w in tokens]
pos_count = FreqDist(words)
freq = pos_count.most_common(50)
df_freq = pd.DataFrame(freq, columns=['WORD', 'FREQ'])
df_freq.head(50)

In [13]:
# Discard, in place, every row that has a missing value in any column.
cdf.dropna(axis=0, how='any', inplace=True)

### Sentiment Analysis with NLTK VADER

In [14]:
# Sentiment analysis with NLTK's VADER analyzer.
sid = SentimentIntensityAnalyzer()

# Score each tweet exactly once. polarity_scores returns a dict with the keys
# 'neg', 'neu', 'pos' and 'compound'; expanding those dicts into a frame avoids
# re-running the analyzer separately for every output column.
vader_scores = pd.DataFrame(
    cdf['TWEET_CLEANED'].map(sid.polarity_scores).tolist(),
    index=cdf.index,
    columns=['neg', 'neu', 'pos', 'compound'],
)

cdf['SENTIMENT_CP'] = vader_scores['compound']
cdf['SENTIMENT_NEUT'] = vader_scores['neu']
cdf['SENTIMENT_NEG'] = vader_scores['neg']
cdf['SENTIMENT_POS'] = vader_scores['pos']

# Label each tweet by the sign of its compound score:
# > 0 -> 'positive', < 0 -> 'negative', exactly 0 -> 'neutral'.
cdf['SENTIMENT'] = np.select(
    [cdf['SENTIMENT_CP'] > 0, cdf['SENTIMENT_CP'] < 0],
    ['positive', 'negative'],
    default='neutral',
)

## Visualization

In [15]:
# Encode the sentiment label as a number so it can be plotted:
# 'positive' -> 2, 'negative' -> 0, any other value -> 1.
cdf['SENTIMENT_N'] = (
    cdf['SENTIMENT']
    .map({'positive': 2, 'negative': 0})
    .fillna(1)
    .astype('int64')
)

In [None]:
# Line plot of the numeric sentiment score (SENTIMENT_N) against the month
# number, with one line per value of the PM column.
plt.figure(figsize=(20,5))
sns.lineplot(x='MONTH_INT', y='SENTIMENT_N', data=cdf, hue='PM', marker='o')
plt.title('Sentiment Analysis on Tweets 2019')
# Put a tick on every month present in the plotted frame.
plt.xticks(cdf.MONTH_INT.unique(), rotation=90)
plt.show()

### WordClouds

In [None]:
# Vectorize the cleaned tweets with TF-IDF (uni-, bi- and tri-grams) to get
# the vocabulary used for the word cloud.
tweets = cdf['TWEET_CLEANED'].tolist()
tfidf_vec = TfidfVectorizer(use_idf=True, ngram_range=(1,3))
tfidf_m = tfidf_vec.fit_transform(tweets)

# get_feature_names() no longer exists in recent scikit-learn releases;
# prefer get_feature_names_out() and fall back for older installations.
# The result is kept as a plain list either way.
if hasattr(tfidf_vec, 'get_feature_names_out'):
    feature_names = list(tfidf_vec.get_feature_names_out())
else:
    feature_names = tfidf_vec.get_feature_names()

In [None]:
# Draw a word cloud (at most 1000 words, 1000x500 px) from the vectorizer's
# vocabulary joined into one space-separated string.
# NOTE(review): only the feature names are passed to WordCloud, so the TF-IDF
# weights in tfidf_m do not influence the word sizes - confirm this is intended.
vocabulary_text = " ".join(feature_names)
wc = WordCloud(width=1000, height=500, max_words=1000).generate(vocabulary_text)

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wc)
ax.axis("off")
plt.show()

In [16]:
# Inspect the frame again now that the SENTIMENT_* columns have been added.
cdf

Unnamed: 0,MONTH_INT,MONTH_STR,USERNAME,TEXT,TWEET_PROCESSED,TWEET_CLEANED,PM,SENTIMENT_CP,SENTIMENT_NEUT,SENTIMENT_NEG,SENTIMENT_POS,SENTIMENT,SENTIMENT_N
0,7,Jul,DGAll41,Brexit minister Stephen Barclay tells Michel B...,"['brexit', 'minister', 'stephen', 'barclay', '...",brexit minister stephen barclay tells michel b...,none,0.1477,0.714,0.151,0.136,positive,2
1,7,Jul,BioMickWatson,This isn't a superhero movie where the bad guy...,"['superhero', 'movie', 'gets', 'good', 'share'...",superhero movie gets good share common cause b...,none,0.3612,0.588,0.139,0.273,positive,2
2,7,Jul,Timlagor,Labour has to promise #2ndRef. No one wants a ...,"['labour', 'promise', '2ndref', 'wants', 'soft...",labour promise 2ndref wants soft brexit many p...,none,-0.1280,0.779,0.121,0.100,negative,0
3,7,Jul,MySynthDreams,What's this talk about #brexit ? All the bars ...,"['talk', 'brexit', 'bars', 'budapest', 'full',...",talk brexit bars budapest full brit,none,0.0000,1.000,0.000,0.000,neutral,1
4,7,Jul,JoseSaylor,Ann Widdecombe Says She Has 'No Regrets' Compa...,"['widdecombe', 'says', 'regrets', 'comparing',...",widdecombe says regrets comparing brexit slave...,none,-0.8074,0.407,0.593,0.000,negative,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
305299,2,Feb,AgataGostynska,For the European Parliament’s tactics in the t...,"['european', 'parliament', 'tactics', 'trade',...",european parliament tactics trade negotiations...,none,0.0000,1.000,0.000,0.000,neutral,1
305300,2,Feb,news4321,Man from newspaper that has spent the last thr...,"['newspaper', 'spent', 'last', 'three', 'years...",newspaper spent last three years telling world...,none,-0.6908,0.725,0.275,0.000,negative,0
305301,2,Feb,Will83064416,"I didn’t vote mate, I support my club no matte...","['vote', 'mate', 'support', 'club', 'matter', ...",vote mate support club matter also statistic v...,none,0.4215,0.545,0.134,0.322,positive,2
305302,2,Feb,WilliamHayesWo1,True that Remainers consider ourselves a brigh...,"['true', 'remainers', 'consider', 'bright', 'b...",true remainers consider bright bunch done noth...,none,0.8074,0.703,0.000,0.297,positive,2
