In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.probability import FreqDist
import nltk

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

from datetime import datetime
from wordcloud import WordCloud

In [None]:
# Load the CSV export from the working directory.
# NOTE(review): `cdf` is not referenced by any of the cells visible below,
# which all operate on `df_mb` — confirm which frame is intended.
cdf = pd.read_csv(filepath_or_buffer='cdf.csv')

### Checking top words

In [None]:
# Flatten the per-tweet token sequences into one long list of tokens.
# NOTE(review): assumes each TWEET_PROCESSED entry is an iterable of tokens;
# if the column holds plain strings this iterates characters — confirm.
words = []
for tokens in df_mb['TWEET_PROCESSED']:
    words.extend(tokens)

# Count token occurrences and keep the 50 most frequent ones.
pos_count = FreqDist(words)
freq = pos_count.most_common(50)

# Tabulate the (word, count) pairs for display.
df_freq = pd.DataFrame(data=freq, columns=['WORD', 'FREQ'])
df_freq.head(50)

### Sentiment Analysis with NLTK VADER

In [None]:
# Sentiment analysis with NLTK's VADER analyzer.
sid = SentimentIntensityAnalyzer()

# Score each cleaned tweet exactly once. polarity_scores returns a dict with
# the keys 'neg', 'neu', 'pos' and 'compound'; expanding those dicts into a
# frame avoids re-running the analyzer four times per tweet.
# Passing `columns` keeps the frame well-formed even when df_mb is empty.
vader_scores = pd.DataFrame(
    df_mb['TWEET_CLEANED'].apply(sid.polarity_scores).tolist(),
    index=df_mb.index,
    columns=['neg', 'neu', 'pos', 'compound'],
)

df_mb['SENTIMENT_CP'] = vader_scores['compound']
df_mb['SENTIMENT_NEUT'] = vader_scores['neu']
df_mb['SENTIMENT_NEG'] = vader_scores['neg']
df_mb['SENTIMENT_POS'] = vader_scores['pos']

# Label each tweet by the sign of its compound score.
df_mb.loc[df_mb['SENTIMENT_CP'] > 0, 'SENTIMENT'] = 'positive'
df_mb.loc[df_mb['SENTIMENT_CP'] == 0, 'SENTIMENT'] = 'neutral'
df_mb.loc[df_mb['SENTIMENT_CP'] < 0, 'SENTIMENT'] = 'negative'

In [None]:
# Count how often each numeric sentiment code occurs within every PM group.
# NOTE(review): `df_test2` is not defined in any visible cell, and
# SENTIMENT_N is only created on `df_mb` in a later cell — confirm this cell
# survives Restart Kernel -> Run All.
(
    df_test2
    .groupby(by='PM')['SENTIMENT_N']
    .value_counts()
)

## Visualization

In [None]:
# Encode the sentiment label as a number: 'positive' -> 2, 'negative' -> 0,
# and every other value (e.g. 'neutral') -> 1.
df_mb['SENTIMENT_N'] = df_mb['SENTIMENT'].map(
    lambda label: {'positive': 2, 'negative': 0}.get(label, 1)
)

In [None]:
# Line plot of the numeric sentiment code per month, one line per PM.
fig, ax = plt.subplots(figsize=(20, 5))
sns.lineplot(
    data=df_mb,
    x='MONTH_INT',
    y='SENTIMENT_N',
    hue='PM',
    marker='o',
    ax=ax,
)
ax.set_title('Sentiment Analysis on Tweets 2019')

# Put a tick on every month value present in the data and rotate the labels
# so they stay readable.
ax.set_xticks(df_mb.MONTH_INT.unique())
ax.tick_params(axis='x', labelrotation=90)
plt.show()

### WordClouds

In [None]:
# Vectorize the tweets with TfidfVectorizer to obtain the vocabulary used for
# the word clouds (unigrams, bigrams and trigrams).
# NOTE(review): this cell reads 'TEXT_CLEANED' while the sentiment cell reads
# 'TWEET_CLEANED' — confirm both columns exist and that this one is intended.
tweets = df_mb['TEXT_CLEANED'].tolist()
tfidf_vec = TfidfVectorizer(use_idf=True, ngram_range=(1, 3))
tfidf_m = tfidf_vec.fit_transform(tweets)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# get_feature_names_out() and fall back only on older releases that lack it.
# The result is kept as a plain list so downstream cells see the same type.
if hasattr(tfidf_vec, 'get_feature_names_out'):
    feature_names = tfidf_vec.get_feature_names_out().tolist()
else:
    feature_names = tfidf_vec.get_feature_names()

In [None]:
# Join every vocabulary term into a single space-separated string and let
# WordCloud derive the word sizes from that text.
# NOTE(review): only the term strings are used here; the TF-IDF weights held
# in `tfidf_m` do not influence the cloud — confirm that is intended.
cloud_text = " ".join(feature_names)
wc = WordCloud(width=1000, height=500, max_words=1000).generate(cloud_text)

# Render the cloud as an image without axes.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(wc)
ax.axis("off")
plt.show()