# Text analysis

Hello everyone! In this section, we're learning about text analysis with `TextBlob`, `Scikit-learn`, and `WordCloud`. We'll look at what our tweets contain and how we can visualize them to better understand their makeup.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from textblob import TextBlob as tb
from IPython.display import Image as im

In [None]:
# Load the previously cleaned tweets from CSV and preview the first rows
cleaned_tweets = pd.read_csv(
    'cleaned_tweets.csv',
    index_col=None,
    header=0,
)
cleaned_tweets.head()

In [None]:
def analyze_tweets(tweets):
    """Score each tweet's sentiment polarity and subjectivity with TextBlob.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Frame with a 'text' column holding the tweet text.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in, with two columns added
        (or overwritten): 'sentiment' (TextBlob polarity) and
        'subjectivity' (TextBlob subjectivity). Rows whose 'text' is not a
        string (for example NaN from an empty CSV field) get NaN in both.

    Notes
    -----
    The input frame is modified in place. Pass ``tweets.copy()`` if the
    original must stay untouched.
    """
    missing = float('nan')
    polarity_scores = []
    subjectivity_scores = []
    for text in tweets['text']:
        if not isinstance(text, str):
            # TextBlob only accepts strings; one missing value would
            # otherwise abort the analysis of the whole frame.
            polarity_scores.append(missing)
            subjectivity_scores.append(missing)
            continue
        # `.sentiment` returns both scores as a (polarity, subjectivity) pair.
        polarity, subjectivity = tb(text).sentiment
        polarity_scores.append(polarity)
        subjectivity_scores.append(subjectivity)
    tweets['sentiment'] = polarity_scores
    tweets['subjectivity'] = subjectivity_scores
    return tweets

In [None]:
# Score every tweet, then print the first row as a quick sanity check
analyzed_tweets = analyze_tweets(cleaned_tweets)
val = analyzed_tweets.iloc[0]
for label, column in (
    ('Original text:', 'text'),
    ('Cleaned text:', 'cleaned_text'),
    ('Sentiment:', 'sentiment'),
    ('Subjectivity:', 'subjectivity'),
):
    print(label, val[column])

In [None]:
def create_wordcloud(tweets, max_words=500):
    """Draw a word cloud of the most common words in a set of tweets.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Frame with a 'cleaned_text' column. Missing values are dropped and
        the remaining values are converted to strings before joining.
    max_words : int, optional
        Passed to ``WordCloud(max_words=...)``. Defaults to 500.

    Returns
    -------
    None
        The cloud is drawn with ``plt.imshow`` on the current pyplot axes.

    Raises
    ------
    ValueError
        If no text is left after dropping missing values.
    """
    # Collapse the column into one string. `astype(str)` keeps a stray
    # non-string value from breaking the join.
    cleaned = tweets['cleaned_text'].dropna().astype(str)
    corpus = ' '.join(cleaned)
    if not corpus.strip():
        # Fail early with a clear message instead of deep inside WordCloud.
        raise ValueError("No text available in 'cleaned_text' to build a word cloud.")

    # Spaces are turned into commas before the text is handed to WordCloud.
    # NOTE(review): presumably redundant with WordCloud's own tokenizer --
    # confirm before removing.
    corpus = corpus.replace(' ', ',')

    # Generate wordcloud image
    wc = WordCloud(background_color="white", max_words=max_words, colormap='plasma')
    wc.generate(corpus)
    plt.imshow(wc, interpolation='bilinear')
    plt.title('Twitter Generated Cloud', size=30)
    plt.axis("off")

In [None]:
# Render the word cloud for the full set of cleaned tweets
create_wordcloud(tweets=cleaned_tweets)