### import packages

In [None]:
import tweepy
import webbrowser
import time
import pandas as pd
import numpy as np

### setup Twitter API client

#### get started here: https://developer.twitter.com/en

In [None]:
# Twitter API credentials: replace each placeholder string with the value
# issued for your own developer app (avoid committing real secrets).
consumer_key = 'your consumer key here'
consumer_secret = 'your consumer secret here'
access_token = 'your access token here'
access_token_secret = 'your access token secret here'

# build the OAuth 1.0a user handler from the four credentials
auth = tweepy.OAuth1UserHandler(
    consumer_key,
    consumer_secret,
    access_token,
    access_token_secret,
)

# API client used by every request below; wait_on_rate_limit is enabled so
# tweepy waits for the rate limit to reset rather than failing
client = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
# test your connection: request the home timeline through the client
# created above and print the text of every tweet that comes back
public_tweets = client.home_timeline()
for tweet in public_tweets:
    print(tweet.text)

### define functions

In [None]:
# tokenize your string with nltk
def tokenize_text(text: str) -> list:
    """Clean *text* and split it into lowercase word tokens without stopwords.

    Steps, in order: replace new line, carriage return and tab characters
    (plus the literal backslash-d sequence) with spaces, lowercase the text,
    delete every character that is neither a word character nor whitespace,
    tokenize with ``nltk.word_tokenize`` and drop English stopwords.

    Parameters
    ----------
    text : str
        The raw text to tokenize.

    Returns
    -------
    list
        The remaining tokens, in their original order.

    Notes
    -----
    This function does not download any NLTK data; the tokenizer models and
    the 'stopwords' corpus must already be available.
    """

    # import needed packages
    import nltk
    import re
    from nltk.corpus import stopwords

    # remove unwanted new line and tab characters from the text.
    # "\\d" is the literal two-character sequence backslash + "d" that the
    # original list contained as "\d"; spelling it with an escaped backslash
    # keeps the behavior while avoiding the invalid-escape SyntaxWarning.
    for char in ["\n", "\r", "\\d", "\t"]:
        text = text.replace(char, " ")

    # lowercase the text
    text = text.lower()

    # remove punctuation from text (anything that is not a word character
    # or whitespace is deleted, not replaced by a space)
    text = re.sub(r"[^\w\s]", "", text)

    # tokenize the text
    tokens = nltk.word_tokenize(text)

    # remove English stopwords from the tokens
    english_stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in english_stop_words]

    # return your tokens
    return tokens

In [None]:
# lemmatize your tokens with nltk
def lemmatize_tokens(tokens):
    """Return the WordNet lemma of every token in *tokens*.

    Parameters
    ----------
    tokens : iterable of str
        Word tokens, e.g. the output of ``tokenize_text``.

    Returns
    -------
    list
        One lemmatized string per input token, in the same order.
    """

    # import needed packages
    import nltk
    from nltk.stem import WordNetLemmatizer

    # fetch the WordNet corpus only when it is not installed yet, so that
    # repeated calls do not contact the download server every time
    try:
        nltk.data.find('corpora/wordnet')
    except LookupError:
        nltk.download('wordnet')

    # initiate lemmatizer (once)
    lemmatizer = WordNetLemmatizer()

    # lemmatize tokens and return them
    return [lemmatizer.lemmatize(word) for word in tokens]

In [None]:
# return the most common tokens
def return_top_tokens(tokens,
                      top_N = 10):
    """Return the *top_N* most frequent tokens as a dataframe.

    Parameters
    ----------
    tokens : iterable of hashable
        The tokens to count.
    top_N : int, default 10
        How many of the most frequent tokens to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``Word`` and ``Frequency``, ordered from most to least
        frequent; tokens with equal counts keep first-seen order.
    """

    # Counter provides the same most_common() that nltk.FreqDist inherits
    # from it, so counting does not need nltk at all
    from collections import Counter

    # first, count the frequency of every unique token
    token_counts = Counter(tokens)

    # next, keep only the top_N most common tokens and put them in a dataframe
    return pd.DataFrame(token_counts.most_common(top_N),
                        columns=['Word', 'Frequency'])

In [None]:
# return the most common bi-grams
from nltk.collocations import BigramCollocationFinder

def return_top_bigrams(tokens,
                       top_N = 10):
    """Return the *top_N* most frequent bigrams as a dataframe.

    Parameters
    ----------
    tokens : sequence of str
        The tokens from which adjacent pairs are collected.
    top_N : int, default 10
        How many of the most frequent bigrams to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``Bigram`` (a tuple of two tokens) and ``Frequency``,
        ordered from most to least frequent with a fresh 0-based index.
    """

    # collect bigrams
    bcf = BigramCollocationFinder.from_words(tokens)

    # ngram_fd is a frequency distribution, so most_common() already yields
    # the top_N (bigram, count) pairs sorted by count; this mirrors
    # return_top_tokens and avoids building and sorting a frame of every
    # bigram, while giving ties a deterministic first-seen order
    top_bigrams = bcf.ngram_fd.most_common(top_N)

    # put the top bigrams into a dataframe and return it
    return pd.DataFrame(data = top_bigrams,
                        columns = ['Bigram', 'Frequency'])

In [None]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer

def return_sentiment_df(tokens):
    """Summarize the VADER sentiment of *tokens* in a one-row dataframe.

    Every token is scored individually with
    ``SentimentIntensityAnalyzer.polarity_scores`` and classified by the
    sign of its compound score.

    Parameters
    ----------
    tokens : sequence of str
        The tokens to score (``len(tokens)`` is reported as the total).

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with the columns ``total_tokens``,
        ``positive_tokens``, ``negative_tokens``, ``neutral_tokens`` and
        ``compound_sentiment_score``. The last column is the mean compound
        score over the tokens whose score is non-zero, or 0.0 when no token
        has a non-zero score.
    """

    # initialize sentiment analyzer
    sia = SentimentIntensityAnalyzer()

    # create some counters for sentiment of each token
    positive_tokens = 0
    negative_tokens = 0
    neutral_tokens = 0
    nonzero_scores = []

    # loop through each token
    for token in tokens:

        # score the token once and reuse the value for every comparison
        compound = sia.polarity_scores(token)["compound"]

        if compound > 0:
            positive_tokens += 1
            nonzero_scores.append(compound)
        elif compound < 0:
            negative_tokens += 1
            nonzero_scores.append(compound)
        elif compound == 0:
            # neutral tokens are counted but left out of the average
            neutral_tokens += 1

    # average only the non-neutral scores; guard against the case where
    # every token is neutral (or there are no tokens), which used to raise
    # ZeroDivisionError
    if nonzero_scores:
        compound_sentiment_score = sum(nonzero_scores) / len(nonzero_scores)
    else:
        compound_sentiment_score = 0.0

    # put sentiment results into a dataframe
    sentiment_df = pd.DataFrame(data = {"total_tokens" : len(tokens),
                                        "positive_tokens" : positive_tokens,
                                        "negative_tokens" : negative_tokens,
                                        "neutral_tokens" : neutral_tokens,
                                        "compound_sentiment_score" : compound_sentiment_score},
                                index = [0])

    # return sentiment_df
    return sentiment_df

In [None]:
def ultimate_wordcloud_function(text: str,
                                output_filepath: str,
                                mask_path = None,
                                white_mask_background = True,
                                width = 725,
                                height = 300,
                                background_color = "white",
                                colormap = "viridis",
                                contour_color = "steelblue",
                                contour_width = 3,
                                collocations = False,
                                max_words = 2000,
                                max_font_size = 40,
                                min_font_size = 4,
                                prefer_horizontal = 0.9,
                                include_numbers = True):
    """Build a word cloud from *text*, show it, save it and return it.

    The text is tokenized with ``tokenize_text`` and lemmatized with
    ``lemmatize_tokens`` before the cloud is generated. The elapsed time is
    printed, the cloud is drawn with ``plt.imshow`` and written to
    *output_filepath*.

    Parameters
    ----------
    text : str
        Raw text to visualize.
    output_filepath : str
        Where the rendered image is saved.
    mask_path : str, optional
        Path of a mask image. When given, the cloud takes its width and
        height from the mask and ``contour_color`` / ``contour_width`` are
        applied; ``width`` and ``height`` are then ignored.
    white_mask_background : bool, default True
        Pass a falsy value when the mask has a black background: mask
        values equal to 0 are then set to 255 before use.
    width, height : int
        Canvas size used when no mask is given.
    background_color, colormap, collocations, max_words, max_font_size,
    min_font_size, prefer_horizontal, include_numbers
        Forwarded unchanged to ``WordCloud``.
    contour_color, contour_width
        Forwarded to ``WordCloud`` only when a mask is given.

    Returns
    -------
    The image produced by ``wordcloud.to_image()``.
    """

    # start function timer
    import time
    start = time.time()

    # tokenize and lemmatize your text
    tokens = tokenize_text(text = text)
    lemmatized_tokens = lemmatize_tokens(tokens = tokens)

    # import needed packages
    from wordcloud import WordCloud
    from PIL import Image
    import matplotlib.pyplot as plt
    import numpy as np

    # settings shared by the masked and the unmasked word cloud
    wordcloud_settings = {"background_color": background_color,
                          "colormap": colormap,
                          "collocations": collocations,
                          "max_words": max_words,
                          "max_font_size": max_font_size,
                          "min_font_size": min_font_size,
                          "prefer_horizontal": prefer_horizontal,
                          "include_numbers": include_numbers}

    if mask_path is None:

        # no mask: use the requested canvas size
        wordcloud_settings.update(width = width,
                                  height = height)

    else:

        # open the mask image as a numpy array (the file is closed again
        # as soon as the pixels have been copied)
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)

        # if your mask has a black background update to white
        if not white_mask_background:
            mask[mask[:, :] == 0] = 255

        # with a mask: size the cloud like the mask and draw its contour
        wordcloud_settings.update(mask = mask,
                                  width = mask.shape[1],
                                  height = mask.shape[0],
                                  contour_color = contour_color,
                                  contour_width = contour_width)

    # create a WordCloud object
    wordcloud = WordCloud(**wordcloud_settings)

    # generate a word cloud (must join the tokens into a string)
    wordcloud.generate(','.join(lemmatized_tokens))

    # end wordcloud timer
    end = time.time()
    print(f"wordcloud created in {round(end-start, 1)} seconds")

    # print, save, and return the wordcloud
    plt.imshow(wordcloud)
    wordcloud.to_file(output_filepath)
    return wordcloud.to_image()

### Fall 2022 UNCW tweets analysis

In [None]:
# get Fall 2022 uncw tweets (without retweets)
subject = "uncw"
query_subject = f"#{subject} -RT"

# search window boundaries, written as YYYYMMDDhhmm numbers
fromDate = 202208101600
toDate = 202211121600

# upper bound on how many tweets the cursor will yield
number_of_tweets = 10000

# The previous `expansions=author_id` argument was removed: `author_id` was
# never defined (NameError as soon as this cell ran) and `expansions` is
# not one of the parameters documented for search_full_archive.
fall_tweets = tweepy.Cursor(client.search_full_archive,
                       label = "development",
                       query = query_subject,
                       fromDate = fromDate,
                       toDate = toDate).items(number_of_tweets)

# keep only the text of each tweet; appending inside a loop preserves the
# tweets already collected if a request fails part-way through
all_fall_tweets = []

for tweet in fall_tweets:
    all_fall_tweets.append(tweet.text)

In [None]:
# report how many fall tweet texts were collected
print(f"number of tweets: {len(all_fall_tweets)}")

In [None]:
# turn list into a string; the tweets are joined with a space so the last
# word of one tweet is not fused with the first word of the next one
fall_tweets_string = ' '.join(str(x) for x in all_fall_tweets)

In [None]:
# split the combined fall tweet text into cleaned tokens
fall_tokens = tokenize_text(text=fall_tweets_string)

In [None]:
# lemmatize the fall tokens
fall_lemmatized_tokens = lemmatize_tokens(tokens=fall_tokens)

In [None]:
# tabulate and print the 15 most frequent fall tokens
fall_top_tokens = return_top_tokens(
    tokens=fall_lemmatized_tokens,
    top_N=15,
)
print(fall_top_tokens)

In [None]:
# tabulate and print the 10 most frequent fall bigrams
fall_bigram_df = return_top_bigrams(
    tokens=fall_lemmatized_tokens,
    top_N=10,
)
print(fall_bigram_df)

In [None]:
# summarize and print the sentiment of the fall tokens
fall_sentiment_df = return_sentiment_df(tokens=fall_lemmatized_tokens)
print(fall_sentiment_df)

In [None]:
# draw the fall word cloud, save it to output_filepath and keep the image
# (replace the placeholder path with a real .png destination first)
fall_uncw_wordcloud = ultimate_wordcloud_function(
    text=fall_tweets_string,
    output_filepath=r"YOUR FILEPATH HERE .png",
    width=190,
    height=220,
    background_color="white",
    colormap="viridis",
    collocations=False,
    max_words=700,
    max_font_size=40,
    min_font_size=4,
    prefer_horizontal=0.9,
    include_numbers=True,
)

In [None]:
# look at tweets regarding "online" and "winter"
# (search the collected tweet texts: the `fall_tweets` cursor has already
# been consumed and yields tweet objects rather than strings)
[tweet for tweet in all_fall_tweets if "online" in tweet.lower()]

In [None]:
# search the collected tweet texts (not the already-consumed cursor)
[tweet for tweet in all_fall_tweets if "winter" in tweet.lower()]

### Spring 2022 UNCW tweets analysis

In [None]:
# get Spring 2022 uncw tweets (without retweets)
subject = "uncw"
query_subject = f"#{subject} -RT"

# search window boundaries, written as YYYYMMDDhhmm numbers
fromDate = 202201121600
toDate = 202205161600

# upper bound on how many tweets the cursor will yield
number_of_tweets = 10000

# The previous `expansions=author_id` argument was removed: `author_id` was
# never defined (NameError as soon as this cell ran) and `expansions` is
# not one of the parameters documented for search_full_archive.
spring_tweets = tweepy.Cursor(client.search_full_archive,
                       label = "development",
                       query = query_subject,
                       fromDate = fromDate,
                       toDate = toDate).items(number_of_tweets)

# keep only the text of each tweet; appending inside a loop preserves the
# tweets already collected if a request fails part-way through
all_spring_tweets = []

for tweet in spring_tweets:
    all_spring_tweets.append(tweet.text)

In [None]:
# report how many spring tweet texts were collected
print(f"number of tweets: {len(all_spring_tweets)}")

In [None]:
# turn list into a string; the tweets are joined with a space so the last
# word of one tweet is not fused with the first word of the next one
spring_tweets_string = ' '.join(str(x) for x in all_spring_tweets)

In [None]:
# split the combined spring tweet text into cleaned tokens
spring_tokens = tokenize_text(text=spring_tweets_string)

In [None]:
# lemmatize the spring tokens
spring_lemmatized_tokens = lemmatize_tokens(tokens=spring_tokens)

In [None]:
# tabulate and print the 15 most frequent spring tokens
spring_top_tokens = return_top_tokens(
    tokens=spring_lemmatized_tokens,
    top_N=15,
)
print(spring_top_tokens)

In [None]:
# tabulate and print the 10 most frequent spring bigrams
spring_bigram_df = return_top_bigrams(
    tokens=spring_lemmatized_tokens,
    top_N=10,
)
print(spring_bigram_df)

In [None]:
# summarize and print the sentiment of the spring tokens
spring_sentiment_df = return_sentiment_df(tokens=spring_lemmatized_tokens)
print(spring_sentiment_df)

In [None]:
# draw the spring word cloud, save it to output_filepath and keep the image
# (replace the placeholder path with a real .png destination first)
spring_uncw_wordcloud = ultimate_wordcloud_function(
    text=spring_tweets_string,
    output_filepath=r"YOUR FILEPATH HERE .png",
    width=190,
    height=220,
    background_color="white",
    colormap="viridis",
    collocations=False,
    max_words=700,
    max_font_size=40,
    min_font_size=4,
    prefer_horizontal=0.9,
    include_numbers=True,
)