In [1]:
import os
import tweepy as tw
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk import bigrams
import itertools
import collections
import matplotlib.pyplot as plt
import networkx as nx
import datetime

In [2]:
# Twitter API credentials are read from the environment so that secrets never
# have to be pasted into (and accidentally committed with) the notebook.
# Each value falls back to an empty string when its variable is unset.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', '')

# OAuth handler built from the consumer pair, then given the access pair.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# API client used by the search cell below; wait_on_rate_limit=True is passed
# so the client waits on rate limits (per the option's name).
api = tw.API(auth, wait_on_rate_limit=True)


In [None]:
# Search query: the #Covid19 hashtag with the "-filter:retweets" operator.
search_words = "#Covid19 -filter:retweets"

# NOTE(review): despite its name, this date is passed as `until`, not `since`.
# Presumably `until` is an upper bound on tweet creation date — confirm
# against the Twitter search API docs.
date_since = "2020-06-24"

# Build the paginated search first, then cap it at 99999 items.
tweet_cursor = tw.Cursor(api.search, q=search_words, lang="en", until=date_since)
tweets = tweet_cursor.items(99999)

In [None]:
# Raw string so that \w, \/ and \S reach the regex engine unchanged; in a
# normal string literal they are invalid escape sequences and trigger a
# DeprecationWarning / SyntaxWarning on recent Python versions.
# Alternative 1: any single character that is not an ASCII letter, a digit,
#                a space or a tab.
# Alternative 2: a URL-like token of the form <word chars>://<non-space chars>.
_URL_OR_SYMBOL_RE = re.compile(r"([^0-9A-Za-z \t])|(\w+:\/\/\S+)")


def url_remover(txt):
    """Strip URLs and non-alphanumeric characters from a piece of text.

    Despite the name, this removes more than URLs: every character that is
    not an ASCII letter, digit, space or tab is deleted as well (so '#', '@',
    punctuation and accented letters all disappear). Runs of whitespace are
    then collapsed to single spaces and the ends are trimmed.

    Parameters
    ----------
    txt : str
        Raw text, e.g. the body of a tweet.

    Returns
    -------
    str
        The cleaned text, with words separated by single spaces.
    """
    return " ".join(_URL_OR_SYMBOL_RE.sub("", txt).split())

In [None]:
def _tweet_row(tweet):
    """Return [screen name, location, cleaned text, creation time] for one tweet."""
    return [
        tweet.user.screen_name,
        tweet.user.location,
        url_remover(tweet.text),
        tweet.created_at,
    ]


# Walk the search results once, turning each tweet into a flat row.
tweet_info = [_tweet_row(tweet) for tweet in tweets]
len(tweet_info)


In [None]:
# Tabulate the collected rows; column order mirrors the order of the fields
# inside each row of tweet_info.
tweet_columns = ['user', "location", "message", 'date']
tweet_data = pd.DataFrame(tweet_info, columns=tweet_columns)
tweet_data

In [None]:
# Load NLTK's English stop words. On a machine where the corpus has never
# been downloaded the lookup raises LookupError, so fetch it once and retry
# to keep the notebook runnable from a fresh environment.
try:
    stop_words = stopwords.words('english')
except LookupError:
    nltk.download('stopwords', quiet=True)
    stop_words = stopwords.words('english')

# Terms that merely restate the search topic and would otherwise dominate the
# counts. Tokens are lower-cased below, so only the lower-case spellings can
# actually match; the other spellings are kept from the original list.
collection_words = ['covid19', 'covid', 'COVID19', 'COVID', 'Coronavirus', 'coronavirus']
stop_words = stop_words + collection_words

# Set copy for constant-time membership tests; stop_words itself stays a list
# for any other cell that uses it.
stop_word_set = set(stop_words)

# One list of lower-cased tokens per tweet message.
lists_wordlist = [message.lower().split() for message in tweet_data.message]

# All tokens from all tweets in a single flat list.
full_wordlist = list(itertools.chain.from_iterable(lists_wordlist))

# Flat token list with stop words and collection words removed.
covid_wordlist = [word for word in full_wordlist if word not in stop_word_set]
covid_wordlist[:5]

In [None]:
# Tally how often each remaining word occurs across all tweets.
word_counter = collections.Counter()
word_counter.update(covid_wordlist)

In [None]:
# The 25 most frequent words and their counts, as a two-column frame.
top_words = word_counter.most_common(25)
bills_words = pd.DataFrame(top_words, columns=['words', 'count'])

fig, ax = plt.subplots(figsize=(10, 10))

# Sort by count before drawing the horizontal bars.
words_by_count = bills_words.sort_values(by='count')
words_by_count.plot.barh(x='words', y='count', ax=ax, color="royalblue")

ax.set_title("Common Words Found in Covid19 Tweets")

plt.show()


# Bigrams

In [None]:
# Per-tweet token lists with stop words removed ("nsw" = no stop words).
# A set gives constant-time membership tests; the loop variables are named so
# they do not reuse the name of the `tweets` search cursor.
_stop_word_lookup = set(stop_words)
tweets_nsw = [[word for word in tweet_words if word not in _stop_word_lookup]
              for tweet_words in lists_wordlist]

In [None]:
# Preview only the first few tweets; displaying the whole nested list would
# flood the cell output.
tweets_nsw[:5]

In [None]:
# Create list of lists containing bigrams in tweets
terms_bigram = [list(bigrams(tweet)) for tweet in tweets_nsw]

# Flatten list of bigrams in clean tweets.
# Deliberately NOT named `bigrams`: rebinding that name would replace the
# imported nltk `bigrams` function, so re-running this cell would fail with
# "'list' object is not callable".
bigram_list = list(itertools.chain.from_iterable(terms_bigram))

# Create counter of words in clean bigrams
bigram_counts = collections.Counter(bigram_list)

In [None]:
# Show the 20 most frequent bigrams together with their counts.
bigram_counts.most_common(n=20)

In [None]:
# The 50 most frequent bigrams, one (bigram, count) pair per row.
top_bigrams = bigram_counts.most_common(50)
bigram_df = pd.DataFrame(top_bigrams, columns=['bigram', 'count'])

bigram_df

In [None]:
# Fixed seed for the spring layout so the figure is identical on every run.
LAYOUT_SEED = 42

# Create network plot: one node per word, one edge per bigram
G = nx.Graph()

# Create connections between nodes. The edge weight is the bigram count scaled
# by 5; it is stored on the edge, while the drawing below uses a fixed width.
for (first_word, second_word), count in zip(bigram_df['bigram'], bigram_df['count']):
    G.add_edge(first_word, second_word, weight=(count * 5))

fig, ax = plt.subplots(figsize=(20, 20))

pos = nx.spring_layout(G, k=2, seed=LAYOUT_SEED)

# Plot networks (built-in labels are off; offset labels are drawn below)
nx.draw_networkx(G, pos,
                 font_size=16,
                 width=3,
                 edge_color='grey',
                 node_color='orange',
                 with_labels=False,
                 ax=ax)

# Create offset labels: each word is written slightly up and to the right of
# its node position
for key, value in pos.items():
    x, y = value[0] + .135, value[1] + .045
    ax.text(x, y,
            s=key,
            bbox=dict(facecolor='grey', alpha=0.25),
            horizontalalignment='center', fontsize=13)

plt.show()