# Text Mining and Natural Language Processing

## Jamal Aziz

#### Read in data

In [4]:
#!pip install textblob
import nltk
from nltk.tokenize import word_tokenize  
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.porter import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import re
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score

In [5]:
#!pip install textblob

In [6]:
import pandas as pd


df = pd.read_csv('Data/ms4h05_data_anon.csv')

print(df.to_string()) 

      status_id                                               text                       annontation
0             0  PAGS PARODY! Plus, Joe talks coronavirus and h...                        irrelevant
1             1  #coronavirus shows that racists have absolutel...                        irrelevant
2             2  @Laura1206 @ShitChester I would expect front l...                        irrelevant
3             3  The coronavirus is not only affecting the way ...                        irrelevant
4             4  I hope your father recovers from the virus as ...                          politics
5             5  Delhi HC restrains publication of certain adve...       true public health response
6             6  China confirms new coronavirus as it investiga...       true public health response
7             7  Health authorities have identified a new coron...                              news
8             8  Chinese scientists believe the mysterious pneu...                         

#### 1. Change @twitterhandle to @user

In [7]:
df["text"][0] #example of a tweet with @username

'PAGS PARODY! Plus, Joe talks coronavirus and has special guest Peter J Hasson on his new book. - @JoeTalkShow https://t.co/IJE4gZwI0V'

In [8]:
import re
df['text'] = [re.sub(r'@\w+', '@User', x) for x in df["text"]] # eliminate usernames and replace with @User

In [9]:
df['text'][0] #above changes @JoeTalkShow to @User

'PAGS PARODY! Plus, Joe talks coronavirus and has special guest Peter J Hasson on his new book. - @User https://t.co/IJE4gZwI0V'

#### 2. Remove Stop-words

In [10]:
import nltk
stopwords = set(nltk.corpus.stopwords.words('english'))

#### Check impurities

The following code takes the texts and gives each text a score depending on the amount of impurities they have. An impurity is a text character such as a { or a \. We then look at the top 3 most impure tweets.

In [11]:
import re

RE_SUSPICIOUS = re.compile(r'[&<>{}\[\]\\]')

def impurity(text, min_len=10):
    """returns the share of suspicious characters in a text"""
    if text == None or len(text) < min_len:
        return 0
    else:
        return len(RE_SUSPICIOUS.findall(text))/len(text)

print(impurity(df['text'][1]))

0.0032679738562091504


In [12]:
import re
df['impurity'] = df['text'].apply(impurity,min_len=10)
#get top 3 records
df[['text', 'impurity']].sort_values(by='impurity', ascending=False).head(3)


Unnamed: 0,text,impurity
594,"For the 1989 edition onwards,[3][4] the novel'...",0.021818
453,Applying essential oil to anus 'cures coronavi...,0.020408
2119,"“In its decision, the FDA said, “It is no long...",0.019608


Our tweets are quite pure, however from these three we can conclude that we should get rid of links and various other phrases/characters. We run it through a program that gets rid of these, using regular expressions.

In [13]:
import html

def clean(text):
    # convert html escapes like &amp; to characters.
    text = html.unescape(text)
    # tags like <tab>
    text = re.sub(r'<[^<>]*>', ' ', text)
    # markdown URLs like [Some text](https://....)
    #text = re.sub(r'\[([^\[\]]*)\]\([^\(\)]*\)', r'\1', text)
    # text or code in brackets like [0]
    text = re.sub(r'\[[^\[\]]*\]', ' ', text)
    # standalone sequences of specials, matches &# but not #cool
    text = re.sub(r'(?:^|\s)[&#<>{}\[\]+|\\:-]{1,}(?:\s|$)', ' ', text)
    # standalone sequences of hyphens like --- or ==
    text = re.sub(r'(?:^|\s)[\-=\+]{2,}(?:\s|$)', ' ', text)
    # sequences of white spaces
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

Let's take a look at a tweet with many impurities:

In [14]:
df['text'][460]

'Instead of fighting over poo paper &amp; sanitizer,buy immune system support products like\nVitamin C\nMatcha powder\nVitamin B6,D\n\nAdd fruits &amp; vegetables such as\nBlueberries\nTumeric\nDark chocolate\n\nMake ur own sanitizer\nVodka/metholated spirits\nAloevera gel\nEssential oil\n#coronavirus https://t.co/5xIxVZR2WN'

Now we apply the above program to eliminate the impurities specfied in the program and see what the tweet looks like

In [15]:
text = df['text'][460]
clean_text = clean(text)
print(clean_text)
print("Impurity:", impurity(clean_text))

Instead of fighting over poo paper sanitizer,buy immune system support products like Vitamin C Matcha powder Vitamin B6,D Add fruits vegetables such as Blueberries Tumeric Dark chocolate Make ur own sanitizer Vodka/metholated spirits Aloevera gel Essential oil #coronavirus https://t.co/5xIxVZR2WN
Impurity: 0.0


The tweet now looks a lot better. We run this program on all tweets and check now the impurity scores.

In [16]:
df['clean_text'] = df['text'].map(clean)
df['impurity']   = df['clean_text'].apply(impurity, min_len=20)

df[['clean_text', 'impurity']].sort_values(by='impurity', ascending=False).head(3)

Unnamed: 0,clean_text,impurity
3268,In case anyone was wondering ... Cocaine Doesn...,0.008621
3611,>when your 5g tower causes people to get coron...,0.008475
1766,''38% of Americans wouldn’t buy Corona beer 'c...,0.007782


There are less impurities than before, let's look at an original tweet from the table above and it's cleaned version.
Original:

In [17]:
df['text'][1766]

"''38% of Americans wouldn’t buy Corona beer 'cause of the coronavirus.''Check the analysis on how a company name can have a significant impact on a company’s bottom line&gt;&gt;https://t.co/XzdyyoOznP\n\n#gew #entrepreneurship #company #brand https://t.co/DtcqDUGhCM"

Cleaned:

In [18]:
df['clean_text'][1766]

"''38% of Americans wouldn’t buy Corona beer 'cause of the coronavirus.''Check the analysis on how a company name can have a significant impact on a company’s bottom line>>https://t.co/XzdyyoOznP #gew #entrepreneurship #company #brand https://t.co/DtcqDUGhCM"

Cleaned looks better. We decide to keep websites because perhaps they will be informative in the analysis.

#### Tokenize the data

In [19]:
from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer().fit(df['clean_text'])
X_train = vect.transform(df['clean_text'])
print("X_train:\n{}".format(repr(X_train)))

X_train:
<3642x13122 sparse matrix of type '<class 'numpy.int64'>'
	with 95972 stored elements in Compressed Sparse Row format>


In [20]:
feature_names = vect.get_feature_names()
feature_names


['00',
 '000',
 '001',
 '0026tvwsqa',
 '01',
 '03',
 '03qwv6iatm',
 '03rshjtod6',
 '05gxcxyrog',
 '06',
 '076',
 '08ylle6yqf',
 '09glvcdszx',
 '0a4te3jxkr',
 '0axa7udeth',
 '0b53tqtqkb',
 '0d8otsqi9k',
 '0dftcbl5vp',
 '0dioykwt0q',
 '0gmluggz4d',
 '0hphxu8jc5',
 '0hugfenwze',
 '0jmscw0z4v',
 '0l2ris7ghh',
 '0lmm1o9k3j',
 '0mbz6er3qg',
 '0rrodzba5s',
 '0suyjea7gz',
 '0t57sffb6k',
 '0vyjgjlijv',
 '0w4tsgins1',
 '0w9itjbc9y',
 '0wdovrz8fl',
 '0zks6f7tvg',
 '10',
 '100',
 '1000',
 '10000',
 '1000s',
 '100k',
 '100mg',
 '100mil',
 '100s',
 '101',
 '101fm',
 '1022',
 '10k',
 '10min',
 '10ml',
 '10ozownwhi',
 '10s',
 '11',
 '110',
 '1140',
 '118',
 '11zvsxcl1q',
 '12',
 '121',
 '122',
 '1222',
 '13',
 '132',
 '137',
 '13th',
 '14',
 '14th',
 '14yrs',
 '15',
 '150',
 '15min',
 '15mins',
 '15z4mlzlfr',
 '16',
 '1619',
 '16fwkhgi7b',
 '17',
 '18',
 '1860',
 '18th',
 '19',
 '192',
 '1947',
 '1950s',
 '1951',
 '1955',
 '1960s',
 '1978',
 '1989',
 '19jwedab04',
 '19th',
 '1a',
 '1azil0jcvo',
 '1bbf

In [21]:
print("Number of features: {}".format(len(feature_names)))
print("First 20 features:\n{}".format(feature_names[:20]))
print("Features 2010 to 2030:\n{}".format(feature_names[2010:2030]))
print("Every 200th feature:\n{}".format(feature_names[::200]))

Number of features: 13122
First 20 features:
['00', '000', '001', '0026tvwsqa', '01', '03', '03qwv6iatm', '03rshjtod6', '05gxcxyrog', '06', '076', '08ylle6yqf', '09glvcdszx', '0a4te3jxkr', '0axa7udeth', '0b53tqtqkb', '0d8otsqi9k', '0dftcbl5vp', '0dioykwt0q', '0gmluggz4d']
Features 2010 to 2030:
['butters', 'buttons', 'butts', 'buxton', 'buy', 'buyers', 'buying', 'buys', 'bvlsqznuyw', 'bvrwaz6r9n', 'bwc', 'bwtlfta0ht', 'bxatvbrdw7', 'bxc2y45kqc', 'bxhdkwvrl8', 'by', 'bydoc', 'bye', 'byovlocapn', 'byproduct']
Every 200th feature:
['00', '2gycrbiht1', '5g', '8rhlt7suiv', 'adolescent', 'america', 'arrestjail', 'bad', 'bf', 'booster', 'buster', 'ccbzt2xk1p', 'chumps', 'comfortable', 'cookies', 'covidー19uk', 'dal', 'deprivation', 'disinfect', 'drives', 'electromagnetic', 'ethyl', 'fair', 'fitnessaddict', 'from', 'gesture', 'group', 'head', 'hope', 'ieg9f8tewx', 'infects', 'iq3hjzzc0i', 'jpwolqp0vt', 'korean', 'legitimately', 'lootings', 'maritimehealth', 'microscopic', 'mounting', 'ndzjoambq

In [22]:
print("Vocabulary size: {}".format(len(vect.vocabulary_)))
print("Vocabulary content:\n {}".format(vect.vocabulary_))

Vocabulary size: 13122
Vocabulary content:


In [23]:
bag_of_words = vect.transform(df['clean_text'])
print("bag_of_words: {}".format(repr(bag_of_words)))

bag_of_words: <3642x13122 sparse matrix of type '<class 'numpy.int64'>'
	with 95972 stored elements in Compressed Sparse Row format>


#### Stopwords

In [24]:
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
print("Number of stop words: {}".format(len(ENGLISH_STOP_WORDS)))
print("Every 10th stopword:\n{}".format(list(ENGLISH_STOP_WORDS)[::10]))

Number of stop words: 318
Every 10th stopword:
['may', 'elsewhere', 'how', 'nor', 'formerly', 'across', 'anywhere', 'thereupon', 'much', 'either', 'interest', 'one', 'afterwards', 'put', 'something', 'whence', 'fifteen', 'cry', 'whom', 'whereupon', 'once', 'done', 'have', 'what', 'above', 'perhaps', 'mine', 'con', 'off', 'alone', 'beside', 'hence']


In [25]:
#ngram_range means we use 1 word and 2 word combinations in our analysis. ie ngrams.
vect = CountVectorizer(min_df=5, stop_words="english", ngram_range=(1, 2)).fit(df['clean_text'])
X_train = vect.transform(df['clean_text'])
print("X_train with stop words:\n{}".format(repr(X_train)))

X_train with stop words:
<3642x2572 sparse matrix of type '<class 'numpy.int64'>'
	with 52118 stored elements in Compressed Sparse Row format>


In [26]:
#nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\T430\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [27]:
import nltk
from nltk.tokenize import word_tokenize  
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.porter import *
p_stemmer = PorterStemmer()
def nltk_process(text):
    #Tokenization
    nltk_tokenList = word_tokenize(text)
    
    #Stemming
    #nltk_stemedList = []
    #for word in nltk_tokenList:
    #    nltk_stemedList.append(p_stemmer.stem(word))
    
    #Lemmatization
    wordnet_lemmatizer = WordNetLemmatizer()
    nltk_lemmaList = []
    for word in nltk_tokenList:
        nltk_lemmaList.append(wordnet_lemmatizer.lemmatize(word))
    
    print("Lemmatization")
    print(nltk_lemmaList)
    #Filter stopword
    filtered_sentence = []  
    nltk_stop_words = set(stopwords.words("english"))
    for w in nltk_lemmaList:  
        if w not in nltk_stop_words:  
            filtered_sentence.append(w)
    #Removing Punctuation
    #punctuations="?:!.,;"
    #for word in filtered_sentence:
    #    if word in punctuations:
    #        filtered_sentence.remove(word)
    #print(" ")
    #print("Remove stopword & Punctuation")
    #print(filtered_sentence)

In [28]:
df['normal_text'] = df['clean_text'].map(nltk_process)

Lemmatization
['PAGS', 'PARODY', '!', 'Plus', ',', 'Joe', 'talk', 'coronavirus', 'and', 'ha', 'special', 'guest', 'Peter', 'J', 'Hasson', 'on', 'his', 'new', 'book', '.', '@', 'User', 'http', ':', '//t.co/IJE4gZwI0V']
Lemmatization
['#', 'coronavirus', 'show', 'that', 'racist', 'have', 'absolutely', '2', 'much', 'time', 'on', 'their', 'hand', 'Firstly', ',', 'those', 'racist', 'who', 'hate', 'POC', 'R', 'stuck', 'in', '#', 'XenophobicWorld', 'are', 'proud', '#', 'deplorables', 'which', 'mean', 'they', 'did', "n't", 'even', 'beleev', '#', 'coronavirus', 'existed', 'until', 'a', 'day', 'or', '2', 'ago', 'when', 'Trump', 'did', 'They', "'re", 'quick', 'study', 'http', ':', '//t.co/p22j3Ti6kK']
Lemmatization
['@', 'User', '@', 'User', 'I', 'would', 'expect', 'front', 'line', 'staff', 'to', 'be', 'more', 'of', 'a', 'risk', 'of', 'having', 'coronavirus', 'than', 'the', 'average', 'person', '.', 'Hence', 'why', 'there', 'is', 'talk', 'of', 'nurse', 'etc', 'having', 'to', 'stay', 'at', 'work',

['@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', '@', 'User', 'China', '’', 's', 'effort', 'in', 'February', 'at', 'fumigation', 'obviously', 'ha', 'helped', 'in', 'containing', 'the', 'virus', 'spread', 'by', 'now', '.', 'Pl', 'make', 'it', 'mandatory', 'for', 'all', 'our', 'state', 'and', 'union', 'territory', 'to', 'fumigate', 'the', 'country', 'in', 'our', 'fight', 'against', 'the', 'virus', '.', 'http', ':', '//t.co/9xJNNaUaFH']
Lemmatization
['@', 'User', '@', 'User', '@', 'User', 'Yeah', '...', 'some', 'system', '.', 'Hospitals', 'ca', "n't", 'get', 'the', 'COVID19', 'test', ',', 'and', 'the', 'outbreak', 'is', 'spreading', 'at', 'an', 'alarming', 'rate', '.', 'Instead', 'of', 'getting', 'an', 'actual', '...', 'you', 'know', '...', 'DOCTOR', 'to', 'head', 'up', 'the', 'task', 'force', ',', 'he', 'ha', '``', 'pray', 'it', 'all', 'away', "''", 'Mike', 'Pence', ',', 'who', 'wo', "n't", 'have', 'woman'

Lemmatization
['``', 'cocaine', 'doe', 'not', 'protect', 'against', 'covid19', "''"]
Lemmatization
['Thread', 'by', '@', 'User', ':', '1', '.', 'Wuhan', 'Coronavirus', 'is', 'an', 'engineered', 'offensive', 'bioweapon', '.', 'You', "'re", 'not', 'being', 'told', 'this', 'by', 'the', 'Mockingbird', 'medium', 'he', 'US', 'Gov', 'is', 'directly', 'responsible', 'for', 'the', 'global', 'bioweapon', 'arm', 'race', '.', 'We', "'ve", 'spent', 'somewhere', 'in', 'the', 'neighborhood', 'of…', 'http', ':', '//t.co/MlA5L5Xjzg']
Lemmatization
['Saudi', 'close', 'school', ',', 'university', 'because', 'of', 'coronavirus', 'http', ':', '//t.co/5jWz72c87u', 'via', '@', 'User']
Lemmatization
['Suicide', 'is', 'still', 'killing', 'more', 'people', '(', 'esp', '.', 'young', 'men', ')', 'than', 'the', 'Coronavirus', 'so', 'let', "'s", 'all', 'stock', 'up', 'on', 'kindness', 'rather', 'than', 'loo', 'roll', 'and', 'bleach', '.', 'We', "'re", 'always', 'here', 'if', 'you', 'need', 'a', 'chat', 'and', 'a', 

Lemmatization
['A', 'quick', 'glance', 'at', 'social', 'medium', 'platform', 'will', 'provide', 'a', 'slew', 'of', 'these', 'fake', 'cure', ':', 'garlic', ',', 'bleach', ',', 'even', 'cocaine', '.', 'http', ':', '//t.co/uUMlEioaay']
Lemmatization
['Dad', '’', 's', 'home', 'health', 'nurse', 'repeating', 'the', 'rumor', 'that', 'COVID-19', 'is', 'a', 'Chinese', 'bioweapon', '.', '🤯🤬']
Lemmatization
['At', 'least', '27', 'people', 'in', 'Iran', 'died', 'overnight', 'from', 'alcohol', 'poisoning', ',', 'a', 'some', 'turned', 'to', 'ingesting', 'industrial-grade', 'ethanol', 'and', 'methanol', 'in', 'a', 'misguided', 'effort', 'to', 'stave', 'off', 'the', 'coronavirus', 'tearing', 'through', 'the', 'country', '.', 'http', ':', '//t.co/L95w4j4M15', '#', 'CoronavirusOutbreak', '#', 'COVID2019', '#', 'COVIDー19']
Lemmatization
['Where', 'are', 'all', 'the', 'essential', 'oil', 'people', 'with', 'their', 'cure', 'for', 'coronavirus', '?', '?', 'Karen', 'where', 'you', 'at', 'boo', '?', '?', '?'

Lemmatization
['With', 'the', 'only', 'thing', 'spreading', 'faster', 'than', '#', 'COVID19', 'is', 'the', 'misinformation', 'about', 'this', '#', 'coronavirus', ',', 'The', 'Lancet', 'published', 'a', 'statement', 'clearing', 'up', 'misconception', '.', 'http', ':', '//t.co/B5JRpmM8gy', 'http', ':', '//t.co/PWQyUVwh4E']
Lemmatization
['Russia', ':', 'no', ',', 'garlic', 'and', 'sesame', 'oil', 'won', '’', 't', 'help', 'with', 'the', 'covid-19', '.', 'Ireland', ':', 'don', '’', 't', 'drink', 'water', 'every', '15', 'min', ',', 'it', 'won', '’', 't', 'help', 'with', 'the', 'virus', '.', 'France', ':', 'nope', ',', 'cocaine', 'isn', '’', 't', 'the', 'best', 'remedy', 'http', ':', '//t.co/mZbVi2urZj']
Lemmatization
['One', 'more', 'time', 'for', 'all', 'the', 'Americans', 'distracted', 'by', 'the', "'news", "'", 'about', 'Tom', 'Hanks', 'Rita', 'Wilson', 'have', '#', 'coronavirus', ',', 'Disneyland', 'being', 'closed', ',', 'the', 'NBA', 'being', 'canceled', ',', 'or', 'that', 'the', 'Sto

Lemmatization
['@', 'User', '@', 'User', 'What', 'BS', '.', 'ALL', 'lie', '.', '#', 'DementiaDon', 'called', 'the', '#', 'coronavirus', 'a', 'hoax', ',', 'did', 'nothing', 'while', 'Americans', 'died', 'and', 'fired', 'the', 'people', 'who', 'would', 'have', 'prevented', 'the', 'virus', 'from', 'becoming', 'so', 'deadly', '.', 'He', 'then', 'took', '#', 'NoResponsibilityTrump', 'for', 'his', 'total', 'incompetence', '.', '#', 'VoteBiden', 'to', '#', 'SaveAmerica', '#', 'MAGA']
Lemmatization
['I', 'have', 'a', '2-part', 'poll', 'question', ':', 'How', 'do', 'you', 'think', 'the', 'U.S.', 'government', 'ha', 'handled', 'the', 'coronavirus', 'situation', 'so', 'far', ',', 'and', 'which', 'way', 'do', 'your', 'personal', 'politics', 'lean', '?']
Lemmatization
['Johnson', 'U-turns', 'on', 'virus', 'with', 'step', 'to', 'ban', 'mass', 'event', 'http', ':', '//t.co/mqiK6ooNdU']
Lemmatization
['@', 'User', 'These', 'thing', 'are', 'to', '``', 'reinforce', 'and', 'boost', "''", 'inmune', 'syste

Lemmatization
['@', 'User', '@', 'User', '@', 'User', 'You', 'want', '``', 'evidence', "''", '?', 'How', 'about', 'this', 'map', 'of', 'skin', 'pigmentation', 'level', ',', 'compared', 'against', 'the', 'Coronavirus', 'and/or', '5G', 'map', '?', 'Does', '5G', 'testing', 'bleach', 'your', 'skin', ',', 'too', '?', 'Maybe', 'it', 'activates', 'the', 'oxygen', 'in', 'the', 'air', ',', 'and', 'we', 'know', 'that', '``', 'active', 'oxygen', "''", 'is', 'used', 'in', 'some', 'bleach', ',', 'so', 'PROOF', '!', 'http', ':', '//t.co/3QwpxOsbTW']
Lemmatization
['Tiny', ':', '``', 'I', "'m", 'not', 'scared', 'of', 'the', 'coronavirus', ',', 'I', 'just', 'am', 'not', 'in', 'the', 'mood', 'to', 'get', 'it', ',', 'so', 'I', "'m", 'going', 'to', 'stay', 'inside', '.', "''"]
Lemmatization
['One', 'of', 'the', 'best', 'anti-virals', 'against', '#', 'Covid19', 'is', 'using', 'silver', '.', 'While', 'colloidal', 'silver', 'top', 'out', 'at', '20', 'ppm', 'concentration', ',', 'chelated', 'silver', 'go', '

Lemmatization
['It', '’', 's', 'just', 'occurred', 'to', 'me', ',', 'sniffing', ',', 'trouble', 'breathing', ',', 'temperature', ',', 'shaking', 'etc', '.', 'It', '’', 's', 'not', 'the', 'Coronavirus', ',', 'it', '’', 's', 'just', 'people', 'going', 'on', 'a', 'massive', 'cocaine', 'bender', '.', 'The', 'fact', 'that', 'it', '’', 's', 'often', 'cut', 'with', 'baby', 'laxative', 'even', 'explains', 'the', 'whole', 'bog-roll', 'thing', '.', 'Solved', '#', 'Corvid19uk']
Lemmatization
['COVID-19', 'came', 'from', 'China', '.', 'Some', 'folk', 'may', 'feel', 'like', 'they', "'ll", 'be', 'damned', 'if', 'they', "'re", 'going', 'to', 'change', 'their', 'lifestyle', 'over', 'what', 'may', 'seem', 'like', 'a', 'foreign', 'bioweapon', 'scare', '.', '(', 'Related', ':', 'remember', 'when', 'Bush', 'told', 'everyone', 'to', '``', 'go', 'shopping', "''", 'after', '9/11', '?', 'That', 'strategy', 'doe', "n't", 'help', 'in', 'a', 'pandemic', '.', ')']
Lemmatization
['#', 'Cuba', 'seems', 'to', 'be', 

Lemmatization
['@', 'User', 'Horse', 'meat', 'is', "n't", 'something', 'we', 'eat', 'voluntarily', 'or', 'knowingly', 'And', 'gator', 'do', "n't", 'carry', 'strain', 'of', 'Coronavirus', 'Does', "n't", 'matter', 'anyways', 'because', 'it', "'s", 'far', 'more', 'plausible', 'that', 'this', 'strain', 'of', 'Coronavirus', 'is', 'a', 'leaked', 'bioweapon']
Lemmatization
['Cheffing', 'it', 'up', 'today', 'for', '#', 'mealprep', '.', '#', 'coronavirus', '#', 'quarantine', 'Menu', ':', 'Cumin', 'Blackbeans', 'Spring', 'Broccoli', 'with', 'Garlic', 'maranara', 'Black', 'Pepper', 'Crusted', 'Boneless', 'Pork', 'Chip', '(', 'Cut', 'on', 'bias', ')', '****', 'YES', 'I', 'cooked', 'it', '!', '!', '!', '*****', '@', 'East…', 'http', ':', '//t.co/16fWkHGI7b']
Lemmatization
['#', 'Trump', 'keep', 'acting', 'like', 'he', 'ha', 'a', 'medical', 'affinity', 'and', 'invented', 'the', 'solution', 'to', 'cure', '#', 'coronavirus', '.', 'And', 'boy', ',', 'is', 'he', 'going', 'to', 'FORCE', 'THIS', 'CURE', '

['@', 'User', '@', 'User', '@', 'User', 'The', 'drug', 'is', 'anti-malarial', 'and', 'is', 'also', 'used', 'by', 'autoimmune', 'disease', 'patient', '.', 'Their', 'theory', 'is', 'that', 'we', '’', 're', 'all', 'on', 'this', 'drug', 'because', 'Africa', 'is', 'Malaria', 'infested', ',', 'making', 'u', 'somewhat', 'immune', 'to', 'COVID19', '...', 'which', 'is', 'quite', 'dumb', '.']
Lemmatization
['The', 'poker', 'world', 'is', 'all', 'laughing', 'at', 'that', '``', 'Hair', 'Dryer', 'Kills', 'Coronavirus', "''", 'video', 'from', 'a', 'TD', ',', 'but', 'it', "'s", 'really', 'not', 'funny', '.', 'Gullible', 'people', 'believe', 'that', 'shit', '.', 'http', ':', '//t.co/zjyv1cIypw']
Lemmatization
['@', 'User', 'Someone', 'told', 'me', ',', 'in', 'support', 'of', 'a', 'conspiracy', 'theory', ',', '``', 'well', 'on', 'the', 'side', 'of', 'a', 'bottle', 'of', 'bleach', 'it', 'say', 'it', 'kill', 'coronavirus', "''", 'and', 'I', "'m", 'like', '...', "y'all", 'there', 'are', 'lot', 'of', 'coro

['@', 'User', 'Malaria', '👈', '👉COVID19', 'Don', '’', 't', 'seem', 'to', 'be', 'the', 'same', 'disease', ',', 'unlikely', 'to', 'have', 'the', 'same', 'remedy', '.', 'But', 'nice', 'to', 'know', 'you', 'are', 'still', 'willing', 'to', 'sell', 'your', 'soul', 'for', 'Donald', '.']
Lemmatization
['@', 'User', 'Because', 'it', '’', 's', 'for', 'Malaria', '.', 'Dip', 'shit', '.', 'That', '’', 's', 'not', 'what', 'the', 'topic', 'is', 'about', '.', 'Idiot', '.', 'Y', '’', 'all', 'just', 'love', 'to', 'hear', 'yourselves', 'talk', ',', 'don', '’', 't', 'you', '?', 'But', 'Yes', '!', 'Keep', 'saying', 'that', 'prescription', 'medication', 'is', 'okay', 'to', 'take', 'if', 'your', 'symptomatic', 'with', 'covid19', '.', 'Y', '’', 'all', 'are', 'going', 'to', 'get', 'so', 'sued', '.', 'Malfeasance', '.']
Lemmatization
['@', 'User', 'The', 'bat', 'soup', 'BS', 'is', 'cover', 'for', 'the', 'Wuhan', 'COVID-19', 'bioweapon', 'release', '.', 'Do', "n't", 'be', 'so', 'naieve', '.']
Lemmatization
['@',

Lemmatization
['The', 'thing', 'I', 'find', 'amusing', 'at', 'present', ',', 'is', 'there', 'a', 'multiple', 'conspiracy', 'theory', 'about', 'the', 'SARS-CoV-2', 'coronavirus', '.', 'How', 'can', 'it', 'simultaneously', 'be', 'the', 'work', 'of', 'the', 'global', 'elite', ',', 'a', 'by', 'product', 'of', '5G', 'rollout', ',', 'a', 'bioweapon', 'and', 'whatever', 'else', 'is', 'circulating', 'out', 'there', '?']
Lemmatization
['Both', 'America', "'s", 'Blood', 'Centers', 'whose', 'member', 'organization', 'collect', 'nearly', '60', '%', 'of', 'the', 'US', 'blood', 'supply', 'and', 'the', 'American', 'Red', 'Cross', 'told', '@', 'User', 'they', 'do', 'not', 'test', 'for', 'COVID-19', 'http', ':', '//t.co/QoCfzGe8G9', '#', 'CoronaVirusFacts', '2/14']
Lemmatization
['@', 'User', 'That', 'is', 'for', 'malaria', 'prevention', '.', 'Malaria', 'is', 'caused', 'by', 'infected', 'mosquito', '.', 'Covid19', 'is', 'not', 'carried', 'by', 'mosquito', '.', 'You', 'used', 'that', 'drug', 'for', 'the

Lemmatization
['#', 'ConspiracyTheories', '.', '``', '..snippets', 'of', 'misinformation', 'warped', '...', 'through', 'social', 'medium', ',', 'Covid-19', 'became—amongst', 'other', 'dangerous', 'nonsense—a', 'byproduct', 'of', 'bat', 'soup', ',', 'an', 'escaped', 'bioweapon', ',', '..', '.treatable', 'by', 'Lysol', ',', 'oregano', 'oil', ',', 'or', ',', 'worse', 'yet', ',', 'gargling', 'with', 'bleach', '.', "''", 'http', ':', '//t.co/7dy0WL7xew']
Lemmatization
['@', 'User', '@', 'User', 'I', "'ve", 'come', 'to', 'the', 'conclusion', 'the', 'Democrats', 'are', 'just', 'scum', 'I', 'took', 'that', 'drug', 'twice', 'to', 'prevent', 'malaria', 'it', "'s", 'completely', 'safe', 'and', 'it', 'wo', "n't", 'hurt', 'you', 'and', 'if', 'it', 'help', 'cure', 'the', 'Coronavirus', 'that', "'s", 'a', 'good', 'thing', 'the', 'Democrats', 'just', 'do', "n't", 'care']
Lemmatization
['@', 'User', 'Yes', '.', 'The', 'doctor', 'should', 'be', 'held', 'accountable', 'if', 'they', 'don', '’', 't', 'test

Lemmatization
['Our', 'DMs', 'are', 'open', '.', 'If', 'you', 'see', 'social', 'medium', 'post', 'you', 'think', 'we', 'should', 'look', 'into', ',', 'please', 'send', 'them', 'to', 'u', 'and', 'we', "'ll", 'try', 'to', 'investigate', '.', 'You', 'can', 'view', 'our', 'verification', 'methodology', 'here', ':', 'http', ':', '//t.co/P1gJI406E6', '#', 'FactCheckingDay']
Lemmatization
['UK', 'medium', 'outlet', 'told', 'not', 'to', 'promote', 'baseless', '5G', 'coronavirus', 'theory', 'http', ':', '//t.co/Qb8BS32n3o']
Lemmatization
['@', 'User', 'you', "'d", 'think', 'a', 'bioweapon', 'would', 'have', 'a', 'higher', 'fatality', 'rate', '.', 'not', 'downplaying', 'coronavirus', ',', 'but', 'it', "'s", 'no', 'ebola', '(', 'in', 'term', 'of', 'lethality', ')']
Lemmatization
['Ayurvedic', 'formulation', 'cured', 'Prince', 'Charles', "'", 'coronavirus', ':', 'Minister', 'Shripad', 'Naik', ',', 'citing', 'doctor', '#', 'Coronavirus', '#', 'COVID19', '#', 'COVID2019', '#', 'Virus', 'http', ':', 

Lemmatization
['#', 'IYCMI', ':', 'Texas', 'Attorney', 'General', 'Ken', 'Paxton', 'and', 'FTC', 'blast', 'Arlington', 'company', 'over', 'false', 'coronavirus', 'cure', 'claim', 'http', ':', '//t.co/YntGSgPfhX']
Lemmatization
['Bogus', 'Cures', 'and', 'Unapproved', 'Treatments', 'for', 'COVID-19', 'Flood', 'the', 'Internet', '#', 'covidscams', 'http', ':', '//t.co/2sYLkfPaMK']
Lemmatization
['Facebook', 'will', 'start', 'steering', 'user', 'who', 'interact', 'with', 'coronavirus', 'misinformation', 'to', 'WHO', 'http', ':', '//t.co/gtP8hXXl0T']
Lemmatization
['Facebook', 'will', 'alert', 'people', 'who', 'have', 'interacted', 'with', 'coronavirus', 'misinformation', '``', 'The', 'message', '—', 'which', 'will', 'appear', 'in', 'user', '’', 'News', 'Feeds', '—', 'will', 'direct', 'people', 'to', 'official', ',', 'credible', 'information', 'from', 'the', 'World', 'Health', 'Organization', "''", 'http', ':', '//t.co/sc0fgn4C0k', '@', 'User', 'http', ':', '//t.co/l7ehhZ05E1']
Lemmatizatio

Lemmatization
['When', 'you', 'wear', 'a', 'Mask', ',', 'you', 'not', 'only', 'protect', 'yourself', ',', 'you', 'protect', 'others', 'too', '.', '#', 'airmask', '#', 'covid19', '#', 'covidindia', '#', 'coronavirusinindia', '#', 'COVID2019india', '#', 'CoronavirusOutbreakindia', '#', 'mask', 'http', ':', '//t.co/oBVJpPoWUA']
Lemmatization
["'Drinking", 'alcohol', 'will', 'remove', 'coronavirus', 'from', 'throat', "'", ':', 'MLA', 'urge', 'Ashok', 'Gehlot', 'to', 'reopen', 'liquor', 'shop', 'http', ':', '//t.co/91QrQeg49K']
Lemmatization
['4', 'Things', 'to', 'be', 'done', 'for', 'life', 'to', 'return', 'to', 'normal', 'a', 'coronavirus', 'persists', '–', 'Bill', 'Gates', 'http', ':', '//t.co/IeG9f8teWX']
Lemmatization
['Man', 'Made', '!', '#', 'coronavirus', 'http', ':', '//t.co/DsZAAJ7f26']
Lemmatization
['The', '5G', 'coronavirus', 'conspiracy', 'theory', 'show', 'how', 'easily', 'misinformation', 'spread', '—', 'and', 'how', 'dangerous', 'it', 'is', 'The', 'Washington', 'Post', 'htt

Lemmatization
['@', 'User', 'Thanks', 'for', 'taking', 'action', '!', 'Here', "'s", 'the', 'full', 'list', 'of', 'coronavirus', 'conspiracy', 'theory', 'website', 'you', 'should', 'blacklist', 'to', 'make', 'sure', 'this', 'doe', "n't", 'happen', 'again', ':', 'http', ':', '//t.co/1nwTNXf43y']
Lemmatization
['#', 'Forget', '#', 'toilet', '#', 'paper', ',', '#', 'garlic', 'is', 'now', 'in', '#', 'short', '#', 'supply', 'due', 'to', '#', 'Covid-19', 'http', ':', '//t.co/W4GyfiYrTS']
Lemmatization
['Interfax', 'report', 'that', 'Kadyrov', 'is', 'in', 'a', 'Moscow', 'hospital', 'w/', 'suspected', 'coronavirus', '.', 'The', 'Chechen', 'leader', 'previously', 'dismissed', 'fear', 'of', 'the', 'virus', ',', 'telling', 'ppl', 'to', 'drink', 'water', 'with', 'lemon', 'honey', 'to', 'strengthen', 'their', 'immune', 'system', 'and', 'eat', 'garlic', 'for', '``', 'pure', 'blood', '”', 'http', ':', '//t.co/5tOlENGn71']
Lemmatization
['With', 'more', 'of', 'Trump', '’', 's', 'supporter', 'drinking',

Lemmatization
['President', 'Trump', 'hold', 'press', 'briefing', ',', 'a', 'state', 'continue', 'to', 'reopen', 'and', 'the', 'U.S.', 'approach', '100,000', 'coronavirus', 'death', '.', 'http', ':', '//t.co/jZuzjOdWSX', 'http', ':', '//t.co/sd2LVae1Ig']
Lemmatization
['Ignorance', 'is', 'a', 'killer', '.', '#', 'coronavirus', '#', 'COVID19', '#', 'Pandemic', '#', 'ReopenAmerica']
Lemmatization
['Florida', 'Man', 'Spits', 'on', 'Cops', 'While', 'Claiming', 'to', 'Have', 'Coronavirus', '.', 'Justice', 'Department', 'Accuses', 'Him', 'of', '‘', 'Bioweapon', 'Hoax.', '’', 'http', ':', '//t.co/YseNdVvgh6']
Lemmatization
['For', 'personal', 'use', 'for', 'your', 'business', '!', 'Premiun', 'sanitize', '#', 'Nurinse®', 'available', 'in', '#', 'Edmonton', '.', 'High', 'grade', '80', '%', 'Ethanol', 'for', 'proper', '#', 'sanitation', '.', 'With', 'glycerin', 'hydrogen', 'peroxide', '.', 'We', 'can', 'ship', '.', 'Buy', 'now', ':', 'http', ':', '//t.co/hkL4wkXF4c', '#', 'handsanitizer', '#', '

Lemmatization
['Drinking', 'Corona', 'beer', 'doe', 'not', 'cause', 'the', 'coronavirus', '.', 'Ordering', 'Corona', 'in', 'a', 'bar', 'might', 'get', 'you', 'punched', '.', '``', 'Dude', 'can', 'I', 'have', 'some', 'Corona', '...', "''", 'BIFF', 'BANG', 'POW', 'ACK']
Lemmatization
['Why', 'deny', 'a', 'stupid', 'conspiracy', '?', '👇👇', 'False', 'claim', ':', 'Britain', "'s", 'new', '£20', 'note', 'show', '5G', 'mast', 'and', 'coronavirus', 'symbol', 'http', ':', '//t.co/WMS9gGEgDC']
Lemmatization
['Garlic', 'won', '’', 't', 'do', 'much', 'for', 'COVID', '.', 'Find', 'other', 'way', 'to', 'keep', 'your', 'friend', '2', 'meter', 'away', '.', 'Staying', 'informed', 'is', '#', 'inourhands', '#', 'COVID19', '#', 'MythBusters', '#', 'COVIDー19', '#', 'EidUlFitr', '#', 'coronavirus', '#', 'staysafe', '@', 'User', '@', 'User', '@', 'User', '@', 'User', 'http', ':', '//t.co/5Y1ztQkpSt']
Lemmatization
['#', 'lie', 'Taking', 'a', 'hot', 'bath', 'doe', 'not', 'prevent', 'the', 'new', 'coronavirus'

Lemmatization
['Wait', ',', 'you', 'don', '’', 't', 'get', 'Coronavirus', 'by', 'drinking', 'a', 'Corona', '?', 'So', 'I', '’', 've', 'been', 'sending', 'corona', 'beer', 'to', 'my', 'family', 'for', 'nothing', '?', 'Great', '.']
Lemmatization
['Alyssa', 'say', 'to', 'EAT', 'CARBON', 'to', 'fight', 'COVID19', '!', 'As', 'bad', 'a', 'Trump', 'with', 'the', 'bleach', '.', 'http', ':', '//t.co/wM8Bh1Ry5R']
Lemmatization
['Kids', 'in', 'Boston', 'and', 'Philadelphia', 'are', 'now', 'showing', 'strange', 'symptom', 'of', 'the', 'coronavirus', 'Trump', 'SAYS', 'IT', 'IS', 'SAFE', 'FOR', 'school', 'to', 'reopen', 'during', 'coronavirus', 'AND', 'TO', 'DRINK', 'BLEACH', 'http', ':', '//t.co/D1T9LFHlRa']
Lemmatization
['I', 'TRIED', 'FOLLOWING', 'A', 'VIRAL', 'TIKTOK', 'BLEACH', 'DYE', 'TUTORIAL', 'http', ':', '//t.co/gF0Ov2KMZh', 'via', '@', 'User', '#', 'memorialdayweekend2020', '#', 'COVIDIOT', '#', 'RIPHanaKimura', '#', 'Burnaboy', '#', 'planecrashkarachi', '#', 'unc0ver', '#', 'DoubleOrNot

Lemmatization
['Also', 'more', 'ppl', 'die', 'from', 'the', 'flu', 'each', 'year', ',', 'i', 'dont', 'see', 'anyone', 'in', 'the', 'office', 'wearing', 'a', 'mask', 'while', 'they', 'have', 'the', 'flu', ',', 'nonstop', 'coughing', '.', 'Think', 'politics', 'ppl', '.', 'Blues', 'state', 'closed', 'and', 'red', 'state', 'open', '.', 'Oh', 'and', 'their', 'is', 'cure', 'it', 'called', 'hydroxychloroquine', ',', 'so', 'stop', 'living', 'in', 'fear', '.', 'http', ':', '//t.co/ya1isJTfMG']
Lemmatization
['There', 'is', 'no', 'substantial', 'evidence', 'that', '#', 'hydroxychloroquine', '(', '#', 'HCQ', ')', 'can', 'help', 'prevent', 'or', 'cure', '#', 'COVID19', 'and', 'emerging', 'research', 'suggests', 'that', 'it', 'could', 'even', 'be', 'harmful', 'to', 'patients।', 'http', ':', '//t.co/o8y4i7q3Gc']
Lemmatization
['Interesting', 'thread', 'of', 'conspiracy', 'mixed', 'with', 'fact', '.', 'Worth', 'a', 'read', '!', '!', 'Thread', 'by', '@', 'User', ':', '1', '.', 'Wuhan', 'Coronavirus', 

Lemmatization
['Large', 'Study', 'Finds', 'No', 'Benefit', 'of', 'Hydroxychloroquine', 'for', 'COVID-19', 'Time', 'http', ':', '//t.co/WJ3tV5Dmd7']
Lemmatization
['errr', '@', 'User', '#', 'lie', '5G', 'mobile', 'network', 'DO', 'NOT', 'spread', 'COVID-19', '.', '#', 'coronavirus', '#', 'moronavirus', '#', 'covidiots', '#', 'covid19', '#', 'WHO']
Lemmatization
['Any', 'Dr.', 'that', 'understands', 'science', ':', '#', 'Hydroxychloroquine', 'doe', 'NOT', 'CURE', '#', 'COVID__19', 'Trump', 'his', 'supporter', ':', 'http', ':', '//t.co/3dWcapYzes']
Lemmatization
['More', 'Evidence', 'Hydroxychloroquine', 'Is', 'Ineffective', ',', 'Harmful', 'in', 'COVID-19', 'http', ':', '//t.co/kMy1YaaEUH', 'Sent', 'using', 'the', 'Medscape', 'App', 'for', 'Android™']
Lemmatization
['Alright', 'this', 'ha', 'gone', 'too', 'far', '//', 'Garlic', 'shortage', 'hit', ',', 'a', 'side-effect', 'of', 'the', 'coronavirus', 'pandemic', 'http', ':', '//t.co/BAghl4QIud', 'http', ':', '//t.co/j24oIwWrrw']
Lemmatizat

Lemmatization
['Joe', 'Biden', 'ha', 'been', 'patriotically', 'humanely', 'staying', 'at', 'home', 'to', 'help', 'flatten', 'the', 'Covid19', 'curve', 'save', 'American', 'life', '.', 'Trump', '’', 's', 'been', 'hawking', 'dangerous', 'drug', 'bleach', 'cure', 'from', 'his', 'illegal', 'rally', 'at', 'the', 'presidential', 'podium', 'and', 'not', 'coordinating', 'any', 'response', 'to', 'the', 'virus', '.', 'Biden2020', '.', 'http', ':', '//t.co/nzGlqPZn6h']
Lemmatization
['Accidentally', 'huffed', 'a', 'BUNCH', 'of', 'bleach', 'while', 'deep', 'cleaning', 'my', 'shower', '.', 'Bummed', ',', 'cuz', 'my', 'nostril', 'really', 'hurt', ',', 'but', 'I', '’', 'm', 'thrilled', 'because', 'I', '’', 'm', 'now', 'immune', 'to', 'the', 'Coronavirus', 'right', ',', '@', 'User']
Lemmatization
['An', 'interview', 'with', 'an', 'Essential', 'Oil', 'Guru', 'will', 'be', 'held', 'to', 'provide', 'you', 'with', 'what', 'you', 'need', 'to', 'know', 'when', 'it', 'come', 'to', 'the', 'proper', 'use', 'an

Lemmatization
['Redemsevir', 'wa', 'proven', 'NOT', 'working', 'to', 'cure', 'corona', 'but', 'hydroxychloroquine', 'is', 'http', ':', '//t.co/J6ldIL9ijd']
Lemmatization
['If', 'Donald', 'Trump', '’', 's', 'follower', 'want', 'to', 'take', 'hydroxychloroquine', ',', 'who', 'are', 'we', 'to', 'stop', 'them', '?', 'Chicago', 'Tribune', 'http', ':', '//t.co/OblRZXyfpx']
Lemmatization
['.', '@', 'User', '#', 'FailedAndIncompetentPOTUSCOVID19Leadership', '100,000', 'Blessed', 'Souls', 'Lost', '(', 'DELAYED', 'RESPONSE', 'Resulted', 'in', 'Many', 'Thousands', 'of', 'Unnecessary', 'Deaths', ')', ',', 'REPEATEDLY', 'Called', 'Coronavirus', 'a', '#', 'HOAX', ',', 'fired', 'doctor', ',', 'and', 'told', 'Americans', 'to', 'inject', 'themselves', 'with', 'bleach', '.', 'http', ':', '//t.co/2r40eUVcce', 'http', ':', '//t.co/VnkdotFfPX']
Lemmatization
['But', 'is', 'it', 'better', 'than', 'bleach', '?', '#', 'covid19', '#', 'vaccine', 'http', ':', '//t.co/pGphhhENTQ']
Lemmatization
['A', 'Leader', '

['I', 'think', 'big', 'pharma', 'is', 'behind', 'these', 'attempt', 'to', 'discredit', 'hydroxychloroquine', '.', 'Otherwise', 'who', 'will', 'buy', 'their', 'drug', 'being', 'developed', 'by', 'them', 'to', 'cure', 'Chinese', 'virus', '.', 'http', ':', '//t.co/Nd1xdv5fYX']
Lemmatization
['May', '26', ',', '2020', 'We', 'have', 'to', 'learn', 'a', 'we', 'go', 'from', 'ea', 'country', "'s", 'approach', 'to', 'this', 'Covid19', 'virus', ',', '&', 'share', 'w', 'ea', 'other', 'country', ',', 'what', 'is', 'working', '&', 'what', 'doe', 'not', ',', 'a', 'scientist', 'evaluate', 'our', 'best', 'approach', '.', 'Not', 'listen', 'to', 'a', 'reality', 'TV', 'star', 'that', 'suggests', 'drinking', 'bleach', ',', 'and', 'all', 'the', 'bizarre', 'claim', 'http', ':', '//t.co/Fr0GhPC1Pb']
Lemmatization
['Earlier', 'this', 'week', ',', 'India', 'had', 'sent', 'to', 'Israel', 'a', 'five-tonne', 'cargo', 'of', 'medicine', ',', 'including', 'anti-malarial', 'drug', 'hydroxychloroquine', 'seen', 'a', '

Lemmatization
['Of', 'course', '...', 'we', 'can', '’', 't', 'have', 'reassurance', 'of', 'antibody', 'even', 'the', 'fear', 'MUST', 'continue', 'to', 'until', 'the', 'election', '🤔', 'nor', 'the', 'use', 'of', 'the', 'known', 'cure', 'hydroxychloroquine', '...', 'go', 'back', 'to', 'your', 'home', 'with', 'your', 'mask', 'and', 'give', 'up', 'your', 'right', '.', 'The', 'NWO', 'will', 'rescue', 'u', '🙄']
Lemmatization
['Anthony', 'Fauci', 'today', 'became', 'the', 'first', 'Trump', 'administration', 'official', 'to', 'say', 'definitively', 'that', '#', 'hydroxychloroquine', 'is', 'not', 'an', 'effective', 'treatment', 'for', 'the', 'coronavirus', ',', 'based', 'on', 'the', 'available', 'data', 'http', ':', '//t.co/9a1Hz4Unf9']
Lemmatization
["''38", '%', 'of', 'Americans', 'wouldn', '’', 't', 'buy', 'Corona', 'beer', "'cause", 'of', 'the', 'coronavirus', '.', "''Check", 'the', 'analysis', 'on', 'how', 'a', 'company', 'name', 'can', 'have', 'a', 'significant', 'impact', 'on', 'a', 'com

Lemmatization
['Congratulations', 'to', '@', 'User', '@', 'User', 'for', 'no', 'longer', 'tolerating', '@', 'User', 'putting', 'him', 'in', 'his', 'place', '.', 'Oh', 'but', 'it', "'s", 'cool', 'for', 'him', 'to', 'playfully', 'insult', 'the', 'integrity', 'of', '@', 'User', 'by', 'kiddingly', 'telling', 'The', 'World', ',', 'on', 'NewsTV', ',', 'to', 'perhaps', 'drink', 'Bleach', 'a', 'Covid19', 'Treatment', '!']
Lemmatization
['28', '%', 'of', 'U.S.', 'adult', 'say', 'they', 'know', 'someone', 'diagnosed', 'with', 'COVID-19', 'Pew', 'Research', 'Center', '⚠️👀🤔', '...', '👿', 'So', 'if', 'the', 'C-virus', '(', 'Covid19', ')', 'is', 'indeed', 'a', 'bioweapon', 'like', 'many', 'suspected', '...', 'how', 'doe', 'one', 'read', 'and', 'understand', 'these', 'stats', '?', '#', 'coronavirus', '#', 'JacobsTrouble', 'http', ':', '//t.co/wEHxJXGczk']
Lemmatization
['Let', '’', 's', 'not', 'forget', 'about', 'Fauci', '’', 's', 'approval', 'og', 'Hydroxychloroquine', 'to', 'treat', 'Coronavirus', 

Lemmatization
['It', '’', 's', 'super', 'concerning', 'knowing', 'some', 'medical', 'board', 'gave', 'my', 'bos', 'credential', 'at', 'some', 'point', 'when', 'she', '’', 's', 'belief', 'that', 'chlorine', 'and', 'salt', 'water', 'cure', '#', 'coronavirus']
Lemmatization
['👀', '.', '@', 'User', 'No', 'wonder', 'they', 'didn', '’', 't', 'want', 'you', 'talking', 'about', 'hydroxychloroquine', '...', 'Could', 'it', 'be', 'a', '#', 'CancerCure', '?', 'Wouldn', '’', 't', 'want', 'that', 'info', 'to', 'get', 'out', 'to', 'the', 'slave', '...', '😎', 'http', ':', '//t.co/7ziPBvnoz2']
Lemmatization
['Why', 'do', 'some', 'people', 'believe', 'hydroxychloroquine', 'is', 'a', 'potential', 'cure', 'or', 'treatment', 'for', 'the', 'virus', '?', '#', 'RSBQandA', 'http', ':', '//t.co/X1fHjAya7I']
Lemmatization
['Can', '’', 't', 'implement', 'global', 'vaccination', '💉', 'if', 'a', 'cheap', 'cure', 'is', 'readily', 'available', 'Bill', 'Gates', 'make', 'the', 'call', 'to', 'the', 'WHO', 'to', 'shut', 

Lemmatization
['Covid-19', '’', 's', 'devastating', 'toll', 'on', 'black', 'and', 'Latino', 'Americans', ',', 'in', 'one', 'chart', 'http', ':', '//t.co/OocPgc1gx1', '#', 'seeAlso', 'Black', 'and', 'Asian', 'people', 'in', 'U.K.', 'at', 'higher', 'risk', 'of', 'death', 'from', 'COVID-19', ':', 'study', 'http', ':', '//t.co/4KxQBYl2ab', '#', 'race', '#', 'bioweapon', '?']
Lemmatization
['Colombia', 'account', 'for', '70', '%', 'of', 'global', 'cocaine', 'production', '.', 'But', 'with', 'strict', 'lockdown', 'in', 'place', ',', 'business', 'is', 'dependent', 'on', 'cartel', '’', 'ability', 'to', 'adapt', '.', 'Some', 'have', 'proved', 'more', 'resilient', 'than', 'others', '.', 'http', ':', '//t.co/C4Sfxa6YqB']
Lemmatization
['Spraying', 'and', 'introducing', 'bleach', 'or', 'another', 'disinfectant', 'into', 'your', 'body', 'will', 'not', 'protect', 'you', 'against', 'COVID-19', 'and', 'can', 'be', 'dangerous', '.', 'Bleach', 'and', 'disinfectant', 'should', 'be', 'used', 'carefully', 

Lemmatization
['Hello', '@', 'User', '#', 'lie', '5G', 'mobile', 'network', 'DO', 'NOT', 'spread', 'COVID-19', '.', '#', 'coronavirus']
Lemmatization
['“', 'There', '’', 's', 'such', 'a', 'disconnect', '.', 'For', 'some', 'reason', ',', 'people', 'think', 'coronavirus', 'is', 'gone', 'in', 'San', 'Francisco', '.', 'It', 'is', 'not', 'gone.', '”', 'http', ':', '//t.co/PI2ajubDzx']
Lemmatization
['Good', 'God', '@', 'User', '#', 'fact', 'We', 'shouldn', '’', 't', 'be', 'surprised', 'that', 'conspiracy', 'theorist', 'are', 'using', '5G', 'to', 'explain', 'the', 'new', 'coronavirus', '.', '#', 'coronavirus']
Lemmatization
['Contract', 'tracing', 'contract', '6mths', 'BEFORE', '#', 'COVID19', '#', 'coronavirus', '@', 'User', '#', 'GatesOfHell', '#', 'Agenda21', 'http', ':', '//t.co/JioKBKdc4F']
Lemmatization
['Hey', '@', 'User', '#', 'fact', 'There', "'s", 'currently', 'no', 'evidence', 'that', 'any', 'research', 'institute', 'in', 'Wuhan', 'wa', 'the', 'source', '.', '#', 'coronavirus', '#

Lemmatization
['My', 'dad', 'rang', 'me', 'to', 'tell', 'me', 'he', "'s", 'sure', 'that', 'my', 'blood', 'type', 'is', 'immune', 'to', 'covid', '.', 'Then', 'he', 'asked', 'what', 'my', 'blood', 'type', 'is', '.', 'Dad', ':', 'yes', 'that', "'s", 'the', 'one', 'that', "'s", 'immune', 'so', 'you', "'re", 'safe', 'unlike', 'me', 'who', "'s", 'still', 'indoors', '.', 'Me', ':', 'we', "'ve", 'got', 'the', 'same', 'blood', 'type', 'dad', '.', 'Dad', ':', 'yes', 'but', 'mine', 'is', 'old', '.', '#', 'DadsArePriceless', 'http', ':', '//t.co/NiosmcJybn']
Lemmatization
['Bleach', 'cure', 'COVID-19', 'though', '.', 'They', 'were', 'doing', 'you', 'a', 'favor', '.', 'http', ':', '//t.co/1xc45A3Rx2']
Lemmatization
['🚨URGENT', 'SAFETY', 'MESSAGE🚨', 'Now', 'introducing', '!', 'The', 'Bleach', 'Shake', '!', 'Keep', 'COVID', 'free', 'in', 'style', 'with', 'Clorox', 'and', 'Lysol', 'flavor', '!', 'Endorsed', 'by', 'the', 'president', '!', 'Drank', 'by', 'moron', 'everywhere', '!', 'Please', 'drink', 'r

Lemmatization
['But', 'doe', 'it', 'work', 'a', 'well', 'a', 'injecting', 'bleach', '?', 'http', ':', '//t.co/P0aJU0wET2']
Lemmatization
['With', 'our', 'country', 'dealing', 'with', 'the', 'COVID-19', 'pandemic', 'sadly', 'there', 'are', 'scammer', 'trying', 'to', 'make', 'money', 'by', 'offering', 'to', 'sell', 'fraudulent', 'product', 'which', 'they', 'claim', 'to', 'prevent', ',', 'treat', 'or', 'cure', 'COVID-19', '.', 'In', 'our', 'year', 'of', 'representing', 'people', 'all', 'over', 'our', 'country', 'who', 'hav…https', ':', '//t.co/CGpk84lqvm']
Lemmatization
['The', '#', 'US', 'Food', 'and', 'Drug', 'Administration', 'come', 'under', 'fire', 'from', '@', 'User', '@', 'User', 'after', 'it', 'revoked', 'it', 'emergency', 'use', 'authorization', 'for', '#', 'Hydroxychloroquine', 'to', 'treat', '#', 'COVID19', 'http', ':', '//t.co/z5wjj4X5CZ']
Lemmatization
['COVID-19', '...', 'You', 'Need', 'A', 'MASK', 'and', 'MORE', '.', 'Take', 'care', 'of', 'YOU', '.', 'Register', 'for', 'my'

Lemmatization
['Jim', 'Bakker', 'sued', 'by', 'second', 'state', 'for', 'selling', 'fake', 'coronavirus', 'cure', 'http', ':', '//t.co/NFu4aTQF5c']
Lemmatization
['If', 'eating', 'food', 'protects', 'you', 'from', 'getting', 'covid', '19', 'I', "'d", 'say', 'I', "'m", 'immune', 'http', ':', '//t.co/Ols6XPiFXu']
Lemmatization
['There', 'might', 'be', 'a', '``', 'second', 'wave', "''", 'of', '``', 'COVID', "''", ',', 'the', 'propaganda', 'born', 'mind-virus', '!', '!', 'Quick', ',', 'if', 'you', "'re", 'really', 'not', 'bright', ',', 'go', 'compromise', 'your', 'immune', 'system', 'with', 'a', 'flu', 'shot', '...', 'and', 'MANY', 'scared', 'propaganda', 'victim', 'will', '.', 'It', "'s", 'entertainingly', 'depressing', 'people', 'this', 'thick', 'breed', 'let', 'alone', 'breath☺️']
Lemmatization
['A', 'healthy', 'immune', 'system', 'is', 'the', 'answer', '!', '#', 'covid19', '#', 'coronavirus', '#', 'MasksScam', '#', 'NoMasks', '#', 'masksdontwork', '#', 'immunesystem', '#', 'VitaminD', 

Lemmatization
['But', 'the', 'virus', 'is', 'a', 'hoax', '?', 'Isn', '’', 't', 'it', 'in', 'Alabama', '?', 'The', 'heart', 'of', 'MAGA', 'country', '.', 'Y', '’', 'all', 'can', 'take', 'HCQ', 'like', 'candy', 'and', 'drink', 'bleach', ',', 'right', '?', 'Oh..', 'maybe', 'not', ',', 'because', '\u2066', '@', 'User\u2069', 'lied', 'to', 'you', '!', '😡', '“', '‘', 'ICUs', 'are', 'full', ',', 'patient', 'are', "dying'.", '”', '#', 'coronavirus', '#', 'COVID19', 'http', ':', '//t.co/NvajsIrCFV']
Lemmatization
['Racism', 'is', 'a', 'fake', 'a', 'crisis', 'in', 'the', '2020', 'U.S.', 'a', 'coronavirus', 'wa', '.']
Lemmatization
['I', 'mean', ',', 'I', "'m", 'not', 'in', 'medicine', 'but', 'I', 'know', 'there', "'s", 'no', 'AIDS', 'vaccine', '.', 'Mr', '.', 'Fake', 'News', 'is', 'looking', 'at', 'himself', 'in', 'the', 'mirror', '.', 'http', ':', '//t.co/41kyfM6qyA']
Lemmatization
['Science', 'is', 'being', 'massively', 'manipulated', 'by', 'lobby', ',', 'after', 'Hydroxychloroquine', 'in', 't

Lemmatization
['just', 'because', 'someone', 'work', 'in', 'healthcare', ',', 'doe', 'NOT', 'mean', 'they', 'are', 'immune', 'to', 'COVID-19', '.', 'If', 'anything', 'they', 'are', 'the', 'most', 'at', 'risk', '.']
Lemmatization
['.', '@', 'User', 'ha', 'released', 'a', 'new', 'toolkit', 'regarding', 'disinformation', 'and', '#', 'COVID19', '.', 'The', 'toolkit', 'is', 'to', 'help', 'state', ',', 'local', ',', 'tribal', 'and', 'territorial', 'official', 'bring', 'awareness', 'to', 'misinformation', ',', 'disinformation', ',', 'and', 'conspiracy', 'theory', 'related', 'to', '#', 'coronavirus', '.', 'http', ':', '//t.co/zV238hNsYQ']
Lemmatization
['Smoking', 'and', 'exposure', 'to', 'secondhand', 'smoke', 'are', 'risk', 'factor', 'for', 'susceptibility', 'to', 'more', 'severe', 'COVID-19', 'symptom', 'because', 'of', 'their', 'impact', 'on', 'underlying', 'health', 'condition', 'like', 'diminished', 'lung', 'function', ',', 'weakened', 'immune', 'system', ',', 'and', 'associated', 'risk'

Lemmatization
['😍❤️', 'Covid', 'is', 'coming', 'back', 'around', 'supposedly', '.', 'Keep', 'your', 'immune', 'system', 'boosted', '.', 'Buy', '2', 'get', '1', 'FREE', 'You', 'can', 'never', 'go', 'wrong', 'with', 'my', 'SeaMoss', 'Gel', '.', 'It', '’', 'll', 'do', 'the', 'body', 'gooooood', '.', 'Check', 'my', 'website', 'for', 'detail', '.', 'Facts', '.', 'SHIPPING', 'IS…', 'http', ':', '//t.co/o4UbiYsyED']
Lemmatization
['If', 'you', 'think', 'COVID', 'is', 'fake', 'you', 'are', 'really', 'stupid', '.', 'If', 'you', 'think', 'systemic', 'racism', 'doesnt', 'exist', 'in', 'America', 'you', 'are', 'really', 'stupid', '.']
Lemmatization
['Kenya', 'will', 'start', 'it', 'own', 'clinical', 'trial', 'of', 'dexamethasone', ',', 'a', 'steroid', 'drug', 'used', 'to', 'treat', 'severely', 'ill', 'Covid-19', 'patient', '.', 'http', ':', '//t.co/dPYEhDi8md']
Lemmatization
['Where', 'is', 'the', 'idiot', 'imam', 'with', 'his', 'fake', 'dream', 'that', 'said', 'Bangladesh', 'won', '’', 't', 'get'

Lemmatization
['Microsoft', 'ha', 'just', 'announced', 'a', 'partnership', 'with', 'Adaptive', 'Biotechnologies', 'to', "'share", 'population-wide', 'immune', 'response', 'to', 'the', 'COVID-19', 'virus', '.', "'", 'The', 'revolutionary', 'database', ',', 'called', 'ImmuneCODE', ',', 'is', 'free', 'to', 'access', 'for', 'researcher', 'and', 'public', 'health', 'official', '.', 'http', ':', '//t.co/g1ipA1BdG8']
Lemmatization
['People', '...', 'you', 'tested', 'negitive', ',', 'good', 'for', 'you', '!', 'It', 'mean', 'you', 'do', "n't", 'have', 'COVID', '19', '.', 'Does', 'not', 'mean', 'your', 'immune', ',', 'only', 'that', 'you', 'have', 'not', 'caught', 'it', 'yet', '!', 'Wear', 'your', 'mask', 'take', 'precaution', ',', 'stay', 'safe', '.', 'Please', 'save', 'a', 'life', ',', 'yours', 'or', 'others', '.']
Lemmatization
['I', 'knew', 'it', 'would', 'happen', ',', 'but', 'somebody', 'is', 'defending', 'using', 'bleach', 'on', 'food', 'to', 'fight', 'coronavirus', 'in', 'my', 'comment',

['for', 'people', 'who', 'are', 'saying', 'COVID', 'is', 'fake..', 'also', 'sorry', 'for', 'your', 'loss', '😔', 'http', ':', '//t.co/RkomfAFm4J']
Lemmatization
['So', 'there', 'is', 'currently', 'no', 'evidence', 'that', 'people', 'who', 'have', 'recovered', 'from', 'COVID-19', 'and', 'have', 'antibody', 'are', 'protected', 'from', 'a', 'second', 'infection', '.', 'Better', 'keep', 'those', 'mask', 'on', 'everybody', '!', 'Nobody', 'is', 'immune', 'to', 'this', '.', 'http', ':', '//t.co/uqXRqOId50']
Lemmatization
['Naturamore', 'Immune', 'Plus', 'for', 'optimum', 'immune', 'response', 'amidst', 'Covid-19', 'pandemic', '#', 'naturamore', '#', 'ImmuneResponse', '#', 'Covid19', 'http', ':', '//t.co/ndZJOamBqs']
Lemmatization
['So', 'Kayleigh', 'McEnany', 'wa', 'asked', 'if', 'she', 'would', 'be', 'wearing', 'a', 'mask', 'at', 'tRumps', 'ego', 'rally', 'she', 'said', 'no', 'because', 'she', 'is', 'tested', 'on', 'a', 'regular', 'basis', ',', 'So', 'doe', 'she', 'believe', 'being', 'tested'

Lemmatization
['But', 'protester', 'and', 'rioter', 'are', 'immune', 'apparently', '.', 'Weird', 'how', 'NO', 'ONE', 'talked', 'about', 'Covid', 'then', '.', 'Personally', 'ANY', 'large', 'gathering', 'if', 'fucking', 'threatening', '.', 'http', ':', '//t.co/nSd9ZTsNq2']
Lemmatization
['“', 'Sure', ',', 'my', 'friend', '’', 's', 'son', 'almost', 'died', 'of', 'Covid', ',', 'but', 'someone', 'on', 'Facebook', 'posted', 'a', 'video', 'saying', 'it', '’', 's', 'a', 'conspiracy', ',', 'so', 'I', '’', 'm', 'not', 'sure', '!', '”', 'http', ':', '//t.co/S5AXT3xq96']
Lemmatization
['IMHO', ',', 'you', 'have', 'to', 'be', 'NUTS', 'to', 'not', 'want', 'to', 'catch', 'the', 'Corona', 'Virus', '!', 'It', 'will', 'build', 'up', 'your', 'immune', 'system', 'for', 'a', 'future', 'beak', 'out', '.', '#', 'Covid_19', '@', 'User']
Lemmatization
['Now', 'they', 'can', 'focus', 'on', 'the', 'real', 'cure', ',', 'how', 'to', 'safely', 'inject', 'bleach', '#', 'COVIDIOTS', '#', 'TulsaTrumpRally', '#', 'Deat

Lemmatization
['Covid', 'a', 'conspiracy']
Lemmatization
['I', 'actually', 'tried', 'this', 'but', 'added', 'a', 'piece', 'of', 'garlic', 'and', 'took', 'it', 'on', 'an', 'empty', 'stomach', '.', 'Felt', 'way', 'better', 'and', 'now', 'Im', 'covid', 'free', 'http', ':', '//t.co/suQ0dh6FTT']
Lemmatization
['My', 'mother', '’', 's', 'coronavirus', 'conspiracy', 'theory', 'are', 'on', '💯', '.', 'First', 'thing', 'I', 'heard', 'this', 'morning', 'wa', 'her', 'otp', 'with', 'my', 'nana', 'talking', 'about', ',', '“', 'if', 'the', 'virus', 'can', 'be', 'spread', 'through', '💩', 'and', 'dog', 'can', 'get', 'it', ',', 'doe', 'the', 'wind', 'pick', 'it', 'up', 'and', 'spread', 'it', '?', '”']
Lemmatization
['5G', 'doe', 'not', 'cause', 'the', 'spread', 'of', 'COVID-19', 'http', ':', '//t.co/feR4ZQQLQw']
Lemmatization
['If', 'you', 'ever', 'partied', 'in', 'Bens', 'basement', 'in', '#', 'Morgantown', 'you', 'are', 'immune', 'to', '#', 'Coronavirus']
Lemmatization
['Fake', ',', 'fake', ',', 'fake

Lemmatization
['Immune', 'your', 'immunity', 'against', 'COVID', 'ITS', 'EASIER', 'TO', 'STAY', 'WELL', 'THAN', 'TO', 'GET', 'WELL', 'TRY', 'TODAY', 'THE', 'BEST', 'IN', 'CLASS', 'IMMUNOBOOSTER', 'FLAXEVIT', '-4G', '#', 'BoostImmunity', '#', 'FightAgainstCorona', '#', 'FightAgaintCovid', '#', 'Immune', '#', 'StayHealthy', '#', 'HealthyTips', '#', 'StayFit', '#', 'FitIndia', 'http', ':', '//t.co/TcjLNaSacb']
Lemmatization
['I', 'went', 'to', 'school', 'with', 'a', 'lad', 'who', 'is', 'now', 'a', 'hardcore', 'peddler', 'of', 'woo', 'and', 'conspiracy', 'theory', '.', 'Today', 'he', 'ha', 'decided', 'that', 'the', 'fact', 'that', 'if', 'you', 'google', '‘', 'x', 'new', 'case', '’', 'it', 'brings', 'up', 'article', 'about', 'coronavirus', 'case', ',', 'is', 'a', 'government', 'conspiracy', '🧐']
Lemmatization
['Supplements', 'Won', '’', 't', 'Prevent', 'or', 'Treat', 'COVID-19', 'http', ':', '//t.co/4p942n5bN8', 'via', '@', 'User']
Lemmatization
['There', '’', 's', 'a', 'conspiracy', 'theor

Lemmatization
['Garlic', 'doesnt', 'protect', 'you', 'all', 'that', 'much', 'from', 'the', 'flu', ',', 'and', 'I', "'m", 'totally', 'surprised', 'that', 'Trump', 'hasnt', 'recommended', 'it', 'to', 'fend', 'off', 'COVID', '?', '!']
Lemmatization
['The', 'COVID', 'unit', 'at', 'my', 'job', 'is', 'almost', 'at', 'max', 'capacity', '...', '.', 'guy', ',', 'be', 'safe', '.', 'Just', 'because', 'thing', 'are', 'opening', 'up', 'doesn', '’', 't', 'mean', 'you', 'should', 'go', '.', 'Be', 'mindful', 'of', 'what', 'you', 'touch', '.', 'Wash', 'your', 'hand', '...', 'thoroughly', '.', 'Boost', 'your', 'immune', 'sys', '.', 'Stay', 'safe', '💛']
Lemmatization
['“', 'Rural', 'area', 'seemed', 'immune', 'a', 'the', 'coronavirus', 'spread', 'through', 'city', 'earlier', 'this', 'year', '.', '.', '.', '.', 'But', 'that', 'false', 'sense', 'of', 'safety', 'is', 'now', 'falling', 'apart', 'a', 'infection', 'rate', 'explode', 'in', 'rural', 'area', 'across', 'the', 'country.', '”', 'http', ':', '//t.co/

Lemmatization
['#', 'CORONIL', '#', 'Ayurvedic', 'medicine', 'is', 'said', 'to', 'be', 'world', "'s", 'oldest', 'holistic', 'healing', 'system', 'are', 'used', 'in', 'treatment', 'prevention', 'to', 'enhance', 'human', 'immune', 'system', '.', 'Swami', 'Ramdev', "'s", '#', 'Patanjali', 'ha', 'claimed', 'to', 'develop', 'a', 'cure', 'for', 'the', 'treatment', 'of', '#', 'coronavirus', 'http', ':', '//t.co/IPpoTWFd6a']
Lemmatization
['TB', 'patient', 'or', 'those', 'with', 'weak', 'immune', 'system', 'may', 'suffer', 'from', 'more', 'severe', 'form', 'of', 'COVID-19', 'if', 'infected', '.', 'If', 'you', 'have', 'any', 'symptom', 'like', 'fever', ',', 'night', 'sweat', 'or', 'loss', 'of', 'appetite', ',', 'contact', 'MALC', '.', 'Donate', 'online', 'for', 'TB', 'management', 'service', 'http', ':', '//t.co/XI7shZ2IbU', '#', 'EndTB', '#', 'StopTB', 'http', ':', '//t.co/HK5hIsmftL']
Lemmatization
['Jay', 'Hind🇮🇳', 'Baba', 'Ramdev', 'headed', 'Patanjali', 'group', 'launched', 'the', 'first',

Lemmatization
['Risks', 'of', 'health', 'product', 'that', 'make', 'false', 'claim', 'about', 'COVID-19', 'While', 'research', 'into', 'the', 'treatment', 'and', 'prevention', 'of', 'COVID-19', 'is', 'being', 'conducted', ',', 'Health', 'Canada', 'ha', 'not', 'approved', 'any', 'product', 'to', 'prevent', ',', 'treat', 'or', 'cure', 'COVID-19', '.', 'If', 'you…https', ':', '//t.co/dh2P1g5YJk', 'http', ':', '//t.co/eMrRCgnyde']
Lemmatization
['COVID', 'is', 'a', 'conspiracy', 'created', 'by', 'Moslims', 'to', 'get', 'you', 'to', 'cover', 'your', 'face', 'and', 'wash', 'your', 'hand', '.', '#', 'creepingsharia']
Lemmatization
['If', 'this', 'is', 'true', 'then', 'hat', 'off', 'to', 'Baba', '.', 'I', 'hope', 'this', 'is', 'true', 'coz', 'world', 'need', 'it', 'badly', 'now', '.', 'I', 'am', 'just', 'scared', 'coz', 'of', 'his', 'history', '.', "'100", '%', 'recovery', 'in', '7', 'day', "'", ':', 'Ramdev', 'unveils', 'Patanjali', "'s", 'Ayurvedic', 'medicine', "'Coronil", 'and', 'Swasari',

Lemmatization
['Condoms', 'have', '98', '%', 'effectiveness', 'Contraceptive', 'pill', 'ha', '99', '%', 'effectiveness', 'Polio', 'Vaccine', 'ha', '99', '%', 'effectiveness', 'But', 'Baba', 'Ramdev', 'say', 'his', 'medicine', '#', 'CORONIL', 'ha', '100', '%', 'recovery', 'rate', '.', 'Coronavirus', 'Ayush', 'Ministry', 'lens', 'on', 'Baba', 'Ramdev', '’', 's', 'COVID-19', 'cure', 'The', 'Hindu', 'http', ':', '//t.co/ISta2ym8GP']
Lemmatization
['everytime', 'i', 'get', 'sick', 'which', 'happens', 'about', '3-6', 'time', 'a', 'year', '(', 'IT', 'FUCKEN', 'HAPPENS', 'WE', 'ALL', 'GET', 'SICK', ')', 'he', 'always', 'blame', 'it', 'on', 'me', 'bc', 'it', 'my', 'fault', 'that', 'i', 'dont', '``', 'take', 'care', 'of', 'myself', "''", 'he', 'say', 'to', 'keep', 'from', 'getting', 'sick', 'all', 'you', 'need', 'is', 'to', 'eat', 'healthy', 'like', 'NO', 'you', 'also', 'need', 'to', 'protect', 'yourself']
Lemmatization
['#', 'covid', '#', 'india', 'and', '#', 'supplement', 'sale', 'by', '#', 'n

Lemmatization
['I', 'can', 'slap', 'corona', 'out', 'of', 'you', '.', 'Red', 'cheek', 'guaranteed', '#', 'CoronaCure', 'http', ':', '//t.co/dAXXlyJQ1y']
Lemmatization
['Wife', 'of', '#', 'NovakDjokovic', 'ha', 'been', 'saying', 'that', '#', '5g', 'is', 'partly', 'to', 'blame', 'for', 'Covid', '19', '!', 'I', 'now', 'why', 'Novak', 'did', "n't", 'bother', 'with', 'social', 'distancing', 'with', 'his', 'tennis', 'tournament', '.', 'I', 'wonder', 'how', 'many', 'people', 'he', 'is', 'now', 'responsible', 'for', 'infecting', 'with', '#', 'Coronavirus', '#', 'COVID19', 'http', ':', '//t.co/mZIB9rRbct']
Lemmatization
['Just', 'because', 'you', '’', 're', '24', 'doesn', '’', 't', 'make', 'you', '’', 're', 'immune', 'to', 'coronavirus', '.', 'It', 'just', 'make', 'you', 'stupid', '.']
Lemmatization
['Christian', 'TV', 'channel', 'escape', 'further', 'sanction', 'by', 'Ofcom', 'after', 'falsely', 'linking', '5G', 'to', 'coronavirus', 'http', ':', '//t.co/BfVkug1yaM']
Lemmatization
['The', 'gut'

Lemmatization
['#', 'COVID19', 'conspiracy', 'theorist', 'need', 'to', 'realize', 'that', 'ordinance', 'requiring', 'social', 'distancing', 'and', 'the', 'wearing', 'of', 'mask', 'in', 'public', 'are', 'not', 'an', 'infringement', 'of', 'their', 'right', ',', 'but', 'a', 'mean', 'of', 'preserving', 'all', 'Americans', "'", 'right', 'to', 'life', '.', 'http', ':', '//t.co/EgJcyo20io']
Lemmatization
['Me', 'think', 'so', 'too', '!', 'The', 'gov', 'ca', "n't", 'say', 'they', 'have', 'been', 'poisoning', 'u', 'for', 'year', 'imagine', 'the', 'uproar', 'it', 'affect', 'u', 'all', 'eating', 'synthetic', 'food', 'for', 'decade', '.', 'Why', 'doe', 'supermarket', 'meat', 'taste', 'like', 'Rubber', '?', 'coronvirus', 'smokescreen..', '..but', 'then', 'there', 'is', 'COVID-19', 'is', 'that', 'the', 'bioweapon', '?', 'http', ':', '//t.co/ePTEayhRJP']
Lemmatization
['I', 'like', 'Canada', '.', 'Where', 'fewer', 'people', 'are', 'dying', 'due', 'to', 'COVID-19', '.', 'Because', 'a', 'woman', '(', '

Lemmatization
['A', 'COVID-19', 'Survivor', 'Reveals', 'How', 'Steroids', 'Saved', 'His', 'Life', 'http', ':', '//t.co/kRSei6Ioyg', '#', 'Newsmax', 'via', '@', 'User']
Lemmatization
['Feels', 'fucking', 'weird', 'to', 'be', 'the', 'only', 'one', 'wearing', 'a', 'mask', 'when', 'I', 'go', 'out', '.', 'Did', 'Covid', 'pack', 'and', 'go', 'home', '?', 'Or', 'did', 'everyone', 'agree', 'it', "'s", 'a', 'conspiracy', '?']
Lemmatization
['I', 'miss', 'drinkin', 'bleach', 'with', 'my', 'homies', '.', 'It', 'wa', 'truly', 'the', 'best', 'way', 'to', 'fight', 'COVID', '.', 'http', ':', '//t.co/LMaA56MAaK']
Lemmatization
['It', '’', 's', 'not', 'Covid', 'or', 'any', 'other', 'bioweapon', 'in', 'the', 'pipeline', '..', 'The', 'main', 'enemy', 'is', '5G', 'and', 'IoT', 'conspiracy', '.']
Lemmatization
['I', 'ca', "n't", 'believe', 'the', 'people', 'on', 'this', 'programme', 'are', 'real', '...', 'BBC', 'News', 'The', 'people', 'who', 'think', 'coronavirus', 'is', 'caused', 'by', '5G', 'http', ':',

Lemmatization
['“', 'the', '5g', 'tower', 'are', 'causing', 'coronavirus', '”', 'is', 'exactly', 'what', 'the', '6G', 'tower', 'want', 'you', 'to', 'think']
Lemmatization
['#', 'COVIDIDIOTS', '#', 'UniteBlue', '#', 'Resistance', 'Another', 'death', 'caused', 'by', '@', 'User', 'http', ':', '//t.co/efEhOCqCTC']
Lemmatization
['We', "'ve", 'been', 'saying', 'since', 'day', 'one', 'that', 'this', 'is', 'a', '#', 'Chinese', 'nanotech', '#', 'bioweapon', '.', 'It', "'s", 'more', 'obvious', 'all', 'the', 'time', '.', '#', 'StopChinaNow', '!', 'Expert', 'claim', 'Americans', 'will', 'wear', 'mask', 'for', "'several", 'year', "'", 'due', 'to', '#', 'coronavirus', 'from', '#', 'China', 'http', ':', '//t.co/ReauzyS9Nw']
Lemmatization
['Don', '’', 't', 'ever', 'expect', 'a', 'cure', 'for', 'COVID', '.', 'God', 'know', 'that', 'when', 'we', 'come', 'back', 'out', 'of', 'lockdown', ',', 'it', 'will', 'be', 'weaponised', 'against', 'our', 'most', 'vulnerable', 'citizen', '.', 'Sure', 'I', 'might', '

In [29]:
df['clean_text']

0       PAGS PARODY! Plus, Joe talks coronavirus and h...
1       #coronavirus shows that racists have absolutel...
2       @User @User I would expect front line staff to...
3       The coronavirus is not only affecting the way ...
4       I hope your father recovers from the virus as ...
                              ...                        
3637    Da fark is wrong with these people?! https://t...
3638    Doctor Finds Cure to COVID-19 (without Vaccine...
3639    I can’t believe people legitimately think that...
3640    ...(wrong with Trump continues), -Will push pr...
3641    For anyone who still believes that COVID-19 is...
Name: clean_text, Length: 3642, dtype: object

In [42]:
df = df.applymap(lambda s:s.lower() if type(s) == str else s) #convert all to lowercase

In [54]:
#remove stated hashtags
remove_words = ['#nCoV20199',
'#CoronaOutbreak',
'#CoronaVirus',
'#CoronavirusCoverup',
'#CoronavirusOutbreak',
'#COVID19',
'#Coronavirus',
'#WuhanCoronavirus',
'#coronaviris',
'#Wuhan']

In [57]:
remove_words

['#nCoV20199',
 '#CoronaOutbreak',
 '#CoronaVirus',
 '#CoronavirusCoverup',
 '#CoronavirusOutbreak',
 '#COVID19',
 '#Coronavirus',
 '#WuhanCoronavirus',
 '#coronaviris',
 '#Wuhan']

In [61]:
pat = r'\b(?:{})\b'.format('|'.join(remove_words))

df['new'] = df['text'].str.replace(pat, '')

In [63]:
df['text'][352]

"hey @user if you are done with cocaine and gay orgies, take some similar diplomatic initiatives..\n\nstop being bajwa's bitch for once and show your balls, if you have any..\n\n#imrankhanpti #pakistan #coronaalert \n#coronaoutbreak #coronaviruspandemic #coronavirus #saarc https://t.co/55n0bn9zrv"

In [64]:
df['new'][352]

"hey @user if you are done with cocaine and gay orgies, take some similar diplomatic initiatives..\n\nstop being bajwa's bitch for once and show your balls, if you have any..\n\n#imrankhanpti #pakistan #coronaalert \n#coronaoutbreak #coronaviruspandemic #coronavirus #saarc https://t.co/55n0bn9zrv"

In [43]:
def form_sentence(tweet):
    tweet_blob = TextBlob(tweet)
    return ' '.join(tweet_blob.words)

print(form_sentence(df['clean_text'].iloc[1]))
print(df['clean_text'].iloc[1])


coronavirus shows that racists have absolutely 2 much time on their hands firstly those racists who hate poc r stuck in xenophobicworld are proud deplorables which means they did n't even beleev coronavirus existed until a day or 2 ago when trump did they 're quick studies https t.co/p22j3ti6kk
#coronavirus shows that racists have absolutely 2 much time on their hands firstly, those racists who hate poc r stuck in #xenophobicworld are proud #deplorables which means they didn't even beleev #coronavirus existed until a day or 2 ago when trump did they're quick studies https://t.co/p22j3ti6kk


In [48]:
df['text_np'] = df['clean_text'].map(form_sentence)

In [44]:
def no_user_alpha(tweet):
    tweet_list = [ele for ele in tweet.split() if ele != 'user']
    clean_tokens = [t for t in tweet_list if re.match(r'[^\W\d]*$', t)]
    clean_s = ' '.join(clean_tokens)
    clean_mess = [word for word in clean_s.split() if word.lower() not in stopwords.words('english')]
    return clean_mess
print(no_user_alpha(form_sentence((df['clean_text'].iloc[1]))))
print(df['clean_text'].iloc[1])

['coronavirus', 'shows', 'racists', 'absolutely', 'much', 'time', 'hands', 'firstly', 'racists', 'hate', 'poc', 'r', 'stuck', 'xenophobicworld', 'proud', 'deplorables', 'means', 'even', 'beleev', 'coronavirus', 'existed', 'day', 'ago', 'trump', 'quick', 'studies', 'https']
#coronavirus shows that racists have absolutely 2 much time on their hands firstly, those racists who hate poc r stuck in #xenophobicworld are proud #deplorables which means they didn't even beleev #coronavirus existed until a day or 2 ago when trump did they're quick studies https://t.co/p22j3ti6kk


In [49]:
df['text_sw'] = df['text_np'].map(no_user_alpha)

In [50]:
df['text_sw']

0       [pags, parody, plus, joe, talks, coronavirus, ...
1       [coronavirus, shows, racists, absolutely, much...
2       [would, expect, front, line, staff, risk, coro...
3       [coronavirus, affecting, way, live, also, dram...
4       [hope, father, recovers, virus, soonas, possib...
                              ...                        
3637                     [da, fark, wrong, people, https]
3638    [doctor, finds, cure, without, vaccine, https,...
3639    [believe, people, legitimately, think, covid, ...
3640    [wrong, trump, continues, push, pressure, gove...
3641    [anyone, still, believes, conspiracy, people, ...
Name: text_sw, Length: 3642, dtype: object

In [51]:
def normalisation(tweet_list):
        lem = WordNetLemmatizer()
        normalized_tweet = []
        for word in tweet_list:
            normalized_text = lem.lemmatize(word,'v')
            normalized_tweet.append(normalized_text)
        return normalized_tweet
    

print(normalization(no_user_alpha(form_sentence((df['clean_text'].iloc[1])))))

['coronavirus', 'show', 'racists', 'absolutely', 'much', 'time', 'hand', 'firstly', 'racists', 'hate', 'poc', 'r', 'stick', 'xenophobicworld', 'proud', 'deplorables', 'mean', 'even', 'beleev', 'coronavirus', 'exist', 'day', 'ago', 'trump', 'quick', 'study', 'https']


In [52]:
df['text_norm'] = df['text_sw'].map(normalisation)

In [53]:
df['text_norm']

0       [pags, parody, plus, joe, talk, coronavirus, s...
1       [coronavirus, show, racists, absolutely, much,...
2       [would, expect, front, line, staff, risk, coro...
3       [coronavirus, affect, way, live, also, dramati...
4       [hope, father, recover, virus, soonas, possibl...
                              ...                        
3637                     [da, fark, wrong, people, https]
3638    [doctor, find, cure, without, vaccine, https, ...
3639    [believe, people, legitimately, think, covid, ...
3640    [wrong, trump, continue, push, pressure, gover...
3641    [anyone, still, believe, conspiracy, people, d...
Name: text_norm, Length: 3642, dtype: object