In [1]:
# Accumulator for the labelled events; one dict is appended per input line.
tweet_events = list()

# Parallel input files: line i of the data file is paired with line i of the
# label file when the events are built.
filenames = [
    './project-data/train.data.txt',
    './project-data/train.label.txt',
]

def gen_line(filename):
    """Lazily yield every line of ``filename`` with surrounding whitespace removed.

    The file is opened when the first line is requested and stays open until
    the generator is exhausted or closed.
    """
    with open(filename) as handle:
        # str.strip drops the trailing newline as well as any padding.
        yield from map(str.strip, handle)

# Read both inputs completely so that a length mismatch is detected up front.
# Pairing the generators with zip() alone would silently drop the unmatched
# tail and could hide misaligned labels.
gens = [gen_line(n) for n in filenames]
data_lines, label_lines = (list(gen) for gen in gens)
if len(data_lines) != len(label_lines):
    raise ValueError(
        f"{filenames[0]} has {len(data_lines)} lines but "
        f"{filenames[1]} has {len(label_lines)}; data and labels must align"
    )

# One event per line: a 1-based index, the comma-separated tweet ids exactly as
# they appear in the data file, and the label from the same line of the label file.
for idx, (data, label) in enumerate(zip(data_lines, label_lines), start=1):
    tweet_events.append({"index": idx, "ids": data, "is_rumor": label})

# Preview only the first few events; displaying the whole list floods the cell output.
tweet_events[:3]

[{'index': 1,
  'ids': '1250219300389974016,1250219116993974272,1250219437027766273,1250219620939657216,1250219777185873922,1250219894429208577,1250219998842216448,1250220115762667520,1250220272306638848,1250220389323526146,1250220527005753344,1250220791544705025,1250220987238383616,1250221140603047937,1250221275827470336,1250221402822545410',
  'is_rumor': 'nonrumour'},
 {'index': 2,
  'ids': '554886875303780352,554894001946759168,554959644125167617,554927149090897920,554892798869389312,554897957372567552,554903624808398850,554895301316591616,554902157942480896,554902638220042240,554915829171515392,554965654437384192,554898706680778753,554896019419574273,554893579957460992,554899738575642624,554895363165790209,554900417071833088,554907945377726464,554893028394278912,554956172529520640,554887349180833793,554890068603969536,554892765255847936,554928464676274177,554891144480718848,554961344676392960,554919484473880576,554895244412473346,554986669527818240,554919544310202368,5549086451634

In [15]:
import json
import nltk
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords

# Tokenizer applied to tweet text and annotation descriptions further down.
tt = TweetTokenizer()
# NOTE: this rebinds `stopwords` from the imported nltk.corpus object to a plain
# set of English stop words. After this line the corpus object is no longer
# reachable under that name, so the import above must run again before this
# line can be re-executed.
stopwords = set(stopwords.words('english'))

# Directory holding one `<tweet_id>.json` file per tweet.
TWEET_DIR = './train_dev_data/train/'


def _count(counter, key):
    """Increment ``counter[key]``, starting from zero when the key is new."""
    counter[key] = counter.get(key, 0) + 1


def _load_tweets(tweet_ids):
    """Return the tweet objects that could be read for ``tweet_ids``.

    Each file is parsed as one JSON document per line. Ids whose file is
    missing, unreadable or malformed are skipped (best effort); tweets parsed
    before a malformed line in the same file are kept.
    """
    tweets = []
    for tweet_id in tweet_ids:
        try:
            # Explicit UTF-8: tweet text contains non-ASCII characters and the
            # platform default encoding is not guaranteed to decode them.
            with open(TWEET_DIR + tweet_id + '.json', encoding='utf-8') as tweet_file:
                for line in tweet_file:
                    tweets.append(json.loads(line))
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: invalid JSON or
            # undecodable bytes. Anything else (KeyboardInterrupt, programming
            # errors) is deliberately allowed to propagate.
            continue
    return tweets


def _tally_features(tweet, features):
    """Count, in ``features``, which fields occur in ``tweet``.

    Scalar/list fields are counted by name, dict fields by their sub-keys, and
    for ``context_annotations`` only the FIRST annotation is inspected (its
    dict-valued members are counted by their own sub-keys).
    """
    for feat, value in tweet.items():
        if feat == 'context_annotations':
            if not value:
                # Nothing to index; avoids an IndexError on an empty list.
                continue
            for feat2, sub_value in value[0].items():
                if isinstance(sub_value, dict):
                    for feat3 in sub_value:
                        nested = features.setdefault(feat, {}).setdefault(feat2, {})
                        _count(nested, feat3)
                else:
                    _count(features.setdefault(feat, {}), feat2)
        elif isinstance(value, dict):
            for feat2 in value:
                _count(features.setdefault(feat, {}), feat2)
        else:
            _count(features, feat)


def _clean_tokens(text):
    """Return the set of lower-cased tokens of ``text`` worth keeping.

    Links, tokens without any letter, and English stop words are dropped.
    """
    return {
        token
        for token in tt.tokenize(text.lower())
        if not token.startswith('http')  # also covers 'https'
        and any(char.isalpha() for char in token)
        and token not in stopwords
    }


def _extract_tweet_data(tweet):
    """Build the per-tweet record.

    Returns a list laid out as ``[text_tokens, lang, author_id, entity_id,
    entity_name, author_des_tokens, hashtags, mentions, annotations]``.
    Fields that depend on ``context_annotations`` are ``None`` when that key is
    absent or empty; the ``entities``-derived lists are empty when the tweet has
    no such entities. Every tweet therefore gets its own values instead of
    inheriting leftovers from the previously processed tweet.
    """
    text_tokens = _clean_tokens(tweet['text'])
    lang = tweet['lang']
    author_id = tweet['author_id']

    entity_id = entity_name = author_des_tokens = None
    context_annotations = tweet.get('context_annotations')
    if context_annotations:
        # Only the first context annotation is used.
        first_annotation = context_annotations[0]
        entity = first_annotation['entity']
        entity_id = entity['id']
        entity_name = entity['name']
        domain = first_annotation['domain']
        if 'description' in domain:
            # Despite the name, these are the tokens of the annotation's
            # *domain* description.
            author_des_tokens = _clean_tokens(domain['description'])

    entities = tweet.get('entities') or {}
    hashtags = [hashtag['tag'] for hashtag in entities.get('hashtags', [])]
    mentions = [mention['id'] for mention in entities.get('mentions', [])]
    annotations = [
        (annotation['normalized_text'], annotation['type'], annotation['probability'])
        for annotation in entities.get('annotations', [])
    ]

    return [text_tokens, lang, author_id, entity_id, entity_name,
            author_des_tokens, hashtags, mentions, annotations]


invalid_events = 0                       # events for which no tweet could be loaded
features = {'context_annotations': {}}   # field-occurrence counts over all tweets
events_data = []                         # one (list of tweet records, label) per event

for event in tweet_events:
    is_rumor = event['is_rumor']
    tweet_objects = _load_tweets(event['ids'].split(','))

    if not tweet_objects:
        invalid_events += 1

    event_list = []
    for tweet in tweet_objects:
        # finds all the possible features contained within a tweet object with their counts
        _tally_features(tweet, features)
        # compiling features of each tweet object
        event_list.append(_extract_tweet_data(tweet))

    # Appended for every event, including invalid ones (with an empty list),
    # so events_data stays index-aligned with tweet_events.
    events_data.append((event_list, is_rumor))

{'regularly', 'saline', 'help', 'coronavirus', 'new', 'infection', 'prevent', 'nose', 'rinsing'}
{'help', 'coronavirus', '#covid19malaysia', 'new', 'infection', 'garlic', 'eating', 'prevent'}
{'protect', 'coronavirus', 'new', 'vaccines', 'pneumonia'}
{'body', '#chamber', 'alcohol', 'coronavirus', 'kill', 'new', 'chlorine', 'spraying'}
{'detecting', 'effective', 'coronavirus', 'scanners', 'new', 'people', 'infected', 'thermal'}
{'coronavirus', 'disinfection', 'kill', 'new', 'lamp', 'ultraviolet'}
{'effective', 'coronavirus', 'killing', 'new', 'dryers', 'hand'}
{'coronavirus', 'cannot', 'new', 'transmitted', 'mosquito', 'bites'}
{'taking', 'coronavirus', 'disease', 'new', 'prevent', 'bath', 'hot'}
{'cannot', 'coronavirus', 'kill', 'snow', 'new', 'cold', 'weather'}
{'humid', 'climates', 'covid', 'virus', 'areas', 'transmitted', 'hot'}
{'protect', 'alcohol', 'covid', 'dangerous', 'drinking'}
{'covid', 'feeling', 'mean', 'lung', 'seconds', 'hold', 'without', 'breath', 'free', 'able', 'cough

{'detecting', 'effective', 'kill', 'infected', 'dryers', 'scanners', 'lamp', 'people', 'disinfection', 'ultraviolet', 'hand', 'coronavirus', 'killing', 'new', 'thermal'}
{'effective', 'prevent', 'younger', 'also', 'preventing', 'help', 'susceptible', 'affect', 'people', 'older', 'coronavirus', 'new', 'infection', 'treating', 'garlic', 'eating', 'antibiotics'}
{'mowing', 'yards', "he's", '@urbancraziness', 'hoptown', '@boren_madison'}
{'cant', 'believe', 'still', 'tupac', '@urbancraziness', 'alive'}
{'cant', 'believe', 'still', 'tupac', '@urbancraziness', '@dickwhiskeyy', 'alive'}
{'cant', 'believe', '@maui_wowwie', 'still', 'tupac', '@urbancraziness', 'alive'}
{'cant', 'believe', 'still', 'tupac', '@urbancraziness', 'alive'}
{'cant', 'believe', "y'all", 'still', 'tupac', '@urbancraziness', 'alive', 'ayyeee'}
{'cant', 'believe', 'still', 'tupac', '@urbancraziness', 'alive', '@kingdoig21'}
{'cant', '@thechomason', 'believe', 'still', 'tupac', '@urbancraziness', 'alive'}
{'cant', 'believe

{'nairobi', 'covid', 'sonko', 'warns', 'giving', 'packs', 'mike', 'note', 'people', 'small', 'governor', 'drinking', 'protect', 'alcohol', 'bottles', 'hennessy', 'food', 'dangerous'}
{'like', 'looks', 'band', 'top', 'blue', 'plastic', '@citizentvkenya', 'guy', 'bottle'}
{'effective', 'holy', 'taken', 'orally', 'sins', 'saints', "what's", 'wondering', 'sanitizer', 'even', 'microorganism', 'body', 'form', '@citizentvkenya', 'hennessy', 'cleanses', 'born'}
{'@jibrilkala', 'body', 'helping', 'alcohol', '@citizentvkenya', 'immunity', 'reduces'}
{'need', 'sonko', 'gov', 'mike', '@mikesonko', 'cheering', 'honest', 'thanks', '#henneybottlechallenge', '#hennessy', '@citizentvkenya', 'coronavirus', 'times', 'bit', 'stressful'}
{'jamaa', '@kelvin__mungai', 'kila', 'ako', 'ehhh', 'vituko', 'time', 'na', '@citizentvkenya'}
{'spirit', '@citizentvkenya', "that's"}
{'nice', '@_tweeted_', 'one', '@citizentvkenya'}
{'akili', 'mtu', 'sonko', 'think', 'yake', 'venye', 'alafu', 'ya', 'kuvote', 'ni', 'mara'

{'@drudge_report', 'know', 'talking', 'even', 'jendal', "i'm", "that's"}
{'@drudge_report', 'good', 'luck', 'getting'}
{'tell', 'louisiana', '@drudge_report', 'meet', 'stop', 'entering', 'jindal'}
{'agree', 'totally', 'wow', '@drudge_report', 'jindal'}
{'@drudge_report', '#returntosender', 'refugees'}
{'damn', 'jindal', 'pretty', 'problem', 'national', '@drudge_report', '#tcot', 'demands', 'call', 'information', 'stop', '@ellerich17', 'guard', 'late'}
{'said', 'jindal', '@drudge_report', 'weak', '@bobbyjindal', 'state', 'sender', 'return'}
{'ever', 'refugees', '@drudge_report', 'syrian', '@bobbyjindal', 'needs', 'immigration', 'anything', 'life', 'louisiana', 'stop'}
{'@drudge_report', 'stop', 'power'}
{'states', 'everyone', 'ther', 'benefit', '@drudge_report', 'citizens', '@traderstef', 'united'}
{'send', 'demand', 'homr', 'amendment', '@drudge_report', 'back', 'info', 'effing', 'round', 'sent', '10th'}
{'wow', '@drudge_report', '@love_chihuahua'}
{'memphis', '@drudge_report', 'come',

{'remember', 'step', 'correction', '@priyapyadav18', 'without', 'always', 'little', 'hm', 'dm', 'pm', 'fm', "can't", 'direction'}
{'@priyapyadav18', '@nijam35214500'}
{'@priyapyadav18', 'charity', 'aam', 'admi', 'first', '@bjp4india', 'empty', 'treasury', 'ask', 'begins', 'home'}
{'@priyapyadav18', 'morning', 'well', 'good', 'priya', 'beta'}
{'steal', '@priyapyadav18', 'ways', 'new', 'swindle', 'money', 'public'}
{'privately', '@priyapyadav18', 'citizens', 'private', 'funds', 'govt', 'money', 'use', 'get'}
{'go', 'shame', 'watching', 'pockets', 'karma', 'paisa', 'help', 'god', 'jayenge', '@priyapyadav18', 'sara', 'crores', 'hope', 'lords', 'kha', 'taking', 'pouring', 'action', 'many'}
{'sagging', 'image', '@priyapyadav18', 'waking', 'retrieve', 'end', 'public', 'beginning'}
{'sham', '@priyapyadav18', 'maharashtra', 'scam', 'government', 'collecting', 'complete', 'topple'}
{'good', '@priyapyadav18'}
{'@priyapyadav18', 'another', 'little', 'scam'}
{'away', 'gets', 'think', 'moron', '@pri

{'french', 'could', 'mouth', '#alldoneit', '@independent', 'dear', 'fries'}
{'spotted', 'deer', 'fanged', '#halloween', 'first', 'years', 'afghanistan', 'time', '@independent'}
{'spotted', 'whoa', 'deer', 'fanged', 'first', 'years', 'afghanistan', 'time', '@independent'}
{'spotted', 'deer', 'fanged', 'first', 'years', 'afghanistan', 'time', 'awesome', '@independent'}
{'@independent'}
{'#halloween', 'time', '@independent'}
{'vampire', 'spotted', 'kangaroo', 'deer', 'fanged', 'first', 'years', 'afghanistan', 'time', '@independent'}
{'vampire', 'deer', 'kill', 'omg', 'em', '@independent', 'evil'}
{'@zhidayatullah', '@independent'}
{'spotted', 'deer', 'fanged', 'first', 'years', 'afghanistan', 'time'}
{'wow', '@thewcs', '@sam10k', '@animal_watch', '@independent', '@chrisgpackham', 'adorable'}
{'@animal_watch', '@domdyer70', 'observed', 'presence', 'dodgy', 'survey', 'proved', 'term', 'scientifically', '@thewcs', 'data'}
{'mum', 'right', '@thewcs', '@irinagreenvoice', '@animal_watch', 'dad'

{'@redone68', 'red', '@coloradogun', 'better'}
{'@coloradogun', 'responsible', 'shootings', 'place', 'tried', 'gun', 'laws', 'put', 'obama', 'mass', 'times', 'multiple'}
{'nothing', 'eachother', '@coloradogun', 'lol', 'statistics', 'two'}
{'point', 'picture', 'besides', '@coloradogun', 'missed', 'happen', 'bigger'}
{'@coloradogun', 'bush', 'end', 'discussion'}
{'@coloradogun', 'figure', 'us', 'year', 'mass', 'shootings', 'bollocks'}
{'fault', 'deduction', 'columbo', 'genius', '@coloradogun', "obama's", 'must'}
{'stupidest', '@coloradogun', 'ever', 'far', 'oh', 'tweet'}
{'@coloradogun', 'usa'}
{'@coloradogun', 'way', 'factors', 'yeah', 'possibly', "that's", 'involved', 'president', 'sarcasm'}
{'worse', '@coloradogun', 'mass', 'shootings', 'actually', 'alone', "i'm", 'afraid'}
{'worse', 'twitter', '@coloradogun', 'statistic', 'irrelevant', 'today'}
{'fault', '@potus', '@coloradogun', "jr's", '#noleadership', 'inherited', 'may', '#wakeupamerica', 'say', 'bush', 'mess', 'true'}
{'@colorado

{'least', '#charliehebdo', 'paris', 'official', 'says', 'killed', 'magazine', 'attack', 'satirical', "today's"}
{'anyone', 'kill', 'bad', 'far', 'think', 'zionist', 'right', 'jews', 'bro', '@darjaved7', '@chusid2015', 'want', 'thats', 'hate', 'opposes', '@daliachai'}
{'western', 'go', 'various', 'inevitable', '@cnnbrk', 'springs', 'terrorists', 'arabic', 'created', 'leaders', 'west', 'jihadists.it', 'mercenaries'}
{'paris', '@cnnbrk', '@hassanbasmer', '@daliachai', 'pakistan', 'nigeria', 'persecuted', 'muslims', 'religion', '#charliehebdo'}
{'@zhuravkov67', 'insulting', 'hey', '@cnnbrk', 'go', 'faith', 'buddy', "i'm", 'fuck'}
{'@cnnbrk', 'animals', '@hassanbasmer', 'discusting', '@daliachai'}
{'magazine', 'killed', 'least', '@cnnbrk', 'paris', 'says', "today's", 'official', 'attack', 'satirical', '#charliehebdo'}
{'empowered', 'blame', 'islamic', 'cnn', 'makes', '@cnnbrk', 'terror', 'islam', 'jews', 'media', "can't", 'excuses'}
{'covid', 'pls', 'genuine', 'jokes', 'replies', 'airborne'

{'@husainhaqqani', 'sad', '@acted', '@scclemons'}
{'aid', 'innocent', '@acted', 'people', 'muslims', 'uk', '@scclemons', 'workers', 'unanimous', 'almost', 'journalists', 'killing', '@mehdirhasan', 'condemnation'}
{'@michaeltiyce', '@acted', 'sure', 'million', 'islamist', 'never', 'uk', 'feel', '@scclemons', 'leave', 'safe', 'make'}
{'commandments', '@acted', 'read', 'quran', '@scclemons', 'violent', 'one', '#quran', 'many'}
{'actions', 'haines', 'brarbaric', 'condemn', '@acted', 'thoughts', 'muslims', 'uk', '@scclemons', '#isis', 'firmly', '#david', 'family'}
{'totally', '@husainhaqqani', 'someone', 'barbaric', 'sincere', 'innocent', '@acted', 'condolences', 'inflicted', 'seems', '@scclemons', 'act', 'apologoies'}
{'lebenase', 'soldiers', 'got', 'occidental', 'far', '@acted', 'treatment', '@scclemons', 'beheaded', '@wikijeff', 'must', 'add', '3rd'}
{'western', 'aid', 'answer', 'blame', '@acted', 'honest', 'orgs', 'share', '@scclemons', 'use', 'workers', 'agents', 'equally', '@cia', 'pl

{'beheading', 'via', '#tcot', 'speak', 'hear', '#2a', 'victim', "can't", 'obama', 'hufford', '@jarjarbug', 'colleen'}
{'violence', 'place', 'sad', 'work', 'yeah', '@patvpeters', '@jarjarbug', 'family'}
{'need', 'authorities', 'sex', 'america', 'beheadings', 'care', 'frankly', 'color', 'stop', '@jarjarbug'}
{'us', 'crickets', 'admin', 'one', 'breaks', "could've", 'heart', 'awful', '@jarjarbug', '@jimdwrench', 'family'}
{'fail', 'send', '#ferguson', '@veritaz', 'counting', 'paid', 'cop', 'police', 'arrest', 'guy', 'month', '@jarjarbug', 'vacation'}
{'page', '#colleenhufford', '#memorial', '@jarjarbug', '@glcpops'}
{'polls', 'come', 'coward', 'speak', 'puppet', 'reads', 'ouseless', 'subject', '@snitfit', '@jarjarbug'}
{'peace', 'rest', 'woman', '@jarjarbug', '@hrhidaho', 'dear'}
{'@lilyisfree', 'comment', 'like', "#obama's", 'terrorism', 'says', 'great', 'hood', 'act', 'fort', 'pos', '@jarjarbug', 'deal'}
{'cain', 'allah', 'jesuscrist', 'brother', 'yeshua', '@jarjarbug', 'satan', '@mdj17'

{'painted', 'someone', 'million', '@complexmag', '@isaac_hoeee', 'bugatti', 'veyron', 'penis', 'spray'}
{'someone', 'respect', '@complexmag', 'bugatti', 'million', 'painted', 'veyron', 'penis', 'spray'}
{'painted', 'someone', 'million', '@complexmag', 'bugatti', 'veyron', 'penis', 'spray'}
{'@princessjenx', '@complexmag', 'shitty'}
{'painted', 'someone', 'million', '@complexmag', 'bugatti', 'veyron', 'penis', 'spray'}
{'someone', 'thought', '@complexmag', 'bugatti', 'million', 'painted', 'assumed', 'penis'}
{'@gingi_rse', '@complexmag', 'guy', 'never', 'seen'}
{'still', '@complexmag', 'would', '@teejayouellette', 'drive'}
{'someone', '@complexmag', 'bugatti', 'million', 'rt', 'painted', 'veyron', 'penis', 'spray', 'yikes'}
{'probably', '@complexmag', 'insured'}
{'sell', 'repainted', '@edsbs', '@complexmag', 'get'}
{'@wintersbreh', 'stop', '@complexmag', "can't"}
{'painted', 'someone', 'million', '@complexmag', 'bugatti', 'veyron', 'penis', 'spray'}
{'@complexmag', '@edsbs'}
{'painted',

{'eat', '@israelhatzolah', 'bastards', '@sandraalabama', 'pork'}
{'said', 'province', 'traveled', 'ruled', 'first', 'people', 'animals', 'ulmer', 'often', 'wuhan', 'conditions', 'coronavirus', 'flu', 'strep', 'tested', 'throat', 'strain', 'started', 'particular', 'humans'}
{'health', 'bisd', 'sherry', 'panelists', 'ocnashek', 'frank', 'public', 'tim', 'psychiatric', 'anita', 'director', 'mensah', 'ulmer', 'beaumont', 'assistant', 'emergency', 'elementary', 'education', 'jason', 'coordinator', 'clinic', 'management', 'superintendent'}
{'cases', 'ulmer', 'beaumont', 'jefferson', 'confirmed', 'adds', 'county'}
{'said', 'us', 'less', 'downplay', 'systems', 'fortunate', 'familiar', 'immune', 'people', 'viruses', 'concern', 'whose', 'want', 'older', 'compromised', 'scary', 'flu', 'vaccine', 'biggest', "we're"}
{'covid', 'knowledge', 'time', 'little', 'notes', "there's", 'vaccine', 'new'}
{'said', 'percent', 'essentially', 'says', 'people', 'mensah', 'cases', 'ulmer', 'common', 'get', 'texas'

{'lord', 'pepsi', 'dew', 'mountain', 'sweet', 'good', 'actually', 'testing', 'doritos-flavored'}
{'@buzzfeed', '@navicer_colonna'}
{'pepsi', 'pls', '@buzzfeed', 'sweet', 'pregnant', 'actually', 'testing', 'lord', 'yes', 'good', 'doritos-flavored', "i'm", 'dew', 'mountain'}
{'@buzzfeed', 'barrrrrf'}
{'pepsi', '@buzzfeed', 'sweet', 'actually', 'testing', 'lord', 'vomit', 'good', 'doritos-flavored', 'dew', 'mountain'}
{'lololol', '@jasmith0099', 'gross', '@coach_johnson9', '@buzzfeed'}
{'everything', '@samsvirtuallife', 'know', 'ya', '@buzzfeed', 'dorito-flavored', 'better'}
{'pepsi', '@buzzfeed', 'sweet', 'actually', 'testing', 'lord', 'good', 'doritos-flavored', 'dew', 'mountain'}
{'@buzzfeed', '@simo_no', 'link'}
{'pepsi', 'nobody', '@buzzfeed', 'sweet', 'actually', 'testing', 'lord', 'rt', 'good', 'doritos-flavored', 'wants', 'dew', 'fucking', 'mountain'}
{'poison', 'fucking', 'sounds', '@buzzfeed', 'like'}
{'breaking', '@buzzfeed', 'mold'}
{'@buzzfeed', 'fat', "we're"}
{'@buzzfeed', 

{'@nickcorbin17', 'american', 'puppy-sized', 'goliath', 'spider', '@uberfacts', 'america', 'known', 'birdeater', 'south', 'found'}
{'puppy-sized', 'live', 'spider', '@uberfacts', 'america', 'planet', 'south', 'anyways', 'want', 'found'}
{'@jaymee', '@uberfacts', 'spiders', 'cute', 'puppy-sized', 'puppies'}
{'@kevingfox', 'come', '@uberfacts'}
{'puppy-sized', 'spider', '@uberfacts', 'america', 'give', 'scream', 'south', 'something', 'found', 'wife'}
{'@j_roth1'}
{'@uberfacts', '@gabbywilson01', '@heatherbonnett'}
{'@uberfacts', 'new', '@maxi_padds', 'pet'}
{'@uberfacts', '@iamrheptarr', 'dude', 'told'}
{'@uberfacts', 'nope'}
{'@uberfacts', 'badly', '@_melhodgson', 'ad', 'stamp'}
{'kill', 'lays', '@uberfacts', 'fire', 'eggs', '@iluvyg2'}
{'@uberfacts', 'nah'}
{'@whiteniggajake', '@uberfacts', 'cry', "i'm", 'think'}
{'bitch', 'yeet', '@uberfacts', '@lukefromrowlett'}
{"i'd", 'panties', 'little', '@uberfacts', '@j_yauch15', 'shit'}
{'huh', '#fixitjesus', '@akwaugoigwe', 'uh'}
{'@nicksantar

{'hib', 'protection', 'b', 'protect', 'type', 'provide', 'pneumonia', 'haemophilus', 'coronavirus', 'vaccine', 'influenza', 'new', 'vaccines', 'pneumococcal'}
{'health', 'effective', 'recommended', 'illnesses', 'respiratory', 'virus', 'needs', 'protect', 'ncov', 'vaccination', 'highly', 'vaccine', 'new', 'different', 'vaccines', 'although'}
{'happy', 'art', 'use', '@tndeptofhealth', 'lung', 'clip', 'ban'}
{'spots', 'travelers', 'coming', 'band', '@tndeptofhealth', 'hot'}
{'adolph', 'trump', 'similarity', 'rapist', '@gotham3', 'donald'}
{'bunker', 'post', 'put', '@dodona777', 'hitler', 'time', 'machine', 'back', '@gotham3'}
{'air', "one's", 'come', 'rockin', 'force', 'knockin', '@gotham3'}
{'else', 'subtweet', 'everybody', '@gotham3'}
{'great', 'hitler', 'make', 'well', 'saying', 'germany', '@gotham3'}
{'mio', 'ay', 'dios', '@nicemangos', '@gotham3'}
{'happening', 'anything', 'bye', 'know', "what's", 'america', 'let', 'guess', 'world', '@gotham3', 'alone'}
{'guinea', 'insult', 'pig', '@

{'lightening', 'white', 'majority', 'belongs', 'rt', 'house', 'small', 'wrong', '@foxnews', '#lgbt', 'supports', 'ppl', '@duchesssandrena', 'group'}
{'cast', 'without', 'stone', 'let', 'first', '@foxnews', 'sin'}
{'away', 'sodom', 'gomorrah', 'nation', 'us', 'turned', '@foxnews', 'help', 'god'}
{'need', 'adr', 'like', 'looks', 'gay', 'throwdown', 'advertising', 'rave', 'option', 'twitter', 'down-vote', '@foxnews', 'big', 'tonight'}
{'wow', '@foxnews', 'embarrassed'}
{'#frc', 'seriously', 'dugger', 'hired', 'morality', '@megynkelly', 'trotting', '@foxnews', 'guy', 'talk', 'josh'}
{'@shitty_politics', 'damn', 'ever', 'let', 'away', 'practice', 'sure', '@guynextdoorrva', 'take', '@foxnews', 'religion', 'obama'}
{'america', 'like', '@foxnews', 'seem', 'loss', 'words'}
{'office', 'coat', 'brought', 'back', 'prestige', '@foxnews', "can't", 'presidential', 'wait', 'decorum', 'tie'}
{'@foxnews', '#thefaghouse'}
{'need', "people's", 'shameful', 'politicized', '@foxnews', 'house'}
{'gee', '@foxn

{'chinese', 'money', 'talking', 'going', 'help', 'iranian', 'country', 'politicians', 'iran', 'poor', 'honest', 'put', 'people', 'pandemic', 'rubbish', 'important', 'call', 'something', '@who', 'every', 'support', 'stop', 'china', 'pressure'}
{'listen', '@who', 'thomas', 'knows', '@thomasfrench28'}
{'@who', 'crazy', 'really', '@thomasfrench28'}
{'@christlike911', 'travel', 'available', 'outside', 'january', 'based', 'infected', 'restriction', 'recommend', 'people', '@who', 'already', 'information', 'current', '30th', 'china', 'crazy', 'trade', 'committee', 'deaths'}
{'one', 'spectacular', 'arenal', 'volcanoes', '#costarica', 'located'}
{'hella', '@visit_costarica', 'spectacular', 'volcanoes', 'one', 'arenal', 'beautiful', '#costarica', 'located'}
{'@visit_costarica', '@gradyjacks'}
{'@visit_costarica', 'spectacular', 'volcanoes', 'one', 'ive', 'arenal', '#costarica', 'located'}
{'@visit_costarica'}
{'#costarica', 'one', '@visit_costarica', 'arenal', 'spectacular', 'volcanoes', 'located

{'humid', 'climates', 'covid', 'virus', 'areas', 'transmitted', 'hot'}
{'protect', 'alcohol', 'covid', 'dangerous', 'drinking'}
{'covid', 'feeling', 'mean', 'lung', 'seconds', 'hold', 'without', 'breath', 'free', 'able', 'coughing', 'discomfort', 'coronavirus', 'disease'}
{'covid', 'recover', 'mean', 'catching', 'life', 'coronavirus', 'disease', 'new'}
{'covid', 'prevent', 'higher', 'degrees', 'exposing', 'sun', 'temperatures', 'coronavirus', 'disease', '25c'}
{'networks', 'covid', '5g', 'mobile', 'spread'}
{'poll', 'cnn', 'sanders', 'dem', '1st', 'orc', 'significantly', 'support', 'clinton', 'past', 'best', 'month', 'increased', 'debate'}
{'@cnnbrk', 'whaaaat', 'bernie'}
{'@cnnbrk', 'debate', 'bernie'}
{'#berniesanders', '@cnnbrk', 'find', 'gatekeepers', 'best', '#mh370', 'no.she', 'anymore'}
{'break', '@laurenedicillo', '@cnnbrk', "he's", 'good', 'free', 'socialist', 'hive', 'mind', 'try', '@redscarebot', 'thing', 'democrat'}
{'leftlobbyingunsafeunregulated', '@cnnbrk', 'legal', 'pcp

{'ideations', 'attempted', 'victims', 'happened', 'suicidal', 'regarding', 'wanted', 'wonder', '@tmz', 'many'}
{'collins', 'report', 'suicide', 'swarm', 'stephen', 'gerl', '#7thheaven', 'star', 'cops', '@tmz', 'home'}
{'@tmz'}
{'time', 'going', 'think', 'step', 'iron', 'things', 'back', 'media', 'tough', '@tmz', 'family'}
{'suicide', 'stephen', 'irony', 'report', '#7thheaven', 'star', 'cops', 'collins', '@tmz', 'swarm', 'home'}
{'mistaken', 'earth', 'stories', 'old', 'retweeting', 'dumb', '@tmz'}
{'collins', 'report', 'suicide', 'swarm', 'stephen', 'rt', '#7thheaven', 'star', 'cops', '@tmz', 'home'}
{'repost', 'happened', 'days', 'ago', '@tmz'}
{'done', 'hopefully', '@tmz', 'deal'}
{'travel', 'covid', 'networks', 'waves', 'spread', '#covid19', 'mobile', 'radio', 'spreading', 'viruses', '#5g', '5g', 'countries', 'cannot', 'facts', 'many'}
{'covid', '#covid19', 'drinking', 'protect', 'alcohol', 'facts', 'dangerous'}
{'covid', 'doctors', 'waves', 'human', "kit's", 'inside', 'masks', 'move

{'shooter', 'bought', 'church', 'alleged', 'burger', 'king', 'killings', 'hours', 'cops'}
{'@huffingtonpost', 'mean', 'deserves', 'starve', "that's", 'yall', 'smart', 'death', 'police', 'tactics', 'expect', 'criminal'}
{'@huffingtonpost', 'u', 'smh', 'bought', 'alleged', 'burger', 'king', 'cops', 'shooter', 'know', 'church', 'killings', 'hours'}
{'line', 'much', '@huffingtonpost', 'mcdonalds', 'long'}
{'poison', '@huffingtonpost', 'maybe', 'trying'}
{'imagine', 'least', '@huffingtonpost', 'southern', 'hospitality', 'caught'}
{'go', '@huffingtonpost', 'feed', 'charged', 'even', 'shooter', 'police', 'creep', '#charleston', 'yet', "let's"}
{'@huffingtonpost', 'hungry', 'folks', 'get'}
{'@huffingtonpost', 'outside', 'like', 'fiction', 'story', 'issues', 'read', 'blatant', 'dark', 'humor', 'look', 'spectacle'}
{'case', '@huffingtonpost', 'another', 'yet', 'police', 'brutality'}
{'beating', '@huffingtonpost', 'die', 'guy', 'nope', 'custody'}
{'#abundanceofintolerance', '@huffingtonpost', 'ex

{'phone', '#mh17', 'cossacks', 'agent', 'shot', 'rebel', 'leader', 'leaked', 'intel', 'call', 'new', 'russian'}
{'phone', '#mh17', 'agent', 'rebel', 'leaked', 'rt', 'war', 'intel', 'russian', 'call', 'fly', '@nycjim', 'fuck', 'new', 'leader'}
{'phone', '#mh17', 'cossacks', 'agent', 'shot', 'rebel', 'leaked', 'intel', 'russian', 'call', '@phillygirl1441', '@nycjim', 'new', 'leader'}
{'fabricating', 'versions', 'seems', 'source', 'reliable', '@nycjim'}
{'phone', '#mh17', 'cossacks', 'agent', 'shot', 'rebel', 'leaked', 'rt', 'intel', 'russian', 'call', '@nycjim', 'leader'}
{'@nycjim', 'checked'}
{'@nycjim', '@cencio4', '@nzaircraftfan'}
{'phone', '#mh17', 'amp', 'cossacks', 'agent', 'shot', 'rebel', 'leaked', 'rt', 'intel', 'russian', 'call', '@nycjim', 'new', 'leader'}
{'sources', 'check', 'anything', '@matevznovak', 'leaked', 'verified', '@nycjim'}
{'もう言い逃れできないわな', 'ありゃりゃ', '@shamilsh', '@shinichiroinaba', '@nycjim'}
{'proof', '@tarekfatah', '@nycjim'}
{'violence', 'tomorrow', 'africa',

{'currently', 'available', 'based', 'q', '#covid19', '#ibuprofen', 'could', 'recommend', 'people', '#digitalindia', '@who', '@_groundreality', '@digitalindialaw', 'ibuprofen', 'use', 'information', '@impraveendalal', 'disease', '@p4loindia', 'worsen', '@_telelaws'}
{'help', 'non', 'issue', 'regarding', 'would', '#digitalindia', 'requested', 'give', 'glad', 'lot', 'suggestion', '@who', '@digitalindialaw', 'ibuprofen', 'use', '#coronovirus', '@impraveendalal', 'symptoms', 'related', '@p4loindia', 'novel', 'clarified'}
{'also', 'recommend', '#digitalindia', 'nhs', 'say', '@_groundreality', '@digitalindialaw', 'ibuprofen', 'use', '@impraveendalal', 'paracetamol', 'see', '#coronavirus', 'safe', '@p4loindia', '@_telelaws'}
{'certain', 'limit', 'physicians', '#covid19', '#ibuprofen', 'reports', 'negative', 'concerns', 'consulting', 'aware', 'effects', 'via-a-vis', 'use', 'usual', 'populations', '@_perry4law', 'beyond', 'ones', 'treating', 'patients'}
{'ema', 'covid', 'non-steroidal', 'advice'

{'adopted', 'life', 'jewish', 'werewolf', 'boy', 'president', 'save', "argentina's"}
{'@ishaantharoor', 'come', 'dude'}
{'@washingtonpost', 'totally', '@ishaantharoor', 'onion', 'thought', 'article'}
{'@washingtonpost', '@ishaantharoor', 'kill', 'gentiles', 'best', 'understand', 'part'}
{'@washingtonpost', '@ishaantharoor', 'hoax', 'president', 'real', 'argentina', 'understanding'}
{'ussr', '@ilgraham82', 'american', 'nazis', '@ishaantharoor', '@genusunknown', 'program', 'nazi', 'received', 'usa', 'space', 'developed'}
{'jewish', 'joke', 'indian', '@ishaantharoor', 'could', 'japanese.pls', 'tradition', 'know'}
{'@ishaantharoor', 'con', 'religiones', 'hace', 'se', 'todas', 'solidaria', 'haciendo', 'por', 'que', 'las', 'pasar'}
{'@ishaantharoor', 'adopted', 'life', 'jewish', 'werewolf', 'boy', 'president', 'save', "argentina's"}
{'@ishaantharoor', 'judía', '@drapignata', 'la', 'capelu', 'de', 'jajaja'}
{'@washingtonpost', '@ishaantharoor', 'thought', 'rts', 'seen'}
{'@washingtonpost', 't

{'actions', 'explains', '@eamslider24', '@cnn'}
{'time', '@cnn', 'monday', 'tv', '@jamesmconie', '7pm', 'prime', 'see', 'friday', 'ch', 'ata'}
{'living', '@cnn', 'legend'}
{'air', 'attitude', '@cnn'}
{'@cnn'}
{'quit', 'way', 'well', '@cnn', 'job', 'one', 'slow', 'clap'}
{'network', 'open', 'like', 'oprah', "b'cos", '@cnn', 'inherited', 'kissed', 'life', 'media', 'must', 'goodbye', 'aunt', 'dead'}
{'@cnn'}
{'@cnni', '@cnn', 'lafffffffff'}
{'@cnni', 'lol', '@prodiver02', '@cnn', 'must', 'watch'}
{'hero', '@cnn'}
{'quit', 'way', 'like', '@cnn', 'lottery', "that's", 'lol'}
{"she'll", 'air', 'another', '@cnn', 'never', 'get', 'job'}
{'quit', '@rsmarsh22', 'mcdonalds', '@cnn'}
{'watches', 'crap', 'person', 'anyhow', 'news', '@cnn', 'televsion', 'kind', 'joke', 'actually'}
{'minutes', '@cnn', 'clown', 'looking'}
{'f', 'lol', 'k', '@cnn', 'clown'}
{'@cnn', 'cnn', 'lol'}
{'around', 'good', 'made', 'crazy', '@cnn', 'kind', 'erratic', 'unprofessional', 'look'}
{'happened', 'buy', 'money', 'think'

{'#coronavirus', 'cannot', '#covid19', 'kill', 'snow', 'new', '#knowthefacts', 'cold', 'weather', 'fact'}
{'cards', 'credit', 'low', 'infected', 'coins', '#covid19', 'banknotes', 'risk', 'touching', 'objects', '#coronavirus', 'new', '#knowthefacts', 'fact'}
{'bites', 'coronavirus', 'cannot', '#covid19', 'new', 'transmitted', '#knowthefacts', 'mosquito', 'fact'}
{'#weather', '#singapore', '#vaccine', '@who_mongolia', 'kill', 'time', '@takeshi_kasai', 'also', 'importantly', '#covid19', 'high', 'could', '#quarantine', 'surge', 'shown', 'hope', '#humidity', 'early', '@whophilippines', '#hygiene', 'cases', '@who', 'defense', '@whowpro', '#warm', 'cannot', '#coronavirus', 'soon', 'new', 'novel', '#intervention', 'developed'}
{'misleading', '@who_mongolia', 'information', '@whowpro', '@takeshi_kasai', '@whophilippines'}
{'persistent', '@who_mongolia', 'us', '@takeshi_kasai', 'affects', 'susceptible', '#covid19', 'virus', 'fever', '@whophilippines', 'body', 'affected', '#5g', '@who', 'temperat

{'thinks', '#makeitrain', 'sky', 'fall', 'going', '@realdonaldtrump', '@cnn', 'guess', '#trumpmoney', 'everyone'}
{'way', '@realdonaldtrump', 'branch', '@cnn', 'ugly', 'every', 'hit', 'tree', 'fall'}
{'@realdonaldtrump', '@cnn', '@kydaleporker'}
{'press', 'aligned', 'love', '@realdonaldtrump', 'guy', '@cnn', 'sharp', 'keeps', 'answers'}
{'@realdonaldtrump', 'money', 'american', 'trump', 'issues', 'african', '@cnn', 'much', 'uncle', 'donated', 'toms', '@ladyellev3', 'talk'}
{'need', '@jihadmarei', 'sometimes', 'fun', '@realdonaldtrump', '@cnn', 'people', 'time'}
{'@cnn', 'lol'}
{'among', 'mr.trump', 'candidates', '@realdonaldtrump', '@cnn', 'like', "i'm", 'iranian'}
{'aid', '@realdonaldtrump', '@cnn', 'intelligent', 'feel', 'mirror', 'beautiful', 'want', 'hearing', "trump's"}
{'take', 'documentary', 'great', 'us', '@realdonaldtrump', '@cnn', 'watch', 'idiocracy'}
{'ass', '@genlady9', 'innocent', 'ur', '@cnn', 'get', 'see', 'head', 'facts'}
{'holly', 'said', '@ldyguin', '@realdonaldtrump

{'covid', '@bindureddy', 'agree', 'humid', 'virus', 'areas', 'transmitted', 'climates', 'hot'}
{'effective', 'flatter', 'far', 'regions', 'weeks', 'virus', 'experiments', 'known', 'early', 'curves', 'temperatures', 'support', 'decrease', 'almost', 'likely', 'next', 'r0', 'deteriorate', 'nonexistent', 'warmer', 'claim'}
{'like', 'transmission', 'also', 'work', 'slows', 'shown', 'exposing', 'sun', 'get', 'rate', 'course', 'photo', '@enginutkan', 'definitely'}
{'@bindureddy', '#swineflu', 'transmission', 'based', 'sars', 'humid', 'swine', 'lower', 'experience', 'climates', 'hot', 'previous', 'w', 'significantly', 'coronaviruses', 'rates', '#sars', 'flu', '@enginutkan'}
{'companion', 'dogs', 'public', 'infected', 'busters', 'spread', '#coronavirusindia', 'cats', 'virus', 'present', 'animals', '@who', 'advice', 'ncov', "there's", 'coronavirus', 'myth', 'new', 'evidence', 'novel', 'pets', 'home'}
{'tract', 'harms', 'seasonal', 'less', 'h1n1', 'scared', '#coronavirusindia', 'respiratory', 'vi

{'outcome', 'thus', 'silence', 'guilty', 'getting', 'lying', 'admitting', 'caught', 'cause', 'know', '@abc', '@deray', 'choose'}
{'400k', '@abouddandachi', 'yet', '@abc', 'denial', '@deray', 'hearing'}
{'allow', 'like', 'sounded', 'qs', 'wrote', 'paid', 'believe', "cn't", "wdn't", 'take', 'gloves', '#darrenwilson', 'bc', 'kid', '@abc', 'george', '@deray', 'amt'}
{'said', 'knowledge', 'pay', 'abcoffered', 'six', 'figures', '@abcsource', 'interview', '#darrenwilson', 'talks', '@deray', 'mid-to-high'}
{'exclusive', 'paid', 'right', 'assumed', '@abc', '@deray', 'get'}
{'need', 'proceeds', 'mike', '@abc', 'redistributed', '@deray', 'brown', 'crime', 'family'}
{'abc', 'check', '#fergusondecision', 'pay', 'getting', 'darren', 'cashing', 'shooting', '@lizzzbrown', '@abc', 'big', '@deray', 'wilson'}
{'sisters', '@jbug33', '@abc', 'lets', '@deray', 'unite', 'brothers'}
{'paying', 'networks', 'got', 'discussed', 'understood', 'interview', 'confirmed', '@deray', 'none'}
{'cause', '@shar76', 'paid'

{'views', '@unboxtherapy', 'rt', 'test', 'plus', 'iphone', 'bend', 'incoming'}
{'@unboxtherapy', 'iphone', 'get'}
{'take', 'fail', '@unboxtherapy', 'design', 'back', 'huge'}
{'@unboxtherapy', '@mkbhd', 'new', 'presenting', 'iphone', 'plex'}
{'counting', 'views', '@unboxtherapy', 'million'}
{'gone', 'flex', '@unboxtherapy', '@mkbhd', '@kojifox', 'like', 'looks', 'wrong', 'lg'}
{'4k', 'setting', '@unboxtherapy', '@mkbhd', 'great', '#isheep', 'bt'}
{'appears', 'everything', 'explain', 'thumbnail', '@unboxtherapy', 'necessary', 'click', 'lol', 'video', 'play'}
{'@unboxtherapy', '@mkbhd'}
{'limit', 'torture', '@unboxtherapy', 'meaning', '@futbol_writer', 'take', '@simplifiedrools', 'fun', 'dude', 'test'}
{'iphone', 'wow', '@unboxtherapy', 'rt', 'incoming', 'views', 'plus', 'bend', '@mkbhd', 'test'}
{'damn', '@freddytv', 'iphone', '@unboxtherapy', 'still', '@yoanty1', 'plus', 'want', 'bend', 'test'}
{'bending', 'pockets', 'tight', 'jeans', 'reviews', '@unboxtherapy', 'momma', 'might', 'get',

{'susceptible', '#coronavirus', 'affect', 'new', 'people', 'older', 'younger', 'also'}
{'effective', '#coronavirus', 'new', 'treating', 'preventing', 'antibiotics'}
{'medicines', 'treat', '#coronavirus', 'new', 'specific', 'prevent'}
{'malaysian', 'airlines', '@chrisdyork', 'flying', 'terrifying.never', 'horrible', 'really'}
{'@chrisdyork', 'victims', 'political', 'would', 'agenda', 'must', 'act', 'families', 'anybody', 'inhumane', 'condole'}
{'sort', 'violence', '@chrisdyork', 'table', 'far', 'without', '@saracnelson', 'went', 'sit'}
{'@chrisdyork', 'thinking', 'omw'}
{'@chrisdyork', 'transport', 'shot', 'mistook', 'area', 'ukrainian', 'expected', 'rebels', 'il', 'boing', 'russian'}
{'@chrisdyork'}
{'@chrisdyork', 'hung', 'putin', 'caused', 'mass', 'must', 'tried', 'terrorist', 'murder', 'russian'}
{'shot', 'malaysian', 'breaking', 'airlines', '@chrisdyork', 'ukraine', 'passenger', 'near', 'border', 'russian'}
{'@chrisdyork', 'bigest', 'fool'}
{'@chrisdyork', 'heartbreaking'}
{'@hghor

{'whole', 'covid', 'go', 'map', 'pick', 'work', 'live', 'contaminated', 'come', 'people', '@noltenc', 'back', 'cases', '@jkc_jaycee', 'showing', 'say', "there's", 'want', 'get', 'corridor', 'care', 'air', '@madsquire', 'ppl', 'sick'}
{'comments', 'covid', 'ask', 'think', 'statement', 'sure', 'contaminated', 'whether', 'concerns', '@noltenc', 'smart-ass', 'nothing', '@jkc_jaycee', 'say', 'trying', 'deep', 'realize', "i'm", 'dig', '@madsquire', 'know', 'made'}
{'whole', 'covid', 'go', 'map', 'pick', 'work', 'live', 'contaminated', 'come', 'people', '@noltenc', 'back', 'cases', '@jkc_jaycee', 'showing', 'say', "there's", 'want', 'get', 'corridor', 'care', 'air', '@madsquire', 'ppl', 'sick'}
{'low', '@jrharrison1', 'scared', 'show', '@noltenc', 'may', 'wish', 'nothing', '@jkc_jaycee', 'fear', 'unless', 'recent', 'antibody', 'afraid', 'tests', 'likely', 'rate', 'since', 'stop', 'know', 'fatality', 'late', 'wanting', 'choose'}
{'got', 'station', 'going', 'live', "think's", 'community', 'come

{'thank', 'happy', 'wud', 'atlast', 'b', '@bbcbreaking', 'parent', 'god'}
{'@bbcbreaking', 'boko', 'abducted', 'says', 'agreed', "nigeria's", 'truce', 'w', 'haram', 'released', 'military', 'finally', 'schoolgirls', 'mt'}
{'anxiety', 'lol', 'power', 'diplomatic', 'regime', 'seems', '@bbcbreaking', 'full', 'truce', 'iam'}
{'wait', 'well', '@mapetiteniche'}
{'abducted', 'says', 'agreed', 'haram', 'schoolgirls', "nigeria's", 'released', 'military', 'truce', 'boko', 'group', 'militant'}
{'hear', 'gud', '@bbcbreaking'}
{'believe', '@bbcnews', 'well', '@bbcbreaking', 'girls', 'alive', 'see', "i'll"}
{'ceasefire', 'includes', '@bbcbreaking', 'release', 'hostages', 'boko', 'says', 'haram', 'nigerian', '#bringbackourgirls', 'government'}
{'@bbcbreaking'}
{'@bbcbreaking', 'ilks', 'believing', 'trusted', '#bokoharam', 'dream', "can't", '#bringbackourgirls', 'seeing'}
{'@bbcbreaking', 'good', 'almost', 'true'}
{'@bbcbreaking'}
{'ceasefire', 'includes', '@bbcbreaking', 'release', 'hostages', 'boko',

{'another', 'pt', 'national', '@aparanjape', 'training', 'teacher', 'skill', 'trainer', 'physical', 'education', 'document', 'activity', 'pe', 'sports', 'corporation', 'development', 'name'}
{'@prayagverma', 'yes', 'course', 'role'}
{'skills', 'outside', 'lack', 'acchhe', 'us', 'k12', '@aparanjape', 'ai', 'space', 'support', 'schools', 'din', 'hoping', 'curriculum', 'look', 'dt'}
{'wud', 'paper', 'expecting', 'u', 'post-it', '@aparanjape', 'notes', 'skill', 'studied', 'mba', 'hardware', 'build', 'drawing', 'much', 'using', 'something', 'able', 'end', 'except', 'teach', 'design', 'software', 'talk', 'thinking', 'learn', 'technical'}
{'person', '@aparanjape', 'right', 'taught', 'journeys', '@vedic_guru', 'probably', 'prototyping', 'critical', 'used', 'user', 'aspects', 'design', 'thinking', 'tools'}
{'drill', 'master', '@aparanjape', 'cha', 'p'}
{'much', 'fifty', 'leaving', 'yeah', 'jamie', 'grey', 'dornan', 'shades'}
{'leaving', 'invested', 'well', 'shades', 'yeah', 'much', 'fifty', "i'

{'kill', 'busters', 'spraying', 'q', '#covid19', 'viruses', 'body', 'alcohol', 'chlorine', 'already', 'entered', '#coronavirus', 'myth', 'new'}
{'detect', 'detecting', 'effective', 'infected', 'busters', 'q', 'scanners', 'fever', 'people', 'however', "can't", '#coronavirus', 'yet', 'myth', 'new', 'infection', 'sick', 'developed', 'thermal'}
{'effective', 'bacteria', 'preventing', 'busters', 'work', 'q', 'virus', 'treatment', 'viruses', 'ncov', 'therefore', 'used', '#covidー19', '#coronavirus', 'myth', 'new', 'treating', 'antibiotics', 'means', 'prevention'}
{'asthma', 'ages', 'like', 'infected', 'busters', 'younger', 'also', 'susceptible', 'q', '#covid19', 'affect', 'med', 'people', 'vulnerable', 'pre-existing', 'appear', 'older', 'diabetes', 'conditions', '#coronavirus', 'coronavirus', 'myth', 'new'}
{'person', 'saliva', 'mosquito', 'spread', 'infected', 'respiratory', '#covid19', 'virus', 'transmitted', 'discharge', 'droplets', 'sneezes', 'nose', 'bites', 'coughs', '#coronavirus', 'ca

{'hib', 'protection', 'protected', 'b', 'saline', '#corona', 'people', 'nose', 'type', 'provide', 'pneumonia', 'rinsing', 'regularly', 'haemophilus', 'vaccine', 'coronavirus', 'influenza', 'new', 'infection', 'evidence', 'vaccines', 'pneumococcal'}
{'protected', 'outbreak', 'antimicrobial', '#corona', 'properties', 'people', 'may', 'however', 'healthy', 'current', 'coronavirus', 'new', 'garlic', 'food', 'evidence', 'eating'}
{'asthma', 'ages', 'severely', 'infected', 'ill', 'medical', 'virus', 'people', 'vulnerable', 'becoming', 'pre-existing', 'ncov', 'appear', 'older', 'heart', 'diabetes', 'conditions', 'coronavirus', 'disease', 'new'}
{'recommended', 'treat', 'date', 'bacteria', 'prevent', 'work', '#corona', 'viruses', 'ncov', 'coronavirus', 'medicine', 'new', 'specific', 'antibiotics'}
{'desperate', 'shooter', 'leftis', 'read', 'transgender', 'planned', 'rumor', 'ted', 'based', 'claims', 'parenthood', 'cruz'}
{'narrative', 'issue', '@lacina52', 'real', 'obviously', 'pushing', '@bip

In [9]:
# Display `features`: a nested dict of tweet-object field names (with sub-fields
# nested under their parent field) and the count recorded for each across the
# tweet objects in tweet_events (21775 tweet objects in total).
# NOTE(review): the original note says these counts come from a block above that
# is now commented out, so this cell appears to rely on `features` already being
# populated in the kernel -- confirm it still reproduces on Restart & Run All.
features

{'context_annotations': {'domain': {'id': 4578,
   'name': 4578,
   'description': 4576},
  'entity': {'id': 4578, 'name': 4578, 'description': 1386}},
 'conversation_id': 21775,
 'author_id': 21775,
 'attachments': {'media_keys': 4100, 'poll_ids': 20},
 'in_reply_to_user_id': 20455,
 'entities': {'urls': 6284,
  'hashtags': 3401,
  'mentions': 17722,
  'annotations': 6656,
  'cashtags': 15},
 'id': 21775,
 'created_at': 21775,
 'possibly_sensitive': 21775,
 'lang': 21775,
 'reply_settings': 21775,
 'text': 21775,
 'source': 21765,
 'referenced_tweets': 20496,
 'public_metrics': {'retweet_count': 21775,
  'reply_count': 21775,
  'like_count': 21775,
  'quote_count': 21775},
 'geo': {'coordinates': 395, 'place_id': 869},
 'withheld': {'copyright': 14, 'country_codes': 14}}

In [4]:
import torch