In [1]:
import pandas as pd
import numpy as np

import os

import nltk
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords 

from transformers import BertForSequenceClassification, BertTokenizer, BertForMaskedLM

from simpletransformers.language_modeling import LanguageModelingModel

from sklearn.metrics.pairwise import cosine_similarity, paired_euclidean_distances
from sklearn.metrics.pairwise import euclidean_distances


from tqdm import tqdm
import torch


stop_words = set(stopwords.words('english')) 

In [2]:
dataFolder = '/data1/roshansk/covid_data/'
fileList = os.listdir(dataFolder)

df = pd.read_csv(os.path.join(dataFolder, fileList[0]), nrows = 1500000)

In [3]:
model = BertForSequenceClassification.from_pretrained('/data1/roshansk/Exp1/checkpoint-141753-epoch-1', output_hidden_states= True)

In [4]:
tokenizer = BertTokenizer.from_pretrained('/data1/roshansk/Exp1/checkpoint-141753-epoch-1')

### Extract Embeddings

In [225]:
a = hidden_states[:,9:13,:]
a = torch.sum(a,1)
a.shape



a.shape

torch.Size([29, 768])

In [5]:
symptom = 'fatigue'

token = tokenizer.encode(symptom)[1]

In [7]:
token

16342

In [8]:
def getSymptomEmbedding(model, df, symptom, symptomToken,  embeddingType = 'last4sum'):
    
    embeddingList = []
    messageList = []
    
    for i in tqdm(range(len(df))):
        
        if symptom in df.iloc[i]['message'].lower():
                 
            tokens = tokenizer.encode(df.iloc[i]['message'].lower())
            decoded = tokenizer.decode(tokens).split(" ")
            logits, hidden_states = model(torch.Tensor(tokens).unsqueeze(0).long())

            hidden_states = torch.stack(hidden_states).squeeze(1).permute(1,0,2)
            
            
            try:
                tokenIndex = tokens.index(symptomToken)
            except:
                a= 1
                continue
#                 print(df.iloc[i]['message'])
#                 print(tokens)

            
            
            
            if embeddingType == 'last4sum':
                embedding = torch.sum(hidden_states[tokenIndex,9:13,:],0)
            elif embeddingType =='last4concat':
                embedding = hidden_states[tokenIndex,9:13,:].reshape(-1)
            elif embeddingType == 'secondlast':
                embedding = hidden_states[tokenIndex,-2,:]
            else:
                embedding = hidden_states[tokenIndex,-1,:]
                
                
            embeddingList.append(embedding.detach().cpu().numpy())
            messageList.append(df.iloc[i]['message'].lower())
            
            if len(embeddingList)==30:
                break
            
            
    return embeddingList, messageList

In [9]:
embList,msgList = getSymptomEmbedding(model, df, 'fatigue', 16342, embeddingType='last4sum')

 89%|████████▉ | 445272/500000 [01:51<00:13, 4004.08it/s]


In [10]:
len(embList)

30

In [11]:
out= cosine_similarity(embList)

In [55]:
def dispMsgs(out,msgList, index = 0):
    
    print(msgList[index])
    print("------------")
    
    outSub = out[index,:]
    
    for i in range(len(outSub)):
        val = np.round(outSub[i],3)
        b = str(val)
        
        print(f" {b } |  {msgList[i]} ")
        print("___________________")
    

In [56]:
dispMsgs(out,msgList,0)

covid-19npossible signs/symptomsnfever over 100.4 degrees -- cough--fatiguenshortness of breath ---body aches https://t.co/dgokcf7sa5
------------
 1.0 |  covid-19npossible signs/symptomsnfever over 100.4 degrees -- cough--fatiguenshortness of breath ---body aches https://t.co/dgokcf7sa5 
___________________
 0.673 |  if fatigue is a symptom of #covid_19 then iâ€™ve had it for 10 years 
___________________
 0.781 |  2/ q: what are the symptoms of covid-19 infection?nna: fever, cough and fatigue are the most common. there can alsoâ€¦ https://t.co/9zowsmfnh4 
___________________
 0.781 |  #symptoms #coronavirã¼sã¼ #coronavirusitalianews #corona #flu #covid_19 nallergies #fatigue #headache #cough #feverâ€¦ https://t.co/re1sid5fpf 
___________________
 0.75 |  @jheneaiko one of covid-19 symptoms is fatigue, you should get checked hun 
___________________
 0.738 |  woke up with my voice like half gone out of no where. so i guess that sore throat wasnâ€™t just nothing or fatigue afâ€¦ https:

In [58]:
similarityThreshold = 0.65
baseEmbedding = embList[0]


finalEmbList = [baseEmbedding]

for i in range(out.shape[0]):
    if out[0][i] > similarityThreshold:
        finalEmbList.append(embList[i])
        
        
finalEmbList = np.array(finalEmbList)

In [59]:
folder = 'EmbFolder/'
filename = 'fatigue_16342_Emb'
filename = os.path.join(folder, filename)


np.save(filename  , finalEmbList)

In [60]:
len(finalEmbList)

18

In [204]:
out

array([[1.        , 0.8996097 , 0.89481527, 0.6530163 , 0.8256697 ,
        0.90821946, 0.8455477 , 0.5973476 , 0.8275117 , 0.85419655,
        0.9165817 , 0.8636142 , 0.8030719 , 0.78260416, 0.9023808 ,
        0.7475438 , 0.878124  , 0.8870477 , 0.8258983 , 0.68710107,
        0.8426281 , 0.83700526, 0.801172  , 0.8606221 , 0.9011506 ,
        0.78979605, 0.7759943 , 0.8807336 , 0.86347926, 0.860159  ],
       [0.8996097 , 0.99999994, 0.8816011 , 0.6251153 , 0.89663947,
        0.89290464, 0.85728663, 0.6124271 , 0.8074592 , 0.86844   ,
        0.9007571 , 0.88845795, 0.82773894, 0.7921606 , 0.9005848 ,
        0.72148764, 0.90127325, 0.883248  , 0.8422947 , 0.6323065 ,
        0.87002873, 0.8673276 , 0.79174834, 0.86993074, 0.90635943,
        0.82164514, 0.83301795, 0.869472  , 0.879458  , 0.890677  ],
       [0.89481527, 0.8816011 , 0.99999976, 0.6401872 , 0.82928824,
        0.9131049 , 0.9000822 , 0.5801636 , 0.87975365, 0.8245122 ,
        0.88463074, 0.8658762 , 0.8522479 , 0.

In [144]:
out_secondlast= cosine_similarity(embList)

In [183]:
np.min(out)

0.3100328

In [184]:
out

array([[0.99999976, 0.73836255, 0.91137004, 0.87314737, 0.40480646,
        0.60998845, 0.6382924 , 0.676646  , 0.5991013 , 0.750648  ,
        0.59147465, 0.62863564, 0.8781458 , 0.9366606 , 0.9461085 ,
        0.86836183, 0.72613376, 0.6321299 , 0.6106101 , 0.6064559 ,
        0.61523384, 0.5547477 , 0.8294285 , 0.43086767, 0.5105665 ,
        0.9216633 , 0.6432327 , 0.55082643, 0.8362919 , 0.8319864 ],
       [0.73836255, 0.99999976, 0.70941305, 0.6585871 , 0.5201549 ,
        0.7103508 , 0.654395  , 0.8068874 , 0.6327089 , 0.81923515,
        0.7190993 , 0.7731551 , 0.7016377 , 0.7430638 , 0.7083738 ,
        0.685463  , 0.6655614 , 0.7916493 , 0.7605152 , 0.73518753,
        0.6404505 , 0.7185116 , 0.674138  , 0.5792736 , 0.6920639 ,
        0.7187174 , 0.8169085 , 0.65379786, 0.7358049 , 0.64817   ],
       [0.91137004, 0.70941305, 1.        , 0.84301734, 0.42836285,
        0.6395159 , 0.7122334 , 0.7000246 , 0.66421497, 0.79577994,
        0.6197717 , 0.63806844, 0.8500441 , 0.

In [205]:
msgList[0]

'loving how i predicted the #coronavirus in my online book @ https://t.co/xcqdimiieg'

In [206]:
msgList[1]

'the markets is hemorrhaging #trump and his pals would like us to believe that #coronavirus is to blame, but some ofâ€¦ https://t.co/bee5tslose'

In [207]:
msgList[3]

'unbelievable and crazy stuff by this scholar on corona. why jihad all the time? @imamofpeace'

In [208]:
msgList[7]

'tbh... these mfs just diagnosing everyone with corona to get yâ€™all more paranoid ðÿ¤¦ðÿ\x8f¾â€\x8dâ™‚ï¸\x8f #coronaoutbreak'

In [185]:
msgList[0]

'per the state health department: nnsymptoms of covid-19 can include fever, cough and breathing trouble. most develoâ€¦ https://t.co/u1cnlahiw1'

In [196]:
msgList[3]

'yâ€™all really made toilet paper a commodity when the symptoms of covid-19 are shortness of breath, fever, and cough according to the cdc ðÿ¥´'

In [186]:
msgList[4]

'someone go grab mitch mcconnell cough in his face and kick his ass into his senate seat.'

In [190]:
msgList[16]

'me after goggling my cough and finding out i have the #covid_19 #coronavirusupdate https://t.co/qd8fkf0svh'

In [191]:
msgList[17]

'really need to step back and calm the panic when a @walmart employee threatens to spray down anyone who coughs herâ€¦ https://t.co/wednc73hhi'

In [193]:
msgList[23]

'next person cough around me catchin these elbows tf!? #coronavirus https://t.co/9cgrgghqxu'

In [194]:
msgList[24]

'if youâ€™re one of those fucking dumb asses that cough in the air all out in the open like you donâ€™t give a fuck, youâ€¦ https://t.co/quoefmk3gi'

In [148]:
msgList[8]

'can you catch covid-19 through the internet. i feel like i have a fever after reading several articles about it.'

In [124]:
msgList[4]

'i may be suffering from cabin fever during this #wfh #coronavirus shutdown, but at least @bluejeansnet  is keepingâ€¦ https://t.co/yanhcmj8qi'

In [125]:
msgList[5]

'if you think you have been exposed to covid-19 and develop a fever and symptoms, such as cough or difficulty breathâ€¦ https://t.co/3ajbtidepc'

In [103]:
print(out[0][1])
print(out[0][3])
print(out[0][5])

print(out[1][3])
print(out[1][5])

print(out[3][5])

0.9167755
0.81526864
0.12466867
0.8423799
0.13835989
0.11168517


In [109]:
msgList[13]

'#earlyspring #neighborhoodwalk #businesserrand #coronavirus #cabinfever https://t.co/aouoiisasv'

In [130]:
hidden_states[1,9:13,:].reshape(-1)

torch.Size([3072])

In [111]:
tokenizer.encode(msgList[13])

[101,
 1001,
 2220,
 13102,
 4892,
 1001,
 5101,
 17122,
 1001,
 2449,
 2121,
 13033,
 1001,
 21887,
 23350,
 1001,
 6644,
 7959,
 6299,
 16770,
 1024,
 1013,
 1013,
 1056,
 1012,
 2522,
 1013,
 20118,
 19098,
 6137,
 20939,
 2615,
 102]

In [57]:
tokenizer.tokenize('Ugh. Week 1 of quarantine and already my Instagram feed is boring. #CabinFever #coronavirus')

['u',
 '##gh',
 '.',
 'week',
 '1',
 'of',
 'qu',
 '##aran',
 '##tine',
 'and',
 'already',
 'my',
 'ins',
 '##tagram',
 'feed',
 'is',
 'boring',
 '.',
 '#',
 'cabin',
 '##fe',
 '##ver',
 '#',
 'corona',
 '##virus']

In [37]:
hidden_states.shape

torch.Size([29, 13, 768])

In [21]:
hidden_states[0].shape

torch.Size([1, 29, 768])

In [22]:
len(decoded)

22

In [23]:
len(tokens)

29

In [38]:
df.iloc[0]['message'].lower()

'loving how i predicted the #coronavirus in my online book @ https://t.co/xcqdimiieg'

In [25]:
decoded

['[CLS]',
 'loving',
 'how',
 'i',
 'predicted',
 'the',
 '#',
 'coronavirus',
 'in',
 'my',
 'online',
 'book',
 '@',
 'https',
 ':',
 '/',
 '/',
 't.',
 'co',
 '/',
 'xcqdimiieg',
 '[SEP]']

In [29]:
for i in tokens:
    print(tokenizer.ids_to_tokens[i])

[CLS]
loving
how
i
predicted
the
#
corona
##virus
in
my
online
book
@
https
:
/
/
t
.
co
/
x
##c
##q
##dim
##ii
##eg
[SEP]


In [43]:
tokens.index(1001)

6

In [42]:
tokens

[101,
 8295,
 2129,
 1045,
 10173,
 1996,
 1001,
 21887,
 23350,
 1999,
 2026,
 3784,
 2338,
 1030,
 16770,
 1024,
 1013,
 1013,
 1056,
 1012,
 2522,
 1013,
 1060,
 2278,
 4160,
 22172,
 6137,
 13910,
 102]

In [49]:
torch.sum(hidden_states[0,9:13,:],0).shape

torch.Size([768])

### Finding new symptoms

In [94]:
os.listdir('EmbFolder/')

['.ipynb_checkpoints',
 'fever_9016_Emb.npy',
 'fatigue_16342_Emb.npy',
 'cough_19340_Emb.npy',
 'corona_21887_Emb.npy']

In [77]:
file = 'cough_19340_Emb.npy'
embList = np.load(os.path.join('EmbFolder/',file))

In [78]:
embList.shape

(12, 768)

In [79]:
embList = np.mean(embList,0)

In [80]:
embList.shape

(768,)

In [81]:
embList1 =embList.copy()

In [82]:
sim = cosine_similarity(a, embList.reshape(1,-1)).reshape(-1)

NameError: name 'a' is not defined

In [86]:
def getSimilarWords(model, df, symptom, embList, similarityThreshold = 0.3, numThreshold = 10000):
    
     
    output = []
    
    for i in tqdm(range(len(df))):
        
        if symptom in df.iloc[i]['message'].lower():

                 
            tokens = tokenizer.encode(df.iloc[i]['message'].lower())
            decoded = tokenizer.decode(tokens).split(" ")
            logits, hidden_states = model(torch.Tensor(tokens).unsqueeze(0).long())

            hidden_states = torch.stack(hidden_states).squeeze(1).permute(1,0,2)
            
            
            hidden_states = hidden_states[:,9:13,:]
            hidden_states = torch.sum(hidden_states,1).detach().cpu().numpy()
            
            similarity = cosine_similarity(hidden_states, embList.reshape(1,-1)).reshape(-1)

                            
            index = np.where([similarity> similarityThreshold])[1]

            selectTokens = np.array(tokens)[index]
            selectSim = similarity[index]
                      


            for j in range(len(index)):
                token = tokenizer.ids_to_tokens[selectTokens[j]]
                sim = selectSim[j]
                output.append((token, sim,i))

            
        if i==numThreshold:
            break
            
    return output

In [93]:
'' in df.iloc[2]['message']

True

In [89]:
out = getSimilarWords(model, df, 'cough', embList, numThreshold=100)

  0%|          | 100/500000 [00:14<19:31:09,  7.11it/s]


In [90]:
out

[('##virus', 0.30831546, 0),
 ('corona', 0.30955774, 1),
 ('##virus', 0.3466893, 1),
 ('##virus', 0.33004266, 6),
 ('##€', 0.30950922, 7),
 ('##virus', 0.32824627, 7),
 ('##sp', 0.3172115, 10),
 ('##virus', 0.31550515, 16),
 ('corona', 0.34058088, 18),
 ('19', 0.30022344, 20),
 ('smoke', 0.34953845, 24),
 ('contracting', 0.34738213, 24),
 ('##vid', 0.31336504, 24),
 ('19', 0.3346812, 24),
 ('ncaa', 0.307199, 29),
 ('##virus', 0.30647093, 29),
 ('drink', 0.33798996, 30),
 ('##virus', 0.31608182, 32),
 ('corona', 0.3053613, 36),
 ('##virus', 0.34411445, 36),
 ('fears', 0.31254637, 42),
 ('##virus', 0.30179173, 44),
 ('##virus', 0.3222639, 45),
 ('19', 0.30629843, 53),
 ('pain', 0.34232175, 54),
 ('neck', 0.38556516, 54),
 ('##virus', 0.3084358, 56),
 ('virus', 0.35500538, 60),
 ('cold', 0.42805433, 60),
 ('sick', 0.35689628, 61),
 ('corona', 0.317369, 66),
 ('##virus', 0.31515524, 67),
 ('##virus', 0.30161792, 69),
 ('19', 0.30848825, 70),
 ('##virus', 0.3224981, 70),
 ('corona', 0.32555

In [76]:
import pickle

pickle.dump( favorite_color, open( "save.p", "wb" ) )

In [270]:
df.iloc[3295]['message']

'@chadfelixg On the bright side, runny nose means itâ€™s likely not #Coronavirus https://t.co/hhe3j48PvO'

In [272]:
df.iloc[3516]['message']

'What You Need to Know: Coronavirus and People with Spinal Cord Injuries, Brain Injuries or MS https://t.co/geMA8NJ1Zt #coronavirus'

In [273]:
df.iloc[3890]['message']

'Looks like coronavirus can cause irreversible lung scarring (interstitial pneumonia: scarring of lung tissue). nnIâ€¦ https://t.co/P9pNSaTV7h'

In [274]:
df.iloc[3947]['message']

'Some info for those like me taking biologics for psoriasis & psoriatic arthritis regarding the #coronavirus : https://t.co/zTRFemUf4M'

In [277]:
df.iloc[4971]['message']

'It\'s past "Cover Your Mouth" just don\'t do that coughing shit around me #CoronavirusPandemic #Covid_19 https://t.co/vVcqgt78MP'

In [278]:
len(out)

1928

In [283]:
outMap = {}

for i in range(len(out)):
    if out[i][0] in outMap:
        outMap[out[i][0]].append(out[i][1])
    else:
        outMap[out[i][0]] = [out[i][1]]

In [284]:
len(outMap)

238

In [285]:
outMap_ ={}
for i in outMap:
    outMap_[i] = np.mean(outMap[i])

In [286]:
outMap

{'##virus': [0.30831546,
  0.3466893,
  0.33004266,
  0.32824627,
  0.31550515,
  0.30647093,
  0.31608182,
  0.34411445,
  0.30179173,
  0.3222639,
  0.3084358,
  0.31515524,
  0.30161792,
  0.3224981,
  0.3617989,
  0.33449817,
  0.34737694,
  0.30857837,
  0.31292236,
  0.3259898,
  0.33004615,
  0.33324295,
  0.32734406,
  0.31033534,
  0.30134034,
  0.30826017,
  0.3131056,
  0.31170028,
  0.31454965,
  0.31290483,
  0.3035832,
  0.34006602,
  0.3055134,
  0.30279928,
  0.30082613,
  0.33029425,
  0.32223082,
  0.32569683,
  0.3417984,
  0.33894128,
  0.31249017,
  0.31611317,
  0.3352574,
  0.30447155,
  0.33248693,
  0.34688798,
  0.32439673,
  0.3203219,
  0.31379128,
  0.30391723,
  0.31993484,
  0.30488563,
  0.3519808,
  0.31502518,
  0.3656077,
  0.32751158,
  0.30592817,
  0.32883286,
  0.3032828,
  0.3707632,
  0.35980478,
  0.35344484,
  0.30618414,
  0.30730647,
  0.3190143,
  0.3457697,
  0.3163243,
  0.306384,
  0.32339585,
  0.31927457,
  0.31547043,
  0.33618754,
  

### Symptom Analysis

In [7]:
import pickle
outputFolder = '/data1/roshansk/SymptomAnalysis/'

In [6]:
os.listdir(outputFolder)

['fatigue_500k_thresh0.3.p',
 'fatigue_10k_thresh0.3.p',
 'cough_10k_thresh0.3.p',
 'cough_500k_thresh0.3.p',
 'cough_1500k_thresh0.3.p',
 'fever_500k_thresh0.3.p',
 'fever_10k_thresh0.3.p',
 'fatigue_1500k_thresh0.3.p',
 'fever_1500k_thresh0.3.p']

In [121]:
filename = 'cough_10k_thresh0.3.p'

output = pickle.load(open( os.path.join(outputFolder, filename), "rb" ))

In [122]:
outMap = {}

for i in range(len(output)):
    if output[i][0] in outMap:
        outMap[output[i][0]].append(output[i][1])
    else:
        outMap[output[i][0]] = [output[i][1]]
        
        
outMap_ = {}

for i in range(len(output)):
    if output[i][0] in outMap_:
        outMap_[output[i][0]].append(output[i][2])
    else:
        outMap_[output[i][0]] = [output[i][2]]

In [123]:
outMap

{'##virus': [0.30831546,
  0.3466893,
  0.33004266,
  0.32824627,
  0.31550515,
  0.30647093,
  0.31608182,
  0.34411445,
  0.30179173,
  0.3222639,
  0.3084358,
  0.31515524,
  0.30161792,
  0.3224981,
  0.3617989,
  0.33449817,
  0.34737694,
  0.30857837,
  0.31292236,
  0.3259898,
  0.33004615,
  0.33324295,
  0.32734406,
  0.31033534,
  0.30134034,
  0.30826017,
  0.3131056,
  0.31170028,
  0.31454965,
  0.31290483,
  0.3035832,
  0.34006602,
  0.3055134,
  0.30279928,
  0.30082613,
  0.33029425,
  0.32223082,
  0.32569683,
  0.3417984,
  0.33894128,
  0.31249017,
  0.31611317,
  0.3352574,
  0.30447155,
  0.33248693,
  0.34688798,
  0.32439673,
  0.3203219,
  0.31379128,
  0.30391723,
  0.31993484,
  0.30488563,
  0.3519808,
  0.31502518,
  0.3656077,
  0.32751158,
  0.30592817,
  0.32883286,
  0.3032828,
  0.3707632,
  0.35980478,
  0.35344484,
  0.30618414,
  0.30730647,
  0.3190143,
  0.3457697,
  0.3163243,
  0.306384,
  0.32339585,
  0.31927457,
  0.31547043,
  0.33618754,
  

In [124]:
outputDf = []

for key in outMap.keys():
    length = len(outMap[key])
    mean = np.mean(outMap[key])
    
    outputDf.append([key, length, mean])


In [125]:
outputDf = pd.DataFrame(outputDf)

In [126]:
outputDf.columns = ['word','counts','mean_sim']

In [127]:
outputDf = outputDf.sort_values('mean_sim', ascending=False)

In [131]:
outputDf.head(40)

Unnamed: 0,word,counts,mean_sim
49,cough,19,0.771186
48,fever,13,0.618594
323,headache,2,0.593513
50,breathing,3,0.586231
122,throat,6,0.578483
491,asthma,2,0.55978
31,respiratory,7,0.543818
238,nose,6,0.513759
271,coughed,2,0.508535
121,mouth,4,0.494317


In [42]:
def dispTweets(outMap_, word, df):
    
    index = outMap_[word]
    
    for i in index:
        print(df.iloc[i]['message'])
        print("----------------")
    

In [135]:
dispTweets(outMap_,'obesity',df)

Not only is there a high likelihood (increasing daily) that Trump (at age 73) is infected, but his obesity, lack ofâ€¦ https://t.co/sNn5Jev4ON
----------------


In [43]:
dispTweets(outMap_,'##itis',df)

literslly thought i had bronchitis cuz i was coughing for a literal month and a half ðŸ™‡ðŸ¾â€â™€ï¸
----------------
The other day I went to the pharmacy to pick up some meds. I have acute bronchitis. I walked in and coughed into myâ€¦ https://t.co/wL0a5eYmOP
----------------
I had a crazy fever at the beginning of January. I had a cough & an early onset of bronchitis. Iâ€™ve never experiencâ€¦ https://t.co/bxpRw9JNoW
----------------
Helpful case series describing covid-19 in children: Cough and pharyngitis. Less than half had fever. Includes asymâ€¦ https://t.co/gZTaHr46gy
----------------
@One_Onyx_Night Well, had been laying low anyway because of this bronchitis. It's gotten better but I'm still coughâ€¦ https://t.co/2H60Wn3dhq
----------------
I have had a cough since the middle of February. Bronchitis. No fever or sore throat. I'm staying home so no one frâ€¦ https://t.co/ylbhCeQ4tN
----------------


In [47]:
dispTweets(outMap_,'dizzy',df)

I'm still really sick. Been coughing so much my ribs are sore. Have a sore throat, am achy, congested, dizzy, nauseâ€¦ https://t.co/AIBcjg64Xa
----------------
I had a low fever for 2 days (gone now) and now have cough, headache, dizzy and chest feels tight. Went to dr, he hâ€¦ https://t.co/puzj1IqgaJ
----------------


In [7]:
for i in range(500000):
    if 'asthmatic' in df.iloc[i]['message']:
        print(df.iloc[i]['message'])
        print(df.iloc[i]['message_id'])
        print(df.iloc[i]['user_id'])
        print("--------------------")

For every #diabetic and #asthmatic peer out there, be safe! #Covid_19 #CoronavirusPandemic
1238304621925285888
301173496
--------------------
Fellow asthmatics- please, please, please take your medication as prescribed ðŸ™ðŸ¼ It can be easy to think â€œmy asthmaâ€™â€¦ https://t.co/Kc2gZCV62H
1238310538163208192
202272587
--------------------
@Target sonce you all have me working through this whole #CoronavirusPandemic and Iâ€™m asthmatic yâ€™all gonna give me hazard pay???
1238428180266049537
549864765
--------------------
How do I know the difference between my throat hurts bc COVID-19 and my throat hurts bc I am an asthmatic stoner with seasonal allergies
1238528164088946688
275134837
--------------------
#askCDC Coughs are common for asthmatics this time of year. When do we worry that it is something more? Does COVID-â€¦ https://t.co/80AnhBhpmx
1238529254448672769
1344750043
--------------------
Start social distancing now. Stop thinking youâ€™re above it or it wonâ€™t affect you.

KeyboardInterrupt: 

In [6]:
df.head()

Unnamed: 0,message_id,user_id,message,created_at_utc,retweeted,retweet_message_id,in_reply_to_message_id,in_reply_to_user_id,favorite_count,retweet_count,...,tweet_location,tweet_location_short,place_type,coordinates,coordinates_state,fips,county,city,state,RT
0,1238220897720336390,790013818999074820,loving how i predicted the #coronavirus in my ...,2020-03-12 21:50:41,,0,0,0,0,0,...,,,,,,12095.0,Orange County,winter park,FL,0
1,1238220898307432448,320608440,The markets is hemorrhaging #trump and his pal...,2020-03-12 21:50:41,,0,0,0,0,0,...,,,,,,25005.0,Bristol County,Freetown,MA,0
2,1238220912530522123,305058336,Amar es prevenirn#coronavirus,2020-03-12 21:50:45,,0,0,0,0,0,...,,,,,,12086.0,Miami-Dade County,Miami,FL,0
3,1238220933384593413,137437056,"Chris's glib, histrionic commentary on COVID-1...",2020-03-12 21:50:50,,0,0,0,0,0,...,,,,,,11001.0,District of Columbia,Washington,D.C.,0
4,1238220933766230017,1012237740,If only COVID-19 started in Madagascar,2020-03-12 21:50:50,,0,0,0,0,0,...,,,,,,40015.0,Caddo County,Anadarko,OK,0
