In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import stop_words
from sklearn.feature_extraction.text import TfidfVectorizer
import os
from nltk.corpus import stopwords 
import nltk
import re
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity,euclidean_distances



In [2]:
#nltk.download('stopwords')

In [3]:
stop = set(['?',',',':','@']) 

In [4]:
ROOT = '../data/'

QA_dict = {}

In [5]:
os.listdir(ROOT)

['CDC.xlsx',
 'Coronavirus (COVID-19) frequently asked questions _ CDC.pdf',
 'covidquestionsca.xlsx',
 'EAC.xlsx',
 'JHU.xlsx',
 'MOHKE.xlsx',
 'sources.txt',
 'WHO.xlsx']

In [6]:
paths = [ROOT+d for d in os.listdir(ROOT) if d.endswith(".xlsx")]

In [7]:
paths

['../data/CDC.xlsx',
 '../data/covidquestionsca.xlsx',
 '../data/EAC.xlsx',
 '../data/JHU.xlsx',
 '../data/MOHKE.xlsx',
 '../data/WHO.xlsx']

In [8]:
data = [ pd.read_excel(p, names = ['q','a']) for p in paths]

In [9]:
QA = pd.DataFrame()

In [10]:
for d in data[:]:
    if d.shape[0]>0:
        print(d.shape)
        QA = pd.concat((QA,d),axis = 0)

(116, 2)
(10, 2)
(26, 2)
(86, 2)
(85, 2)


In [11]:
QA.shape

(323, 2)

In [12]:
QA.sample(10)

Unnamed: 0,q,a
22,How many cases have been reported in the Unite...,COVID-19 case counts for the United States are...
17,Will warm weather stop the outbreak of COVID-1...,It is not yet known whether weather and temper...
60,Will I get sick if I help care for them?,If you’re caring for them directly without ade...
16,Can I get sick with COVID-19 if it is on food?\n,Based on information about this novel coronavi...
91,"Is it safe to vacuum in a school, business, or...",The risk of transmitting or spreading SARS-CoV...
36,What is multisystem in ammatory syndrome in ch...,CDC is working with state and local health dep...
41,"With limited testing available, how do we tell...",People who have symptoms should assume they ha...
0,What is a coronavirus?,Coronaviruses are a family of viruses that typ...
84,Am I at risk if I go to a funeral or visitatio...,There is currently no known risk associated wi...
112,What precautions should be taken for animals t...,Imported animals will need to meet CDC and USD...


In [13]:
 snow = nltk.stem.SnowballStemmer('english')

In [14]:
def clean(q_a):
    temp = []
    snow = nltk.stem.SnowballStemmer('english')
    for sentence in q_a:
        sentence = str(sentence)
        sentence = sentence.lower()                 # Converting to lowercase
        cleanr = re.compile('<.*?>')
        sentence = re.sub(cleanr, ' ', sentence)        #Removing HTML tags
        sentence = re.sub(r'[?|!|\'|"|#]\d',r'',sentence)
        sentence = re.sub(r'[.|,|)|(|\|/]@',r' ',sentence)        #Removing Punctuations
        words = [snow.stem(word) for word in sentence.split() if word not in stop]   # Stemming and removing stopwords
        temp.append(words)
    q_a = temp  
    return q_a

In [15]:
def clean_col(c):
    sent = []
    for row in c:
        sequ = ''
        for word in row:
            sequ = sequ + ' ' + word
        sent.append(sequ)
    c = sent
    return c

In [16]:
q_new =  clean_col(clean(QA['q']))
a_new =  clean_col(clean(QA['a']))

In [17]:
QA['q_new'] =  q_new
QA['a_new'] = a_new

In [18]:
QA.head()

Unnamed: 0,q,a,q_new,a_new
0,What is a novel coronavirus?,A novel coronavirus is a new coronavirus that ...,what is a novel coronavirus?,a novel coronavirus is a new coronavirus that...
1,Why is the disease being called coronavirus di...,"On February 11, 2020 the World Health Organiza...",whi is the diseas be call coronavirus diseas ...,"on februari 11, 2020 the world health organ a..."
2,Why might someone blame or avoid individuals a...,People in the U.S. may be worried or anxious a...,whi might someon blame or avoid individu and ...,peopl in the u.s. may be worri or anxious abo...
3,How can people help stop stigma related to COV...,People can ght stigma by providing social supp...,how can peopl help stop stigma relat to covid...,peopl can ght stigma by provid social support...
4,Why do some state’s COVID-19 case numbers some...,CDC’s overall case numbers are validated throu...,whi do some state covid-19 case number someti...,cdc overal case number are valid through a co...


In [19]:
w2v_data = QA['q_new']

In [20]:
splitted = []
for row in w2v_data: 
    splitted.append([word for word in row.split()]) 

In [21]:
train_w2v = Word2Vec(splitted,min_count=1,size=100, workers=4)

In [22]:
avg_data = []
for row in splitted:
    vec = np.zeros(100)
    count = 0
    for word in row:
        try:
            vec += train_w2v[word]
            count += 1
        except:
            pass
    avg_data.append(vec/count)

  import sys


In [23]:
avg_data = np.array(avg_data)

In [24]:
for i,(q , a) in enumerate(zip(QA['q'],QA['a'])):
    QA_dict[i] = [q,a]                   

In [25]:
def wv_test(q):
    tq = [q]
    tq = clean(tq)
    tq = clean_col(tq)
    splitted_test = []
    for row in tq: 
        splitted_test.append([word for word in row.split()]) 
    avg_data_test = []
    for row in splitted_test:
        vec = np.zeros(100)
        count = 0
        for word in row:
            try:
                vec += train_w2v[word]
                count += 1
            except:
                pass
        avg_data_test.append(vec/count)
    return np.array(avg_data_test)

In [26]:
QA['q'].sample(20)

52     Is it correct that if someone in your home is ...
73     What progress is being made on therapies or tr...
102    Should I avoid contact with pets or other anim...
19     What is the possibility of reinfection after y...
52       What if my child needs to go to the hospital?\n
57     Are there acceptable designs of PPE that local...
62     How much food should people have on hand? Are ...
69                    Should I be tested for COVID-19?\n
17     Will warm weather stop the outbreak of COVID-1...
14     What can I do to protect myself and prevent th...
60     What cleaning products should I use to protect...
53     Can you avoid the virus by drinking warm water...
18     Is it safe to use public transport (Dalla Dall...
32                            When is testing important?
111    Can I travel to the United States with dogs or...
84     Does WHO recommend that all international mass...
70                Where can I get tested for COVID-19?\n
67     What are the symptoms an

In [29]:
while True:
    q = input("Q: ")
    q = wv_test(q)
    ranks = np.array([cosine_similarity(q.reshape(1,100),avg_data[i].reshape(1,100)) for i in range(QA.shape[0])]).reshape(QA.shape[0],1)
    loc = np.argmax(ranks)  
    a = QA_dict[loc][1]
    print("A: ",a)

Q: public transport


  


A:  The CDC now recommends that people without symptoms wear cloth masks in public to reduce the risk of asymptomatic spread. A reasonable position would be to encourage wearing masks while emphasizing that it's social distancing and not just the mask that prevents spread from person to person. 
Q: pets


  


A:  Some animals, like ferrets and maybe cats, are susceptible to this virus. Dogs were shown to be more resistant. The recommendations right now are to keep your distance from pets if you are diagnosed with COVID-19. The good news is we don’t think that pets are very likely to get sick if they are actually exposed to or infected with SARS-CoV-2 virus. But out of an abundance of caution—since we don’t know this virus well just yet—it’s a good idea to play it safe. 
Q: dog


  


A:  Please refer to CDC’s requirements for bringing a dog to the United States. The current requirements for rabies
vaccination apply to dogs imported from high-risk countries for rabies.

Q: testing


  


A:  Using the CDC-developed viral test, a negative result means that the virus that causes COVID-19 was not found in the
person’s sample. In the early stages of infection, it is possible the virus will not be detected.
For COVID-19, a negative test result for a sample collected while a person has symptoms likely means that the COVID-
19 virus is not causing their current illness.

Q: where can i go for test


  


A:  The process and locations for testing vary from place to place. Contact your state, local, tribal, or territorial
department for more information, or reach out to a medical provider. State and local public health departments have
received tests from CDC while medical providers are getting tests developed by commercial manufacturers. While
supplies of these tests are increasing, it may still be di cult to nd someplace to get tested. See Testing for COVID-
19 for more information.

Q: get infected again


  


A:  We don’t know for sure which animals can be infected with the virus that causes COVID-19. CDC is aware of a small
number of pets, including dogs and cats, reported to be infected with the virus that causes COVID-19, mostly after
close contact with people with COVID-19. A tiger at a zoo in New York has also tested positive for the virus. Recent research shows that ferrets, cats, and golden Syrian hamsters can be experimentally infected with the virus
and can spread the infection to other animals of the same species in laboratory settings. Pigs, chickens, and ducks did
not become infected or spread the infection based on results from these studies. Data from one study suggested dogs
are not as likely to become infected with the virus as cats and ferrets. These ndings were based on a small number of
animals, and do not show whether animals can spread infection to people.
At this time, there is no evidence that animals play a signi cant role in spreading the virus that causes COVID-19.

  


A:  Most people with disabilities are not inherently at higher risk for becoming infected with or having severe illness from
COVID-19. Some people with physical limitations or other disabilities might be at a higher risk of infection because of
their underlying medical condition.
People with certain disabilities might experience higher rates of chronic health conditions that put them at higher
risk of serious illness and poorer outcomes from COVID-19. Adults with disabilities are three times more likely to
have heart disease, stroke, diabetes, or cancer than adults without disabilities.
You should talk with your healthcare provider if you have a question about your health or how your health condition is
being managed.

Q: get infected again


  


A:  We don’t know for sure which animals can be infected with the virus that causes COVID-19. CDC is aware of a small
number of pets, including dogs and cats, reported to be infected with the virus that causes COVID-19, mostly after
close contact with people with COVID-19. A tiger at a zoo in New York has also tested positive for the virus. Recent research shows that ferrets, cats, and golden Syrian hamsters can be experimentally infected with the virus
and can spread the infection to other animals of the same species in laboratory settings. Pigs, chickens, and ducks did
not become infected or spread the infection based on results from these studies. Data from one study suggested dogs
are not as likely to become infected with the virus as cats and ferrets. These ndings were based on a small number of
animals, and do not show whether animals can spread infection to people.
At this time, there is no evidence that animals play a signi cant role in spreading the virus that causes COVID-19.

  


A:  When a US citizen dies outside the United States, the deceased person’s next of kin or legal representative should
notify US consular o cials at the Department of State. Consular personnel are available 24 hours a day, 7 days a week,
to provide assistance to US citizens for overseas emergencies. If a family member, domestic partner, or legal
representative is in a di erent country from the deceased person, he or she should call the Department of State’s
O ce of Overseas Citizens Services in Washington, DC, from 8 am to 5 pm Eastern time, Monday through Friday, at
888-407-4747 (toll-free) or 202-501-4444. For emergency assistance after working hours or on weekends and holidays,
call the Department of State switchboard at 202-647-4000 and ask to speak with the Overseas Citizens Services duty
o cer. In addition, the US embassy  closest to or in the country where the US citizen died can provide assistance.

Q: can i keep my family safe


  


A:  Watch your child for any signs of illness. If you see any sign of illness consistent with symptoms of COVID-19, particularly fever, cough, or shortness of breath, call your healthcare provider and keep your child at home and away from others as much as possible.Follow CDC’s guidance on “What to do if you are sick.”

Q: fuck


  


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').