# Exploring the full text of articles


Imports and Loads

In [3]:
import numpy as np
import pandas as pd
# Never truncate cell contents (the excerpts and summaries shown below are long).
# None means "no limit"; the legacy sentinel -1 was deprecated in pandas 1.0
# and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)



import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


from pprint import pprint


from sklearn.feature_extraction.text import CountVectorizer


# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
from gensim import matutils, models


# spacy for lemmatization
import spacy


import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)




FULL TEXT ANALYSIS 




In [5]:
import pandas as pd
# Ingest the combined articles; only the body text is analysed in this section.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
df_covid = pd.read_csv('C:/Users/Revathi/subset1000.csv')

# Keep only body_text. Selecting the column by name (instead of dropping a
# hard-coded list of the others) keeps working if the CSV gains or loses columns.
text = df_covid[['body_text']]

# One space-split token list per article. Missing bodies become '' so that a
# NaN is not stringified and tokenised as the word "nan".
words = [str(body).split(" ") for body in text['body_text'].fillna('')]

# Build the bigram and trigram models
bigram = gensim.models.Phrases(words, min_count=5, threshold=100)  # higher threshold fewer phrases.
trigram = gensim.models.Phrases(bigram[words], threshold=100)

# Faster way to get a sentence clubbed as a trigram/bigram
bigram_mod = gensim.models.phrases.Phraser(bigram)
trigram_mod = gensim.models.phrases.Phraser(trigram)

In [6]:
# Define functions for stopwords, bigrams, trigrams and lemmatization
def remove_stopwords(texts):
    """Tokenise each document with `simple_preprocess` and drop stop words.

    Args:
        texts: iterable of documents; each document is either a string or a
            list/tuple of tokens.

    Returns:
        A list with one list of tokens per document, excluding every token
        found in the module-level `stop_words`.
    """
    # Set membership is O(1); `stop_words` itself is a (long) list.
    stop_set = set(stop_words)
    cleaned = []
    for doc in texts:
        if isinstance(doc, (list, tuple)):
            # Join the tokens instead of calling str() on the list: str() yields
            # the list's repr, in which escapes such as "\n" turn into literal
            # characters and leak into the tokens.
            raw = " ".join(str(token) for token in doc)
        else:
            raw = str(doc)
        cleaned.append([word for word in simple_preprocess(raw) if word not in stop_set])
    return cleaned
def make_bigrams(texts):
    """Run every tokenised document in `texts` through `bigram_mod`."""
    merged_docs = []
    for tokens in texts:
        merged_docs.append(bigram_mod[tokens])
    return merged_docs
def make_trigrams(texts):
    """Run every tokenised document through `bigram_mod`, then `trigram_mod`."""
    merged_docs = []
    for tokens in texts:
        with_bigrams = bigram_mod[tokens]
        merged_docs.append(trigram_mod[with_bigrams])
    return merged_docs
def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """Lemmatize each document, keeping only tokens with an allowed POS tag.

    Each token list in `texts` is joined with single spaces and passed to the
    module-level spaCy pipeline `nlp`; the lemma of every resulting token whose
    `pos_` is in `allowed_postags` is kept.

    Returns:
        A list with one list of lemmas per input document.
    """
    def _allowed_lemmas(tokens):
        parsed = nlp(" ".join(tokens))
        return [tok.lemma_ for tok in parsed if tok.pos_ in allowed_postags]

    return [_allowed_lemmas(tokens) for tokens in texts]



We use the Natural Language Toolkit (NLTK) to remove English, French, and Spanish stopwords.
The stopword list is extended with additional terms that recur in the articles (e.g. 'doi', 'preprint', 'copyright').




In [7]:
# NLTK stop words for English, Spanish and French
from nltk.corpus import stopwords
stop_words = stopwords.words('english') + stopwords.words('spanish') + stopwords.words('french')
# Extra corpus-specific stop words. They must be lower-case: remove_stopwords
# compares them against simple_preprocess tokens, which are lower-cased, so
# 'Elsevier', 'PMC' and 'CZI' could never match.
stop_words.extend(['doi', 'preprint', 'copyright', 'peer', 'reviewed', 'org', 'https', 'et', 'al', 'author', 'figure', 'rights', 'reserved', 'permission', 'used', 'using', 'biorxiv', 'medrxiv', 'license', 'fig', 'fig.', 'al.', 'elsevier', 'pmc', 'czi'])
# Remove Stop Words
data_words_nostops = remove_stopwords(words)
# Form Bigrams
data_words_bigrams = make_bigrams(data_words_nostops)

In [9]:

# Initialize the small English spaCy model without the parser and NER
# components. Only `pos_` and `lemma_` are read by lemmatization(), and
# skipping the two components saves time and memory on very long documents.
nlp = spacy.load("en_core_web_sm", disable=['parser', 'ner'])
nlp.max_length = 1900000  # increased for the size of the body text
# Do lemmatization keeping only noun, adj, vb, adv
data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

In [10]:
# Dictionary mapping each lemma to an integer id
id2word = corpora.Dictionary(data_lemmatized)

# The lemmatized documents are the texts of the corpus
texts = data_lemmatized

# Bag-of-words representation of every document
corpus = list(map(id2word.doc2bow, texts))

In [11]:
# Fit a 20-topic LDA model on the bag-of-words corpus.
# random_state is fixed so that the topics are reproducible across runs.
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                           id2word=id2word,
                                           num_topics=20,
                                           random_state=42,
                                           update_every=1,
                                           chunksize=8000,
                                           passes=4,
                                           iterations=400,
                                           eval_every=None,
                                           alpha='auto',
                                           per_word_topics=True)

In [12]:
# Show the weighted keywords of the 20 topics
from pprint import pprint

topic_keywords = lda_model.print_topics()
pprint(topic_keywords)

# Apply the fitted model to the whole corpus
doc_lda = lda_model[corpus]

[(0,
  '0.032*"protein" + 0.009*"plant" + 0.008*"cell" + 0.007*"vaccine" + '
  '0.007*"virus" + 0.007*"produce" + 0.006*"report" + 0.006*"expression" + '
  '0.006*"may" + 0.005*"production"'),
 (1,
  '0.010*"show" + 0.010*"protein" + 0.010*"variant" + 0.009*"mutation" + '
  '0.008*"condition" + 0.008*"phase" + 0.007*"temperature" + 0.006*"study" + '
  '0.006*"velocity" + 0.006*"thermal"'),
 (2,
  '0.023*"protein" + 0.014*"cell" + 0.009*"infection" + 0.008*"viral" + '
  '0.008*"virus" + 0.008*"cchfv" + 0.007*"activity" + 0.005*"group" + '
  '0.005*"show" + 0.005*"bind"'),
 (3,
  '0.013*"sample" + 0.010*"virus" + 0.008*"test" + 0.007*"result" + '
  '0.007*"sequence" + 0.007*"base" + 0.007*"analysis" + 0.007*"model" + '
  '0.006*"also" + 0.006*"show"'),
 (4,
  '0.045*"cell" + 0.026*"virus" + 0.014*"infection" + 0.010*"viral" + '
  '0.009*"macrophage" + 0.008*"show" + 0.008*"antibody" + 0.007*"infect" + '
  '0.005*"also" + 0.005*"replication"'),
 (5,
  '0.008*"datum" + 0.008*"case" + 0.007


Filtering the COVID-19 and risk-related articles



In [14]:
# Spelling variants that refer to COVID-19 / SARS-CoV-2
covid19_keywords = [
    'sars-cov-2',
    'covid-19',
    '2019-ncov',
    'novel-coronavirus',
    'coronavirus 2019',
    'wuhan pneumonia',
    '2019ncov',
    'covid19',
    'sarscov2',
    'coronavirus-2019',
]

In [15]:
# Risk-factor terms, in the order they were originally listed
risk_keywords = [
    'smoking',
    'immunosuppress',
    'pulmonary',
    'pre-existing',
    'co-infection',
    'neonate',
    'pregnant',
    'socio-economic',
    'economic',
    'reproductive number',
    'incubation period',
    'serial interval',
    'transmission',
    'hospitalized',
    'chronic',
    'co-morbidity',
    'lung',
    'respiratory',
    'high-risk',
    'pneumonia',
    'gastrointestinal',
]


Queries from the task 



In [17]:
# Load the task queries from disk
queries = pd.read_csv('C:/Users/Revathi/queries.csv')

# Display them with a capitalised column header (the renamed copy is only shown, not stored)
column_labels = {'queries': 'Queries'}
queries.rename(columns=column_labels)

Unnamed: 0,Queries
0,"risk with smoking, vaping, and pre-existing pulmonary disease (copd)"
1,risk with Co-infections (determine whether co-existing respiratory/viral infections make the virus more transmissible or virulent) and other co-morbidities
2,"risks for neonates, newborns, and pregnant women"
3,Socio-economic and behavioral factors to understand the economic impact of the virus and whether there were differences.
4,"Transmission dynamics of the virus, including the basic reproductive number, incubation period, serial interval, modes of transmission and environmental factors"
5,"Severity of disease, including risk of fatality among symptomatic hospitalized patients, and high-risk patient groups"
6,Populations that are more susceptible to the disease
7,Public health mitigation measures that could be effective for control




First, the papers in the subset were split into sentences, and each sentence was embedded using
the Universal Sentence Encoder (USE) from TensorFlow.


Each sentence was embedded into a 512-dimensional vector.



In [25]:
import tensorflow as tf
import tensorflow_hub as hub

from sklearn.neighbors import NearestNeighbors

embed = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

# Split every article body into sentences.
# NOTE(review): nltk.sent_tokenize needs the NLTK 'punkt' data to be installed;
# confirm this matches the sentence splitter used for the original results.
corpus_sentences = [
    sentence
    for body in df_covid['body_text'].dropna().astype(str)
    for sentence in nltk.sent_tokenize(body)
]

# Embed the sentences in batches so the whole corpus is never pushed through
# the encoder in a single call.
EMBED_BATCH_SIZE = 1024
sentence_embeddings = np.vstack([
    embed(corpus_sentences[start:start + EMBED_BATCH_SIZE]).numpy()
    for start in range(0, len(corpus_sentences), EMBED_BATCH_SIZE)
])

# The index has to be fitted on the sentence embeddings before it can be queried.
knn = NearestNeighbors(n_neighbors=20, algorithm='ball_tree')
knn.fit(sentence_embeddings)

# Query to search for; replace the text to run another task query.
query_text = 'risks for neonates, newborns, and pregnant women'
embedded_query = embed([query_text])

# embed() returns a TensorFlow tensor; convert it to a NumPy row vector for scikit-learn.
distances, indices = knn.kneighbors(embedded_query.numpy().reshape(1, -1))





For each of the nearest-neighbor sentences,
the three sentences before and after it (if those sentences existed) were used to form an excerpt.

 
The selected sentences were then summarized with an extractive summarizer built on the HuggingFace PyTorch Transformers library. It works by embedding the sentences,
running a clustering algorithm (k-means), and finding the sentences that are closest to the clusters' centroids.



In [20]:

# First five rows for task 2, reduced to the query text and the excerpt.
# NOTE(review): `use_and_bm25` is not created in the visible cells - confirm where it comes from.
task_rows = use_and_bm25[use_and_bm25['task_id'] == 2]
top5 = task_rows.head(5)[['task_text_x','excerpt']]

# Display with reader-friendly column headers (the renamed copy is only shown, not stored)
display_headers = {'task_text_x':'Query', 'excerpt':'Excerpt'}
top5.rename(columns=display_headers)

Unnamed: 0,Query,Excerpt
40,"risks for neonates, newborns, and pregnant women","In this retrospective analysis from five hospitals, we included pregnant women with Covid-19 from January 1 to February 20, 2020. The primary composite endpoints were admission to an intensive care unit (ICU), the use of mechanical ventilation, or death. Secondary endpoints included the clinical severity of Covid-19, neonatal mortality, admission to neonatal intensive care unit (NICU), and the incidence of acute respiratory distress syndrome (ARDS) of pregnant women and newborns. ~~TAG~~ Thirty-three pregnant women with Covid-19 and 28 newborns were identified. ~~TAG~~ One (3%) pregnant woman needed the use of mechanical ventilation. No pregnant women admitted to the ICU. There were no moralities among pregnant women or newborns."
41,"risks for neonates, newborns, and pregnant women","Thirty-three pregnant women with Covid-19 and 28 newborns were identified. One (3%) pregnant woman needed the use of mechanical ventilation. No pregnant women admitted to the ICU. ~~TAG~~ There were no moralities among pregnant women or newborns. ~~TAG~~ The percentages of pregnant women with mild, moderate, and severe symptoms were 13 (39.4%),19(57.6%), and 1(3%). One (3.6%) newborn developed ARDS and was admitted to the NICU. The rate of perinatal transmission of SARS-CoV-2 was 3.6%."
42,"risks for neonates, newborns, and pregnant women","18 Studies have reported that the mortality rate of patients with COVID-19 is about 1·4%. 19 In our study, the maternal, fetal, and neonatal outcomes of pregnant women with COVID-19 pneumonia are better than those with SARS infection. This might be associated with our small number of cases and short delivery time (ie, as soon as possible after diagnosis). ~~TAG~~ Previous studies have shown no evidence of perinatal SARS infection in infants born to mothers who had SARS infection during pregnancy. ~~TAG~~ 18, 20 One neonate in our study was infected with COVID-19 36 h after birth. However, the viral nucleic acid tests of the placenta and cord blood in this patient were negative for SARS-CoV-2, so intrauterine vertical transmission might not have occurred; thus, further study is needed. However, this study has several limitations."
43,"risks for neonates, newborns, and pregnant women","Among the 33 women, 27 women delivered 28 newborns, and the live birth rate was 100%. Five women had ongoing pregnancy without severe obstetrical complications at the time of manuscript preparation. The overall rate of obstetrical complications was 22.2%, including three cases of preterm premature rupture of membranes, two cases of hypertensive diseases of the pregnancy, two cases of gestational diabetes mellitus, and one case of spontaneous preterm labor. ~~TAG~~ Therefore, the obstetrical outcomes from pregnant women with SARS-CoV-2 infection appear better than that for pregnant women with SARS. ~~TAG~~ However, it's worth noting that most pregnant women were infected with SARS-CoV-2 during the third trimester in this study. The gestational age when women are infected may affect obstetrical outcomes. In a review of 11 pregnant women with MERS-CoV infection, the infant death rate was All rights reserved."
44,"risks for neonates, newborns, and pregnant women","The severity of viral pneumonia in pregnancy is evidently related to physiological and immunological changes that result in a shift from cell-mediated to humoral-mediated immunity [7] . When pregnant women become infected with viral pneumonia, they are more likely to have complications and progress to severe cases [8] . A study in Hong Kong in 2004 shows that SARS during pregnancy is associated with high incidences of spontaneous miscarriage, preterm delivery, and intrauterine growth restriction [9] . ~~TAG~~ Another study shows that pregnant women with pneumonia have an increased risk of developing low birth weight infants, preterm births, restricted fetal growth, and 5-minute Apgar score <7 compared to healthy pregnant women [10] . ~~TAG~~ Although there have been no clinical or serological reports of SARS or MERS in neonatal infections in existing studies [9, 17] , evidence of vertical motherto-child transmission in other respiratory viruses such as H1N1 and respiratory syncytial virus (RSV) have been reported [11] . There have been several studies concerning intrauterine vertical transmission potential of COVID-19 and its effects on newborns [12, 13] . All the newborns of COVID-19 infected mothers in these cases were negative for nucleic acid test and one study shows that COVID-19 may have adverse effects on newborns, causing problems such as fetal distress, premature labor, respiratory distress, thrombocytopenia accompanied by abnormal liver function, and even death [13] ."


In [54]:
from summarizer import Summarizer

# Sentences to summarize. Each one carries a leading and trailing space, so the
# plain ''.join below still leaves the sentences separated by whitespace.
input_tag_sentences = [

    " Pregnant women are susceptible population of SARS-CoV-2 which are more likely to have complications and even progresse to severe illness. ",
    " Pregnant women are susceptible population of COVID-19 which are more likely to have complications and even progresse to severe illness. ",
    " Another study shows that pregnant women with pneumonia have an increased risk of developing low birth weight infants, preterm births, restricted fetal growth, and 5-minute Apgar score <7 compared to healthy pregnant women [10] . ",
    " There is little information about effects of COVID-19 on Pregnant women and newborns as a sensitive population. ",
    " When a baby is born vaginally it is exposed to the mother\'s gut microbiome, therefore if a baby does get infected with coronavirus a few days after birth we currently cannot tell if the baby was infected in the womb or during birth. ",
    " It is known that pregnant women are potentially at increased risk of complications from any respiratory disease due to the physiological changes that occur in pregnancy. ",
    " There is evidence that the use of corticosteroids during pregnancy increase the risk of preterm birth, low birthweight and preeclampsia 34 . ",
    " The COVID-19 infection during pregnancy also increase risks of several adverse outcomes, including higher rates of C-section delivery, low birth weight, and preterm birth. ",
    " treatments, and pregnancy outcomes for the women who have been infected with COVID-19 during their pregnancy. ",
    " An increasing proportion of the women are now infected with this virus during their pregnancy, which may put them in danger in terms of adverse maternal and newborn outcomes. ",
    " Women during pregnancy often face several pregnancy related complications and more susceptible to respiratory pathogens that may put them at higher risk of adverse . "

]  # closing bracket was missing, which made the whole cell a SyntaxError

# Summarize the concatenated sentences
model = Summarizer()
result = model(''.join(input_tag_sentences))
output_text_summary = ''.join(result)

In [52]:


# Rename the raw result columns to reader-friendly headers.
# NOTE(review): `question_answer_summary` is not created in the visible cells - confirm where it comes from.
qa_summary = question_answer_summary.rename(columns={'RISK_FACTOR_QUESTION':'Query',
                                        'ANSWER_SUMMARY':'Summary',
                                        'TOP_5_ARTICLES':'Top 5 Articles'})

# Only the last embedding was ever kept: the earlier embed(...) call for the
# "neonates" query was overwritten immediately, so that dead call is dropped.
embedded_query = embed(['Socio-economic and behavioral factors to understand the economic impact of the virus and whether there were differences.'])


qa_summary[['Query','Summary', "Top 5 Articles"]]

Unnamed: 0,Query,Summary,Top 5 Articles
0,"risk with smoking, vaping, and pre-existing pulmonary disease (copd)","ACE-2 expression in lower airways is increased in patients with COPD and with current smoking. This could be happening in lung tissue, facilitating ACE2 expression, as observed during lung cancer and COPD. Because individuals with chronic obstructive pulmonary disease (COPD) are at increased risk of severe COVID-19, we determined whether ACE-2 expression in the lower airways was related to COPD and cigarette smoking.","[('ACE-2 Expression in the Small Airway Epithelia of Smokers and COPD Patients: Implications for COVID-19', 'http://doi.org/10.1101/2020.03.18.20038455'), ('ACE2 Expression is Increased in the Lungs of Patients with Comorbidities Associated with Severe COVID-19', 'http://doi.org/10.1101/2020.03.21.20040261'), ('Prevalence, Severity and Mortality associated with COPD and Smoking in patients with COVID-19: A Rapid Systematic Review and Meta-Analysis Authors contributions', 'http://doi.org/10.1101/2020.03.25.20043745'), ('Cigarette smoke triggers the expansion of a subpopulation of respiratory epithelial cells that express the SARS-CoV-2 receptor ACE2', 'http://doi.org/10.1101/2020.03.28.013672'), ('Epidemiological, clinical and virological characteristics of 74 cases of coronavirus-infected disease 2019 (COVID-19) with gastrointestinal symptoms', 'http://doi.org/10.1136/gutjnl-2020-320926')]"
1,risk with Co-infections (determine whether co-existing respiratory/viral infections make the virus more transmissible or virulent) and other co-morbidities,"SARS-CoV-2 replication will be boosted leading to exacerbation of symptoms, substantial virus shedding, as well as increased risk for nosocomial transmission and secondary infection. As the virus spreads, research data from Chen et al showed that the initial symptoms of newly infected patients seemed to be more subtle, and the virus may lie in asymptomatic carriers for a long time. The close contact to infected individuals, either asymptomatic or clinical COVID-19 cases, increase the risk of infection.","[('SARS-CoV-2 and COVID-19: The most important research questions', 'http://doi.org/10.1186/s13578-020-00404-4'), ('COVID-19: Knowns, Unknowns, and Questions', 'http://doi.org/10.1128/msphere.00203-20'), ('A Review of Coronavirus Disease-2019 (COVID-19)', 'http://doi.org/10.1007/s12098-020-03263-6'), ('The COVID-19 pandemic and the use of MS disease-modifying therapies', 'http://doi.org/10.1016/j.msard.2020.102073'), ('Title: Antibody responses to SARS-CoV-2 in patients of novel coronavirus disease 2019 Brief Title: Antibody responses in COVID-19 patients Summary Background', 'http://doi.org/10.1101/2020.03.02.20030189')]"
2,"risks for neonates, newborns, and pregnant women","The obstetrical outcomes from pregnant women with SARS-CoV-2 infection appear better than that for pregnant women with SARS. There were no moralities among pregnant women or newborns. Anaesthesia-related complications occur more frequently in the COVID-19 parturients and their newborns have a high risk of distress. Pregnant women are susceptible population of COVID-19 which are more likely to have complications and even progresse to severe illness. treatments, and pregnancy outcomes for the women who have been infected with COVID-19 during their pregnancy.","[('Clinical features and the maternal and neonatal outcomes of pregnant women with coronavirus disease 2019', 'http://doi.org/10.1101/2020.03.22.20041061'), ('Clinical features and obstetric and neonatal outcomes of pregnant patients with COVID-19 in Wuhan, China: a retrospective, single-centre, descriptive study', 'http://doi.org/10.1016/s1473-3099(20)30176-6'), ('A case report of neonatal COVID-19 infection in China', 'http://doi.org/10.1093/cid/ciaa225'), ('Anaesthetic managment and clinical outcomes of parturients with COVID-19: a multicentre, retrospective, propensity score matched cohort study Short Title: Obstetric anaesthesia for parturients with COVID-19', 'http://doi.org/10.1101/2020.03.24.20042176'), ('Clinical characteristics of COVID-19 infection in pregnant women: a systematic review and meta-analysis', 'http://doi.org/10.1101/2020.04.05.20053983')]"
3,Socio-economic and behavioral factors to understand the economic impact of the virus and whether there were differences.,"There are some similarities and differences in the epidemiology and clinical features between these two viruses and diseases that are caused by these viruses. A main objective of this paper is to quantify the effect of various social and economic factors in mediating the transmission rates of the virus, which may help identify potential behavioral and socioeconomic risk factors for infections. Factors considered in our analysis are the end date of the outbreak, the total number of cases, economic impact, and students' graduation. these parameters vary considering the heterogeneity of the population, location of virus transmission, and socio-economic and political factors [Rabajante, .","[('Systematic Comparison of Two Animal-to-Human Transmitted Human Coronaviruses: SARS-CoV-2 and SARS-CoV', 'http://doi.org/10.3390/v12020244'), ('Lactate dehydrogenase, a Risk Factor of Severe COVID-19 Patients: A Retrospective and Observational Study', 'http://doi.org/10.1101/2020.03.24.20040162'), ('Comment Tackling two pandemics: a plea on World Tuberculosis Day', 'http://doi.org/10.1016/s2213-2600(20)30151-x'), ('Journal Pre-proof Shell disorder analysis predicts greater resilience of the SARS-CoV-2 (COVID-19) outside the body and in body fluids Shell disorder analysis predicts greater resilience of the SARS- CoV-2 (COVID-19) outside the body and in body fluids', 'http://doi.org/10.1016/j.micpath.2020.104177'), ('Tracing DAY-ZERO and Forecasting the Fade out of the COVID-19 Outbreak in Lombardy, Italy: A Compartmental Modelling and Numerical Optimization Approach', 'http://doi.org/10.1101/2020.03.17.20037689')]"
4,"Transmission dynamics of the virus, including the basic reproductive number, incubation period, serial interval, modes of transmission and environmental factors","The key problems surrounding this novel virus are as follows: diagnosis, mode of transmission, long incubation period (3 to 14 days), predicting the number of infected cases in the community, and insufficient protection resources due to its pandemic specification. All of the provinces outside Hubei were included in the analysis of demographic characteristics, key time-toevent intervals, incubation period, and serial interval. Increasingly, epidemiological studies are performed in real-time during an outbreak to understand key metrics such as the epidemic's reproduction number, serial interval distribution, incubation period and risk of international spread 2,3. In simplest terms, the reproductive number of a disease is a function of its transmission rate and the duration of infectious period.","[('Temporal dynamics in viral shedding and transmissibility of COVID-19', 'http://doi.org/10.1101/2020.03.15.20036707'), ('Articles Evolving epidemiology and transmission dynamics of coronavirus disease 2019 outside Hubei province, China: a descriptive and modelling study', 'http://doi.org/10.1016/s1473-3099(20)30230-9'), ('A Review of Coronavirus Disease-2019 (COVID-19)', 'http://doi.org/10.1007/s12098-020-03263-6'), ('Epidemiological parameters of coronavirus disease 2019: a pooled analysis of publicly reported individual data of 1155 cases from seven countries Summary Background', 'http://doi.org/10.1101/2020.03.21.20040329'), ('deepMINE -Natural Language Processing based Automatic Literature Mining and Research Summarization for Early-Stage Comprehension in Pandemic Situations specifically for COVID-19', 'http://doi.org/10.1101/2020.03.30.014555')]"
5,"Severity of disease, including risk of fatality among symptomatic hospitalized patients, and high-risk patient groups","Liver injury was the most common complication (in 75 patients), followed by acute respiratory distress (in 14 patients) in pneumonia patients. Of these patients, 55 (24.9%) were severe patients and 166 (75.1%) patients were non-severe (Table 1). Lymphopenia was common in patients with COVID-19 and SARS patients, and can be used as an indicator of disease severity and prognosis. Patients at older age and with medical comorbidities are at the most risk of requiring hospitalization, ICU care, and at risk for death.","[('Journal Pre-proof Epidemiological, clinical characteristics of cases of SARS-CoV-2 infection with abnormal imaging findings clinical characteristics of cases of SARS-CoV-2 infection with abnormal imaging findings Epidemiological, clinical characteristics of cases of SARS-CoV-2 infection with abnormal imaging findings', 'http://doi.org/10.1016/j.ijid.2020.03.040'), ('Title: First 12 patients with coronavirus disease', 'http://doi.org/10.1101/2020.03.09.20032896'), ('Clinical features and outcomes of 221 patients with COVID-19 in Wuhan, China', 'http://doi.org/10.1101/2020.03.02.20030452'), ('Reduction of lymphocyte at early stage elevates severity and death risk of COVID-19 patients: a hospital-based case-cohort study', 'http://doi.org/10.1101/2020.04.02.20050955'), ('Clinical characteristics of severe acute respiratory syndrome coronavirus 2 reactivation', 'http://doi.org/10.1016/j.jinf.2020.03.001')]"
6,Populations that are more susceptible to the disease,"Disease epidemics ultimately follow a sigmoidal shape, as they approach the carrying capacity of the disease. The epidemic of infectious diseases is a complex spreading process that occurs in population. The disease has 0.9% infection fatality rate meaning that almost one percent of individuals catching it eventually die from the disease.","[('A deductive approach to modeling the spread of COVID-19', 'http://doi.org/10.1101/2020.03.26.20044651'), ('One size does not fit all -Patterns of vulnerability and resilience in the COVID-19 pandemic and why heterogeneity of disease matters', 'http://doi.org/10.1016/j.bbi.2020.03.016'), ('A MODIFIED SEIR MODEL TO PREDICT THE COVID-19 OUTBREAK IN SPAIN: SIMULATING CONTROL SCENARIOS AND MULTI-SCALE EPIDEMICS', 'http://doi.org/10.1101/2020.03.27.20045005'), ('Identification of COVID-19 Can be Quicker through Artificial Intelligence framework using a Mobile Phone-Based Survey in the Populations when Cities/Towns Are Under Quarantine', 'http://doi.org/10.1017/ice.2020.61'), ('Autocatalytic Model for Covid-19 Progression in a Country', 'http://doi.org/10.1101/2020.04.03.20052985')]"
7,Public health mitigation measures that could be effective for control,"Infection control measures within the hospital setting and an aggressive public health response might also have prevented further exposures. Effective measures have been taken to control the pandemic in a timely manner, a similar approach in surveillance of the human population could be applicable, and requires public health leadership. In the baseline situation, there is no public health measure to control epidemics (c = 0).","[('First known person-to-person transmission of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) in the USA', 'http://doi.org/10.1016/s0140-6736(20)30607-3'), ('Personal View Can we contain the COVID-19 outbreak with the same measures as for SARS?', 'http://doi.org/10.1016/s1473-3099(20)30129-8'), ('Investigation of three clusters of COVID-19 in Singapore: implications for surveillance and response measures', 'http://doi.org/10.1016/s0140-6736(20)30528-6'), ('SARS-CoV-2: an Emerging Coronavirus that Causes a Global Threat', 'http://doi.org/10.7150/ijbs.45053'), (""Hospital pharmacists' pharmaceutical care for hospitalized patients with COVID-19: Recommendations and guidance from clinical experience"", 'http://doi.org/10.1016/j.sapharm.2020.03.027')]"


In [76]:
# Display the query, its summary and the top-5 article list held in `risk`.
# NOTE(review): `risk` is not defined in the visible cells - confirm which cell creates it.
risk[['Query','Summary', "Top 5 Articles"]]

Query             risks for neonates, newborns, and pregnant women                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
Summary           The obstetrical outcomes from pregnant women with SARS-CoV-2 infection appear better tha

In [None]:

Future work

We hope to integrate the results of TF-IDF, LDA, and the excerpt extraction more closely with one another.
More specifically, TF-IDF and LDA could be used to drive the subsetting of articles into meaningful groups.
This would hopefully create more reliable and consistent results in the excerpt extraction and text summarization steps.
We would also like to automate the task with a web application.
