In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(current_dir, file_name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# TopiQAL: Topic Modeling based BioBERT Question Answering Model Pipeline

### This notebook is not fully runnable here, because training and the heavier computations were performed on Google Colab TPUs/GPUs. For simplicity, the trained model files have been uploaded instead.

BERT imposes a limit on the length of its input sequence. This leads us to another major challenge: choosing the contexts to pose questions to, such that the total length of these texts stays within that limit. Asking the same set of questions of every paper means higher computation time, which becomes a problem when a user is interacting with the system. Questions may also be posed to contexts that do not contain an answer at all, which causes additional overhead both for the user and for the developer curating answers for Kaggle.

We propose to filter out required contexts to ask questions in an interpretable setting, using Topic Modeling approach that is hierarchically performed on Abstracts and Bodies of text in the publications.

* As a brief overview (details follow as we proceed through the notebook): the abstracts and the bodies are first modeled separately, per source, using Gensim LDA.
* The abstract models are used to run inference on the TASK# and its questions.
* The obtained topic lists (sorted by probability distribution, and top 2 topics are taken) are used to filter papers that share same 'dominant' topics.
* Obtained dataframes are left joined on original dataframes (filtered by source)
* We pass this through a section filter: the body topic models are used to run inference on sections of text (separated by '\n'), and we keep only those sections whose topics are related to the question. This further reduces the number of sections per paper, though at times the number of papers obtained after filtering abstracts remains the same.

In [None]:
# `Image` is imported in this cell because it runs before the notebook's main
# import cell; without this line a fresh "Restart & Run All" raises NameError.
from IPython.display import Image

Image('../input/topiqal/topiqal.png')

In [None]:
from collections import defaultdict
import json
import pandas as pd
from pathlib import Path
import re
from matplotlib import pyplot as plt
import seaborn as sns
from unidecode import unidecode

from IPython.display import Image
import gensim
from gensim import utils
from nltk.stem import PorterStemmer as porter_stemmer
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np

#Data Paths
# Parent directory of the current working directory.
root_path = Path.cwd().parents[0] 
# Dated folder under that parent.
# NOTE(review): not referenced by any other cell visible here - confirm it is still needed.
data_dir = root_path / "2020-03-13"

In [None]:
# Load the metadata CSV shipped with the CORD-19 research challenge input.
meta_df = pd.read_csv('../input/CORD-19-research-challenge/metadata.csv')

# NOTE(review): presumably the names of the CORD-19 sub-collections - confirm against the dataset layout.
data_sets = ["biorxiv_medrxiv", "comm_use_subset", "noncomm_use_subset", "pmc_custom_license"]

In [None]:
## courtesy: kaggle 


# nltk
import nltk
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from nltk import WordNetLemmatizer
from nltk.tokenize import word_tokenize

"""
TOOLS
"""

def remove_numbers(input):
    """Replace whitespace-delimited digit runs in ``input`` with one space.

    A digit run is replaced, together with the whitespace around it, when it
    is at the start of the string followed by whitespace, between whitespace,
    or at the end of the string preceded by whitespace.
    """
    standalone_number = re.compile(r"(^\d+\s+|\s+\d+\s+|\s+\d+$)")
    return standalone_number.sub(" ", input)


def remove_punctuations(input):
    """Delete punctuation characters from ``input``, keeping ``_`` and ``-``."""
    # All ASCII punctuation marks except '_' and '-'.
    unwanted = '!"#$%&\'()*+./:;<=>?@,[\\]^`{|}~'
    # Mapping a character to None in a translation table deletes it.
    deletion_table = str.maketrans(dict.fromkeys(unwanted))
    return input.translate(deletion_table)

def remove_pharentesis(input):
    """Replace every round or square bracket character in ``input`` with a space."""
    bracket = re.compile(r"(\(|\)|\[|\])")
    return bracket.sub(" ", input)

def remove_diacritics(input):
    """Remove diacritics (as accent marks) from input.

    Args:
        input: Text to transliterate.

    Returns:
        The result of passing ``input`` to the ``unidecode`` function.
    """
    # The notebook binds the name `unidecode` to the *function*
    # (`from unidecode import unidecode`), so the previous
    # `unidecode.unidecode(input)` raised AttributeError. Importing the function
    # locally under a private name works however the module-level name is bound.
    from unidecode import unidecode as _to_ascii
    return _to_ascii(input)


def remove_white_space(input):
    """Collapse each whitespace run in ``input`` to one space and trim both ends."""
    # Turn non-breaking spaces into ordinary spaces first.
    without_nbsp = input.replace(u"\xa0", u" ")
    # split() with no argument splits on any run of spaces, new lines and tabs.
    words = without_nbsp.split()
    return " ".join(words)



# Remove empty brackets (that could happen if the contents have been removed already
# e.g. for citation ( [3] [4] ) -> ( ) -> nothing
# https://github.com/jakelever/bio2vec/blob/master/PubMed2Txt.py
def removeBracketsWithoutWords(text):
    """Replace bracket pairs that enclose no word characters with a single space.

    Round, square and curly pairs are handled, in that order.
    """
    for empty_pair in (r'\([\W\s]*\)', r'\[[\W\s]*\]', r'\{[\W\s]*\}'):
        text = re.sub(empty_pair, ' ', text)
    return text


def clean_text(text):
    """Run the notebook's cleaning helpers over ``text`` and return the result.

    Steps, in order: drop bracket pairs that contain no words, replace the
    remaining round/square brackets with spaces, strip punctuation, drop
    standalone numbers, strip punctuation again, and normalise whitespace.
    Letter case is left unchanged.
    """
    cleaning_steps = (
        removeBracketsWithoutWords,
        remove_pharentesis,
        remove_punctuations,  # powerful
        remove_numbers,       # only in case SPACE NUM SPACE
        remove_punctuations,
        remove_white_space,
    )
    for step in cleaning_steps:
        text = step(text)
    return text


In [None]:
# Download the stopwords-iso English list (JSON) into the parent directory.
!curl 'https://raw.githubusercontent.com/stopwords-iso/stopwords-en/master/stopwords-en.json' -o '../stopwords.en.json'

# Parse the downloaded file; `swords` is merged into the `stops` set in a later cell.
with open('../stopwords.en.json') as fopen:
    swords = json.load(fopen)
print(len(swords))

# LDA Topic Modeling


In [None]:

import json
import copy
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.util import ngrams

# Base stop-word set (NLTK English) and the lemmatizer shared by the
# preprocessing helpers below.
stops = set(stopwords.words("english"))
wnl = WordNetLemmatizer()

# Corpus-specific boilerplate terms to drop in addition to ordinary stop words.
customize_stop_words = [
    'doi', 'preprint', 'copyright', 'peer', 'reviewed', 'org', 'https', 'et', 'al', 'author', 'figure', 
    'rights', 'reserved', 'permission', 'used', 'using', 'biorxiv', 'medrxiv', 'license', 'fig', 'fig.', 'al.', 'Elsevier', 'PMC', 'CZI',
    '-PRON-', 'igg', 'ha', 'wa', 'b', 'granted', 'authorfunder'
]

stops.update(customize_stop_words)
# The preprocessing functions lower-case the text before testing membership in
# `stops`, so capitalised entries such as 'Elsevier', 'PMC' and 'CZI' could
# never match. Add the lower-cased forms as well (originals are kept).
stops.update(word.lower() for word in customize_stop_words)

# Merge in the stopwords-iso list loaded earlier into `swords`.
stops.update(swords)

# Define function to clean text
def pre_process_text(X):
    """Lower-case, tokenise, lemmatise and bigram-augment raw texts.

    Args:
        X: Iterable of raw text strings, one per document.

    Returns:
        A list with one token list per input document. Each list holds the
        lemmatised tokens that are not in ``stops``, followed by any detected
        bigrams (joined with '_'), with tokens of length 1 removed.

    Fixes relative to the previous version:
      * ``Phrases`` was trained on a single document's flat token list, so each
        token string was treated as a "sentence" of characters. It is now
        trained once on all token lists.
      * Detected bigrams were appended to ``token_words[idx]``, which is a
        ``str`` and has no ``append``. They are now appended to the document's
        token list.
      * Bigrams were detected with '-' although the file's other ``Phrases``
        use (``gensim_process``) checks for '_'; '_' is used here too.
      * Per-token debug ``print`` calls were removed.
    """
    # Imported locally, as in the original implementation.
    from gensim.models import Phrases

    docs = []
    for raw_text in X:
        # Convert to lower case and tokenize.
        tokens = nltk.word_tokenize(raw_text.lower())
        # Drop stop words, then lemmatise what is left.
        docs.append([wnl.lemmatize(w) for w in tokens if w not in stops])

    if not docs:
        return []

    # Add bigrams to docs (only ones that appear 20 times or more).
    bigram = Phrases(docs, min_count=20)
    for doc in docs:
        # Build the list first so the document is not extended while it is
        # being transformed.
        doc.extend([token for token in bigram[doc] if '_' in token])

    # Remove tokens that are only one character long.
    return [[w for w in doc if len(w) > 1] for doc in docs]

## gensim lda tutorial 
## https://radimrehurek.com/gensim/models/
def gensim_process(docs):
    """Tokenise, filter, lemmatise and bigram-augment raw documents.

    Args:
        docs: Iterable of raw text strings, one per document. The argument is
            no longer modified (the previous version overwrote each element of
            the caller's list with its token list, which is why call sites pass
            a deep copy; they may keep doing so).

    Returns:
        A new list with one token list per document: lower-cased ``\\w+``
        tokens that are not purely numeric, are longer than one character and
        are not in ``stops``, lemmatised, followed by any detected bigrams
        (tokens containing '_').
    """
    # Imported locally, as in the original implementation.
    from nltk.tokenize import RegexpTokenizer
    from nltk.stem.wordnet import WordNetLemmatizer
    from gensim.models import Phrases

    tokenizer = RegexpTokenizer(r'\w+')
    lemmatizer = WordNetLemmatizer()

    processed = []
    for doc in docs:
        tokens = tokenizer.tokenize(doc.lower())
        processed.append([
            lemmatizer.lemmatize(token)
            for token in tokens
            # Remove numbers (but not words that contain numbers), one-character
            # tokens and stop words. The stop-word test is applied to the raw
            # token, before lemmatisation, exactly as before.
            if not token.isnumeric() and len(token) > 1 and token not in stops
        ])

    if not processed:
        return processed

    # Add bigrams to docs (only ones that appear 10 times or more).
    bigram = Phrases(processed, min_count=10)
    for tokens in processed:
        # Token is a bigram when it contains '_'; append it to the document.
        tokens.extend([token for token in bigram[tokens] if '_' in token])

    return processed


import pyLDAvis.gensim
pyLDAvis.enable_notebook()

## Abstract Topic Modeling 


In [None]:
d = pd.read_csv('../input/processed-cord19/corid.csv')

# Keep only rows that have an abstract. `.copy()` makes `d_a` an independent
# frame, so any later column assignment on it does not raise
# SettingWithCopyWarning or depend on whether pandas returned a view of `d`.
d_a = d[d['abstract'].notna()].copy()

# Displayed as the cell output: per-source non-null counts for every column.
d_a.groupby('source_x').count()

The groupby output above shows how the publications are distributed across the different sources.

Hence, we try to leverage this to build separate topic models that can subsequently filter down contexts for BERT-QA model

### Preprocess Abstracts

In [None]:
# Clean every abstract with clean_text; rows without an abstract were already dropped when d_a was built.
d_a['abstract'] = d_a['abstract'].map(clean_text)


#### Seeding with the same question set

#### Gensim LDA models help us find clusters of paragraphs that share similar topics, as well as the topic distribution within each paragraph. We use this clue to filter down to the paragraphs that share topics with the task questions themselves.

This list below contains questions from all tasks. We preprocess these questions to use as seeding to the topic modeler.

In [None]:
s = ['Range of incubation periods for the disease in humans (and how this varies across age and health status) and how long individuals are contagious, even after recovery?\
Prevalence of asymptomatic shedding and transmission (e.g., particularly children)?\
Seasonality of transmission?\
Physical science of the coronavirus (e.g., charge distribution, adhesion to hydrophilic/phobic surfaces, environmental survival to inform decontamination efforts for affected areas and provide information about viral shedding)?\
Persistence and stability on a multitude of substrates and sources (e.g., nasal discharge, sputum, urine, fecal matter, blood)?\
Persistence of virus on surfaces of different materials (e,g., copper, stainless steel, plastic)?\
Natural history of the virus and shedding of it from an infected person?\
Implementation of diagnostics and products to improve clinical processes?\
Disease models, including animal models for infection, disease and transmission?\
Tools and studies to monitor phenotypic change and potential adaptation of the virus?\
Immune response and immunity?\
Effectiveness of movement control strategies to prevent secondary transmission in health care and community settings?\
Effectiveness of personal protective equipment (PPE) and its usefulness to reduce risk of transmission in health care and community settings?\
Role of the environment in transmission?',
             
'Data on potential risks factors?\
Smoking, pre-existing pulmonary disease?\
Co-infections (determine whether co-existing respiratory/viral infections make the virus more transmissible or virulent) and other co-morbidities?\
Neonates and pregnant women?\
Socio-economic and behavioral factors to understand the economic impact of the virus and whether there were differences.?\
Transmission dynamics of the virus, including the basic reproductive number, incubation period, serial interval, modes of transmission and environmental factors?\
Severity of disease, including risk of fatality among symptomatic hospitalized patients, and high-risk patient groups?\
Susceptibility of populations?\
Public health mitigation measures that could be effective for control?',

'Real-time tracking of whole genomes and a mechanism for coordinating the rapid dissemination of that information to inform the development of diagnostics and therapeutics and to track variations of the virus over time.?\
Access to geographic and temporal diverse sample sets to understand geographic distribution and genomic differences, and determine whether there is more than one strain in circulation. Multi-lateral agreements such as the Nagoya Protocol could be leveraged.?\
Evidence that livestock could be infected (e.g., field surveillance, genetic sequencing, receptor binding) and serve as a reservoir after the epidemic appears to be over.?\
Evidence of whether farmers are infected, and whether farmers could have played a role in the origin.?\
Surveillance of mixed wildlife- livestock farms for SARS-CoV-2 and other coronaviruses in Southeast Asia.?\
Experimental infections to test host range for this pathogen.?\
Animal host(s) and any evidence of continued spill-over to humans?\
Socioeconomic and behavioral risk factors for this spill-over?\
Sustainable risk reduction strategies?',

'Effectiveness of drugs being developed and tried to treat COVID-19 patients.?\
Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.?\
Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.?\
Exploration of use of best animal models and their predictive value for a human vaccine.?\
Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.?\
Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.?\
Efforts targeted at a universal coronavirus vaccine.?\
Efforts to develop animal models and standardize challenge studies?\
Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers?\
Approaches to evaluate risk for enhanced disease after vaccination?\
Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]?',
             
'Resources to support skilled nursing facilities and long term care facilities.?\
Mobilization of surge medical staff to address shortages in overwhelmed communities?\
Age-adjusted mortality data for Acute Respiratory Distress Syndrome (ARDS) with/without other organ failure – particularly for viral etiologies?\
Extracorporeal membrane oxygenation (ECMO) outcomes data of COVID-19 patients?\
Outcomes data for COVID-19 after mechanical ventilation adjusted for age.?\
Knowledge of the frequency, manifestations, and course of extrapulmonary manifestations of COVID-19, including, but not limited to, possible cardiomyopathy and cardiac arrest.?\
Application of regulatory standards (e.g., EUA, CLIA) and ability to adapt care to crisis standards of care level.?\
Approaches for encouraging and facilitating the production of elastomeric respirators, which can save thousands of N95 masks.?\
Best telemedicine practices, barriers and faciitators, and specific actions to remove/expand them within and across state boundaries.?\
Guidance on the simple things people can do at home to take care of sick people and manage disease.?\
Oral medications that might potentially work.?\
Use of AI in real-time health care delivery to evaluate interventions, risk factors, and outcomes in a way that could not be done manually.?\
Best practices and critical challenges and innovative solutions and technologies in hospital flow and organization, workforce protection, workforce allocation, community-based support resources, payment, and supply chain management to enhance capacity, efficiency, and outcomes.?\
Efforts to define the natural history of disease to inform clinical care, public health interventions, infection prevention control, transmission, and clinical trials?\
Efforts to develop a core clinical outcome set to maximize usability of data across a range of trials?\
Efforts to determine adjunctive and supportive interventions that can improve the clinical outcomes of infected patients (e.g. steroids, high flow oxygen)?',             
  
'Guidance on ways to scale up NPIs in a more coordinated way (e.g., establish funding, infrastructure and authorities to support real time, authoritative (qualified participants) collaboration with all states to gain consensus on consistent guidance and to mobilize resources to geographic areas where critical shortfalls are identified) to give us time to enhance our health care delivery system capacity to respond to an increase in cases.?\
Rapid design and execution of experiments to examine and compare NPIs currently being implemented. DHS Centers for Excellence could potentially be leveraged to conduct these experiments.?\
Rapid assessment of the likely efficacy of school closures, travel bans, bans on mass gatherings of various sizes, and other social distancing approaches.?\
Methods to control the spread in communities, barriers to compliance and how these vary among different populations?\
Models of potential interventions to predict costs and benefits that take account of such factors as race, income, disability, age, geographic location, immigration status, housing status, employment status, and health insurance status.?\
Policy changes necessary to enable the compliance of individuals with limited resources and the underserved with NPIs.?\
Research on why people fail to comply with public health advice, even if they want to do so (e.g., social or financial costs may be too high).?\
Research on the economic impact of this or any pandemic. This would include identifying policy and programmatic alternatives that lessen/mitigate risks to critical government services, food distribution and supplies, access to critical household supplies, and access to health diagnoses, treatment, and needed care, regardless of ability to pay.?',
             
'Are there geographic variations in the rate of COVID-19 spread?\
Are there geographic variations in the mortality rate of COVID-19?\
Is there any evidence to suggest geographic based virus mutations?' ,
             
'How widespread current exposure is to be able to make immediate policy recommendations on mitigation measures. Denominators for testing and a mechanism for rapidly sharing that information, including demographics, to the extent possible. Sampling methods to determine asymptomatic disease (e.g., use of serosurveys (such as convalescent samples) and early detection of disease (e.g., use of screening of neutralizing antibodies such as ELISAs).?\
Efforts to increase capacity on existing diagnostic platforms and tap into existing surveillance platforms.?\
Recruitment, support, and coordination of local expertise and capacity (public, private—commercial, and non-profit, including academic), including legal, ethical, communications, and operational issues.?\
National guidance and guidelines about best practices to states (e.g., how states might leverage universities and private laboratories for testing purposes, communications to public health officials and the public).?\
Development of a point-of-care test (like a rapid influenza test) and rapid bed-side tests, recognizing the tradeoffs between speed, accessibility, and accuracy.?\
Rapid design and execution of targeted surveillance experiments calling for all potential testers using PCR in a defined area to start testing and report to a specific entity. These experiments could aid in collecting longitudinal samples, which are critical to understanding the impact of ad hoc local interventions (which also need to be recorded).?\
Separation of assay development issues from instruments, and the role of the private sector to help quickly migrate assays onto those devices.?\
Efforts to track the evolution of the virus (i.e., genetic drift or mutations) and avoid locking into specific reagents and surveillance/detection schemes.?\
Latency issues and when there is sufficient viral load to detect the pathogen, and understanding of what is needed in terms of biological and environmental sampling.?\
Use of diagnostics such as host response markers (e.g., cytokines) to detect early disease or predict severe disease progression, which would be important to understanding best clinical practice and efficacy of therapeutic interventions.?\
Policies and protocols for screening and testing.?\
Policies to mitigate the effects on supplies associated with mass testing, including swabs and reagents.?\
Technology roadmap for diagnostics.?\
Barriers to developing and scaling up new diagnostic tests (e.g., market forces), how future coalition and accelerator models (e.g., Coalition for Epidemic Preparedness Innovations) could provide critical funding for diagnostics, and opportunities for a streamlined regulatory environment.?\
New platforms and technology (e.g., CRISPR) to improve response times and employ more holistic approaches to COVID-19 and future diseases.?\
Coupling genomics and diagnostic testing on a large scale.?\
Enhance capabilities for rapid sequencing and bioinformatics to target regions of the genome that will allow specificity for a particular variant.?\
Enhance capacity (people, technology, data) for sequencing with advanced analytics for unknown pathogens, and explore capabilities for distinguishing naturally-occurring pathogens from intentional.?\
One Health surveillance of humans and potential sources of future spillover or ongoing exposure for this organism and future pathogens, including both evolutionary hosts (e.g., bats) and transmission hosts (e.g., heavily trafficked and farmed wildlife and domestic food and companion species), inclusive of environmental, demographic, and occupational risk factors.?',

'Efforts to articulate and translate existing ethical principles and standards to salient issues in COVID-2019?\
Efforts to embed ethics across all thematic areas, engage with novel ethical issues that arise and coordinate to minimize duplication of oversight?\
Efforts to support sustained education, access, and capacity building in the area of ethics?\
Efforts to establish a team at WHO that will be integrated within multidisciplinary research and operational platforms and that will connect with existing and expanded global networks of social sciences.?\
Efforts to develop qualitative assessment frameworks to systematically collect information related to local barriers and enablers for the uptake and adherence to public health measures for prevention and control. This includes the rapid identification of the secondary impacts of these measures. (e.g. use of surgical masks, modification of health seeking behaviors for SRH, school closures)?\
Efforts to identify how the burden of responding to the outbreak and implementing public health measures affects the physical and psychological health of those providing care for Covid-19 patients and identify the immediate needs that must be addressed.?\
Efforts to identify the underlying drivers of fear, anxiety and stigma that fuel misinformation and rumor, particularly through social media.?' ,
            
'Methods for coordinating data-gathering with standardized nomenclature.?\
Sharing response information among planners, providers, and others.?\
Understanding and mitigating barriers to information-sharing.?\
How to recruit, support, and coordinate local (non-Federal) expertise and capacity relevant to public health emergency response (public, private, commercial and non-profit, including academic).?\
Integration of federal/state/local public health surveillance systems.?\
Value of investments in baseline public health response infrastructure preparedness?\
Modes of communicating with target high-risk populations (elderly, health care workers).?\
Risk communication and guidelines that are easy to understand and follow (include targeting at risk populations’ families too).?\
Communication that indicates potential risk of disease to all population groups.?\
Misunderstanding around containment and mitigation.?\
Action plan to mitigate gaps and problems of inequity in the Nation’s public health capability, capacity, and funding to ensure all citizens in need are supported and can access information, surveillance, and treatment.?\
Measures to reach marginalized and disadvantaged populations.?\
Data systems and research priorities and agendas incorporate attention to the needs and circumstances of disadvantaged populations and underrepresented minorities.?\
Mitigating threats to incarcerated people from COVID-19, assuring access to information, prevention, diagnosis, and treatment.?\
Understanding coverage policies (barriers and opportunities) related to testing, treatment, and care?'
]


# Tokenise the task question blocks with gensim_process; a deep copy is passed so `s` itself stays untouched.
questions = gensim_process(copy.deepcopy(s))
# NOTE(review): a bare expression that is not the last line of the cell produces no output.
questions

# The processed question token lists double as the seed word lists, one per entry of `s`.
seed_topic_list = questions
print(len(seed_topic_list))

### Gensim LDA for abstracts

In this cell, we set priors, and iterate across all sources to train 4 topic models

eta = seed priors to the topic models for question words

In [None]:


sources = ['CZI', 'medrxiv', 'biorxiv', 'PMC']
lda_abstract_models = []

def set_priors(eta, topic, words, p=.8, vocab=None):
    """Seed ``topic`` with ``words`` in the topic-by-word prior matrix ``eta``.

    For every word of ``words`` that is present in the vocabulary, the word's
    column is set to ``(1 - p) / (n_topics - 1)`` for all topics and then to
    ``p`` for ``topic``. ``eta`` is modified in place.

    Args:
        eta: 2-D numpy array indexed as ``eta[topic, word_id]``.
        topic: Row index of the topic to seed.
        words: Iterable of seed words.
        p: Prior weight given to the seed words for ``topic``.
        vocab: Optional word -> column-index mapping. Defaults to the
            module-level ``word2id`` (the behaviour before this parameter
            existed).

    Returns:
        None.
    """
    if vocab is None:
        vocab = word2id  # module-level mapping built by the training loop
    word_indexes = [vocab[w] for w in words if w in vocab]
    n_topics = eta.shape[0]
    # Only touch the columns of the seed words. The previous `eta[:, :] = ...`
    # reset the whole matrix on every call, which erased the seeds written by
    # earlier calls for other topics.
    if n_topics > 1:  # with a single topic there is no "other topic" share (and no division by zero)
        eta[:, word_indexes] = (1 - p) / (n_topics - 1)
    eta[topic, word_indexes] = p
    
def showtopics(model):
    """Print one line per topic: the topic index followed by its top words.

    The words are looked up in the model's own ``id2word`` mapping. The
    previous version read the module-level ``id2word``, which only belongs to
    whichever model was trained last.

    Args:
        model: Object exposing ``num_topics``, ``get_topic_terms(topic_id)``
            (yielding ``(word_id, weight)`` pairs) and ``id2word``.
    """
    for topic_id in range(model.num_topics):
        top_words = [model.id2word[word_id] for word_id, _weight in model.get_topic_terms(topic_id)]
        print(topic_id, " ".join(top_words))

    
# Train one seeded LDA model per source and collect it, together with its
# dictionary and corpus, in `lda_abstract_models`.
for source in sources:
    raw_abstracts = list(d_a[d_a['source_x'] == source]['abstract'])
    ppt = gensim_process(copy.deepcopy(raw_abstracts))

    # Append the non-empty question token lists as extra documents so that the
    # seed words are part of the dictionary.
    for q in questions:
        if len(q) !=0: ppt.append(q)

    dict_abstracts = gensim.corpora.Dictionary(ppt)
    corpus_abstract = [dict_abstracts.doc2bow(text) for text in ppt]

    # word -> id comes straight from the dictionary; id -> word is its inverse.
    # The previous code built `word2id` by enumerating the *keys* of `id2word`,
    # which produced an int -> int mapping, so no seed word was ever found and
    # `eta` was never seeded.
    word2id = dict(dict_abstracts.token2id)
    id2word = {idx: token for token, idx in word2id.items()}

    # word id -> index of the (last) question block that contains the word.
    seed_topics = {}
    for t_id, st in enumerate(seed_topic_list):
        for word in st:
            if word in word2id: 
                seed_topics[word2id[word]] = t_id
                
    # One topic per question block; start from a uniform prior.
    n = len(seed_topic_list)
    eta = np.full((n, len(id2word)), 1/(len(id2word)*n))
    
    # Seed topic i with the words of question block i. set_priors returns None,
    # so the former `if not set_priors(...): continue` had no effect.
    for i in range(len(questions)):
        set_priors(eta, i, questions[i], 0.99)
        
    # Build LDA model
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus_abstract,
                                           id2word=dict_abstracts,
                                           num_topics=n, 
                                           random_state=54654,
                                           update_every=1,
                                            eval_every=20,
                                           chunksize=20,
                                           passes=10,
                                           iterations=100,
                                            alpha='auto',
                                            eta=eta,
                                           per_word_topics=True)
#     lda_model.save('../processed/'+source+'_abstract_model', separately=None)
    lda_abstract_models.append({'model': lda_model, 'source': source, 'dict': dict_abstracts, 'corpus': corpus_abstract})

In [None]:
## https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/

# Sentence Coloring of N Sentences
from matplotlib.patches import Rectangle
import seaborn as sns
import matplotlib.colors as mcolors
import datetime


def sentences_chart(lda_model, corpus, start = 0, end = 20):
    """Draw one row per document with its first words coloured by topic.

    ``end - start`` subplot rows are created. Row 0 is left blank, and row
    ``i`` (for ``i > 0``) shows ``corpus[start:end][i-1]``, so the last
    document of the slice is not drawn. Each row shows up to 14 words, each
    coloured by the first topic listed for that word, inside a border coloured
    by the document's highest-probability topic.

    Args:
        lda_model: Model that returns three values (document topics, per-word
            topics, per-word phi values) when indexed with a bag-of-words, and
            that exposes ``id2word``.
        corpus: Sequence of bag-of-words documents.
        start: First index of the slice of ``corpus`` to draw.
        end: End index (exclusive) of the slice of ``corpus``.

    NOTE(review): ``axes[0]`` is indexed directly, so this presumably fails
    when ``end - start == 1`` (``plt.subplots`` then returns a single Axes) -
    confirm.
    """
    corp = corpus[start:end]
    # Ten Tableau colours; topic ids are mapped onto them modulo 10 below.
    mycolors = [color for name, color in mcolors.TABLEAU_COLORS.items()]

    fig, axes = plt.subplots(end-start, 1, figsize=(20, (end-start)*0.95), dpi=160)       
    axes[0].axis('off')
    for i, ax in enumerate(axes):
        if i > 0:
            corp_cur = corp[i-1] 
            topic_percs, wordid_topics, wordid_phivalues = lda_model[corp_cur]
            # (word, first listed topic) pairs; words with no topic are skipped.
            word_dominanttopic = []
            for wd, topic in wordid_topics:
                if len(topic) > 0: word_dominanttopic.append((lda_model.id2word[wd], topic[0]))
            # The label is the position within the slice, not the absolute corpus index.
            ax.text(0.01, 0.5, "Doc " + str(i-1) + ": ", verticalalignment='center',
                    fontsize=16, color='black', transform=ax.transAxes, fontweight=700)

            # Draw Rectangle in the colour of the document's highest-probability topic
            topic_percs_sorted = sorted(topic_percs, key=lambda x: (x[1]), reverse=True)
            ax.add_patch(Rectangle((0.0, 0.05), 0.99, 0.90, fill=None, alpha=1, 
                                   color=mycolors[topic_percs_sorted[0][0]%10], linewidth=2))

            word_pos = 0.06
            for j, (word, topics) in enumerate(word_dominanttopic):
                # Only the first 14 words are drawn.
                if j < 14:
                    ax.text(word_pos, 0.5, word,
                            horizontalalignment='left',
                            verticalalignment='center',
                            fontsize=16, color=mycolors[topics%10],
                            transform=ax.transAxes, fontweight=700)
                    word_pos += .009 * len(word)  # to move the word for the next iter
                    ax.axis('off')
            # Trailing ellipsis after the last drawn word.
            ax.text(word_pos, 0.5, '. . .',
                    horizontalalignment='left',
                    verticalalignment='center',
                    fontsize=16, color='black',
                    transform=ax.transAxes)       

    plt.subplots_adjust(wspace=0, hspace=0)
    # NOTE(review): the title's upper bound is `end-2` and does not depend on how many documents the slice actually holds - confirm intent.
    plt.suptitle('Sentence Topic Coloring for Documents: ' + str(start) + ' to ' + str(end-2), fontsize=22, y=0.95, fontweight=700)
    plt.tight_layout()
    plt.show()



In [None]:
# Index 3 is the model trained for the fourth entry of `sources` ('PMC').
showtopics(lda_abstract_models[3]['model'])

In [None]:
# The live chart call is disabled; a saved image file is displayed instead.
# NOTE(review): presumably download.png is the output of the call below - confirm.
# sentences_chart(lda_model, corpus_abstract)

Image('../input/topiqal/download.png')

# LDA Abstracts Visualization for Topic Discovery

In [None]:
from IPython.core.display import HTML

# Display the pre-generated HTML file for the CZI abstracts.
# NOTE(review): presumably a saved topic-model visualisation - confirm.
HTML('../input/topiqal/czi_abstract_text.html')

In [None]:
# Display the pre-generated HTML file for the biorxiv abstracts.
# NOTE(review): presumably a saved topic-model visualisation - confirm.
HTML('../input/topiqal/biorxiv_abstract_text.html')

In [None]:
# Display the pre-generated HTML file for the medrxiv abstracts.
# NOTE(review): presumably a saved topic-model visualisation - confirm.
HTML('../input/topiqal/medrxiv_abstract_text.html')

In [None]:
# Display the pre-generated HTML file for the PMC abstracts.
# NOTE(review): presumably a saved topic-model visualisation - confirm.
HTML('../input/topiqal/pmc_abstract_text.html')

## Let's tackle Task#3 about Vaccine and Therapeutics


In [None]:
## https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/
def topics_per_document(model, corpus, start=0, end=1):
    """Return the dominant topic and the topic distribution of each document.

    Only ``corpus[start:end]`` is processed.

    Args:
        model: Object that returns three values when indexed with a document;
            the first is a list of ``(topic_id, weight)`` pairs.
        corpus: Sequence of documents accepted by ``model``.
        start: First index of the slice.
        end: End index (exclusive) of the slice.

    Returns:
        A tuple ``(dominant_topics, topic_percentages)``. ``dominant_topics``
        is a list of ``(position_in_slice, topic_id)`` pairs holding the
        highest-weight topic of each document; ``topic_percentages`` is the
        list of per-document ``(topic_id, weight)`` lists.
    """
    dominant_topics, topic_percentages = [], []
    for position, bow in enumerate(corpus[start:end]):
        # Only the document-level topic weights are needed here.
        doc_topics, _word_topics, _phi_values = model[bow]
        ranked = sorted(doc_topics, key=lambda pair: pair[1], reverse=True)
        dominant_topics.append((position, ranked[0][0]))
        topic_percentages.append(doc_topics)
    return dominant_topics, topic_percentages

          
## https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/
def format_topics_sentences(ldamodel, corpus, texts):
    """Build a per-document table of dominant topic, its weight and its keywords.

    Args:
        ldamodel: Model that can be indexed with ``corpus`` to yield one entry
            per document, and that exposes ``per_word_topics`` and
            ``show_topic(topic_id)`` (yielding ``(word, weight)`` pairs).
        corpus: Sequence of bag-of-words documents.
        texts: Original texts, one per document (list or Series).

    Returns:
        DataFrame with the columns 'Dominant_Topic', 'Perc_Contribution'
        (rounded to 4 decimals) and 'Topic_Keywords', followed by the column
        holding ``texts`` (named 0 when ``texts`` has no name).

    Changes relative to the previous version:
      * Rows are collected in a list and the frame is built once, instead of
        calling ``DataFrame.append`` per document (quadratic, and removed in
        pandas 2.0).
      * An empty corpus returns an empty frame with the expected columns
        instead of failing when the column names are assigned.
      * Rows are indexed by document position, so a document for which the
        model reports no topics gets NaN topic columns instead of shifting
        every later row out of alignment with ``texts``.
    """
    columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
    doc_ids, rows = [], []

    # Get main topic in each document
    for i, row_list in enumerate(ldamodel[corpus]):
        row = row_list[0] if ldamodel.per_word_topics else row_list
        if not row:
            continue
        # Highest-weight topic; on ties the first listed topic wins, exactly as
        # the earlier stable descending sort did.
        topic_num, prop_topic = max(row, key=lambda x: x[1])
        topic_keywords = ", ".join([word for word, prop in ldamodel.show_topic(topic_num)])
        doc_ids.append(i)
        rows.append((int(topic_num), round(prop_topic, 4), topic_keywords))

    sent_topics_df = pd.DataFrame(rows, columns=columns, index=doc_ids)

    # Add original text to the end of the output
    contents = pd.Series(texts)
    return pd.concat([sent_topics_df, contents], axis=1)

import logging

# Only let gensim log records of level WARNING and above through.
logging.getLogger("gensim").setLevel(logging.WARNING)

from tqdm import tqdm
import itertools

# One entry per source: the ids of the two abstract topics that weigh most for
# the question (appended inside the loop below).
topics_list = []

## Choosing TASK 3

# Split every entry of `s` on '?'; entry 3 is the one used for this task.
# NOTE(review): `s`, `gensim_process` and `copy` come from earlier cells --
# `s` presumably holds the task descriptions; confirm.
q = [b.split('?') for b in s]
# Pre-process a deep copy so the original strings in `q` are left untouched.
proc = gensim_process(copy.deepcopy(q[3]))
# `proc` is an iterable of iterables (presumably token lists); flatten it into
# one flat list that serves as the question's bag of tokens.
prashne = list(itertools.chain(*proc))

# Per-source dataframes (with their dominant topic) collected by the loop below.
df_sources = []
# Per-source lists of reduced contexts, one context string per selected paper.
all_paras = [] 

%timeit

# For every source: find the abstract topics closest to the question, keep the
# papers whose dominant abstract topic is one of them, then keep only those
# paragraphs of each paper whose body-level topics match the question.
# NOTE(review): the bare `%timeit` line magic just before this loop is given no
# statement, so it does not time the loop; `%%time` as the first line of the
# cell would.
for idx, instance in tqdm(enumerate(lda_abstract_models)):
    # Each entry bundles one source's abstract LDA model with its corpus and dictionary.
    source = instance['source']
    lda_model = instance['model']
    corpus = instance['corpus']
    dict_abs = instance['dict']
    
    # Question Vector 3.0
    # Bag-of-words of the question in the abstract dictionary, then its topic mix.
    q_vec = dict_abs.doc2bow(prashne)
    q_doc = lda_model[q_vec]
    
    # Ids of the two highest-weighted topics of the question.
    topics_list.append([topic[0] for topic in sorted(q_doc[0], reverse=True, key=lambda x: x[1])[:2]])
    
    # NOTE(review): end=-1 turns the slice into corpus[0:-1], so the last
    # document of the corpus is left out -- confirm this is intended.
    dominant_topics, topic_percentages = topics_per_document(model=lda_model, corpus=corpus, end=-1)
    df_ = d_a[d_a['source_x'] == source].reset_index(drop=True)
    
    # NOTE(review): the hard-coded 9 drops nine more documents; the reason is
    # not visible in this cell (presumably aligns the corpus with df_) -- confirm.
    length = len(dominant_topics)-9
    
    # dominant_topics holds (position, topic_id) pairs; keep the topic ids only.
    t = [t[1] for t in dominant_topics[:length]]
    df_['dominant_topic'] = t
    df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model, corpus=corpus[:length], texts=df_['body_text'])
    df_sources.append(df_)
    
    topic_list = topics_list[idx]
    print(topics_list[idx])
    # Format
    df_dominant_topic = df_topic_sents_keywords.reset_index()
    df_dominant_topic = df_dominant_topic[:length]
    # The comprehension's `idx` is local to the comprehension and does not
    # disturb the outer loop index.
    df_dominant_topic['sha'] = [df_['sha'].iloc[idx] for idx in range(length)]
    # Positional rename: the text column becomes 'Text' and 'sha' becomes 'paper_id'.
    df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text', 'paper_id']

    # Keep the papers whose dominant abstract topic is one of the question's top topics.
    filtered = df_dominant_topic[df_dominant_topic['Dominant_Topic'].isin(topic_list)]
    print(len(filtered))
    
    # Load this source's body-text LDA model from disk.
    body_model = source.lower()+'_lda_body_model'
    lda_body_model = gensim.models.ldamodel.LdaModel.load('../text-processing/processed/'+body_model)
    # NOTE(review): q_vec was built with the abstract dictionary (dict_abs) but
    # is fed to the body model here, while paragraphs below use dict_body_text;
    # confirm the word ids of the two dictionaries are compatible.
    q_doc = lda_body_model[q_vec]  ## get the dominant topic and query merged 
    # Ids (as floats) of the question's two highest-weighted body topics.
    body_text_list = [float(topic[0]) for topic in sorted(q_doc[0], reverse=True, key=lambda x: x[1])[:2]]
    
    
    # Re-attach the full paper rows to the filtered papers via their sha.
    merged = pd.merge(filtered, df_, left_on='paper_id', right_on='sha', how='left')
    text = merged['body_text'].values.ravel().tolist()
    sha = merged['sha'].values.ravel().tolist()
    reduced_para = []

    # The loop variable `text` rebinds the name of the list being zipped; zip
    # already holds the list, so iteration is unaffected. `paper_id` is unused.
    for paper_id, text in zip(sha, text):
        # One entry per paragraph of the paper body.
        p_text = text.split('\n')
        # NOTE(review): `dict_body_text` is defined outside this cell and is the
        # same for every source -- confirm it matches each per-source body model.
        # Indexing below assumes gensim_process returns one item per paragraph.
        p_vec = [dict_body_text.doc2bow(txt) for txt in gensim_process(copy.deepcopy(p_text))]

        # `length` is reused here as the count of kept paragraphs; the document
        # count stored in it above is recomputed on the next outer iteration.
        length = 0
        context = ""
        for i in range(len(p_vec)):
            txt = p_vec[i]
            # Question topics that this paragraph carries with weight above 0.20.
            topics = [topic_term[0] for topic_term in lda_body_model[txt][0] if topic_term[1] > 0.20 and float(topic_term[0]) in body_text_list]
            if len(topics) == 0:
                continue
            length += 1
            # NOTE(review): paragraphs are concatenated without any separator,
            # so the last word of one can fuse with the first word of the next.
            context += p_text[i]

#         print("After filtering: {}".format((len(p_text) - length) * 100/len(p_text)))
        # One (possibly empty) context string per selected paper.
        reduced_para.append(context)
    all_paras.append(reduced_para)


## In the above cell we have collected all the paragraphs from the papers that have topics similar to the question

### The cell below flattens the per-source lists of contexts into a single list and writes the contexts to a file, ready to be fed to the BERT QA model for inference.

In [None]:
# Flatten the per-source lists of contexts into one flat list of context strings.
prashnegalu = list(itertools.chain.from_iterable(all_paras))

# Write one context per line: without a separator every context is fused into
# a single line and a later `readlines()` cannot tell them apart. The file is
# only written, so plain 'w' is enough; UTF-8 is set explicitly because the
# paper texts contain non-ASCII characters.
with open('squad.txt', 'w', encoding='utf-8') as fp:
    fp.writelines(str(context_text) + '\n' for context_text in prashnegalu)

In [None]:
# Read the context file shipped with the uploaded dataset; `d` gets one string
# per line of the file (trailing newlines included).
with open('../input/topiqal/squad.txt', 'r') as f:
    d = f.readlines()
    
# Peek at the first two entries.
print(d[:2])

Adapted from Google TPU Tutorial + Pragnakalpa Tech Labs

## **BioBERT Fine-tuning on SQUAD 2.0 and Prediction on COVID-19 BioMedical Datasets using Cloud TPU** 
**[Inferences on COVID-19 Open Research Dataset Challenge (CORD-19) Dataset](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)**

---

### **Overview**
**BERT**, or Bidirectional Encoder Representations from Transformers, is a method of pre-training language representations which obtains state-of-the-art results on a wide array of Natural Language Processing (NLP) tasks. The academic paper can be found here: https://arxiv.org/abs/1810.04805.

**SQuAD** (Stanford Question Answering Dataset) is a reading comprehension dataset consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.

**[BioBERT](https://github.com/dmis-lab/biobert)** : This repository provides the code for fine-tuning BioBERT, a biomedical language representation model designed for biomedical text mining tasks such as biomedical named entity recognition, relation extraction, question answering, etc. The paper [BioBERT: a pre-trained biomedical language representation model for biomedical text mining](https://academic.oup.com/bioinformatics/article/36/4/1234/5566506) can be referenced for more details. This project is done by [DMIS-Lab](https://dmis.korea.ac.kr/).


### **Motivation**

> The main idea is to be able to ask questions related to transmission, cure, therapeutics, vaccines, risk factors, social and economic impacts.


### **Download the BioBERT Pretrained Model**


BioBERT repo provides five versions of pre-trained weights. Pre-training was based on the [original BERT code](https://github.com/google-research/bert) provided by Google, and training details are described in our paper. Currently available versions of pre-trained weights are as follows:

* **[BioBERT-Base v1.1 (+ PubMed 1M)](https://drive.google.com/file/d/1R84voFKHfWV9xjzeLzWBbmY1uOMYpnyD/view?usp=sharing)** - based on BERT-base-Cased (same vocabulary)
* **[BioBERT-Large v1.1 (+ PubMed 1M)](https://drive.google.com/file/d/1GJpGjQj6aZPV-EfbiQELpBkvlGtoKiyA/view?usp=sharing)** - based on BERT-large-Cased (custom 30k vocabulary), [NER/QA Results](https://github.com/dmis-lab/biobert/wiki/BioBERT-Large-Results)
* **[BioBERT-Base v1.0 (+ PubMed 200K)](https://drive.google.com/file/d/17j6pSKZt5TtJ8oQCDNIwlSZ0q5w7NNBg/view?usp=sharing)** - based on BERT-base-Cased (same vocabulary)
* **[BioBERT-Base v1.0 (+ PMC 270K)](https://drive.google.com/file/d/1LiAJklso-DCAJmBekRTVEvqUOfm0a9fX/view?usp=sharing)** - based on BERT-base-Cased (same vocabulary)
* **[BioBERT-Base v1.0 (+ PubMed 200K + PMC 270K)](https://drive.google.com/file/d/1jGUu2dWB1RaeXmezeJmdiPKQp3ZCmNb7/view?usp=sharing)** - based on BERT-base-Cased (same vocabulary)


I have used **[BioBERT-Large v1.1 (+ PubMed 1M)](https://drive.google.com/file/d/1GJpGjQj6aZPV-EfbiQELpBkvlGtoKiyA/view?usp=sharing)**

---



### **Clone the BERT github repository**

**The BioBERT has been cloned and uploaded to the GCS Bucket. We still need BERT to access SQUAD related files.**



In [None]:
# from google.colab import auth
# auth.authenticate_user()

!git clone https://github.com/google-research/bert.git

In [None]:
!pip install tensorflow==1.15.0

# Fine-tuning with BioBERT

### **Set up TPU environment**

In [None]:


import datetime
import json
import os
import pprint
import random
import string
import sys
import tensorflow as tf

# assert 'COLAB_TPU_ADDR' in os.environ, 'ERROR: Not connected to a TPU runtime; please see the first cell in this notebook for instructions!'
# TPU_ADDRESS = 'grpc://' + os.environ['COLAB_TPU_ADDR']
# print('TPU address is => ', TPU_ADDRESS)

# from google.colab import auth
# auth.authenticate_user()
# with tf.Session(TPU_ADDRESS) as session:
#   print('TPU devices:')
#   pprint.pprint(session.list_devices())

#   # Upload credentials to TPU.
#   with open('/content/adc.json', 'r') as f:
#     auth_info = json.load(f)
#   tf.contrib.cloud.configure_gcs(session, credentials=auth_info)
  # tfio.gcs.configure_colab_session(session, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

**Download the SQUAD 2.0 Dataset**

In [None]:
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json



### **Create output directory** 


> Created output directory at GCS (Google Cloud Storage) bucket, where fine_tuned model resides after training completion. 

> Also need to move Pre-trained Model at GCS (Google Cloud Storage) bucket, as Local File System is not Supported on TPU. 


In [None]:

# Name of an existing GCS bucket ('xxx' is a placeholder to be replaced).
BUCKET = 'xxx' #@param {type:"string"}
assert BUCKET, '*** Must specify an existing GCS bucket name ***'
# Name of the directory inside the bucket that receives the fine-tuned model
# ('xxx' is again a placeholder).
output_dir_name = 'xxx' #@param {type:"string"}
BUCKET_NAME = 'gs://{}'.format(BUCKET)
OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET,output_dir_name)
# Create the output directory in the bucket; tf.gfile is the TensorFlow 1.x
# file API, matching the tensorflow==1.15.0 pin installed earlier.
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

### **Training**

In [None]:
# Fine-tune BioBERT-Large on SQuAD 2.0 and predict on the dev set.
# - The data files are named exactly as downloaded by the wget cell
#   (train-v2.0.json / dev-v2.0.json).
# - {OUTPUT_DIR} is expanded by IPython to the value of the Python variable;
#   a bare OUTPUT_DIR would be passed to the script as that literal text.
# NOTE(review): run_squad.py lives inside the cloned `bert/` directory --
# confirm the working directory (or use bert/run_squad.py).
!python run_squad.py \
    --vocab_file='biobert_large/vocab_cased_pubmed_pmc_30k.txt' \
    --bert_config_file='biobert_large/bert_config_bio_58k_large.json' \
    --init_checkpoint='bio_bert_large_1000k.ckpt' \
    --do_train=True \
    --train_file=train-v2.0.json \
    --do_predict=True \
    --predict_file=dev-v2.0.json \
    --train_batch_size=24 \
    --learning_rate=3e-5 \
    --num_train_epochs=100 \
    --use_tpu=True \
    --tpu_name=grpc://x.x.x.x:port \
    --max_seq_length=384 \
    --doc_stride=128 \
    --version_2_with_negative=True \
    --output_dir={OUTPUT_DIR}

## Both BERT and BioBERT were fine-tuned on SQUAD Dataset, however evaluation highlighted BioBERT had a higher F1-Score.

{
  "exact": 81.35264886717763,<br/>
  "f1": 84.39265216044659,<br/>
  "total": 11873,<br/>
  "HasAns_exact": 79.21727395411605,<br/>
  "HasAns_f1": 85.30599849881634,<br/>
  "HasAns_total": 5928,<br/>
  "NoAns_exact": 83.48191757779647,<br/>
  "NoAns_f1": 83.48191757779647,<br/>
  "NoAns_total": 5945<br/>
}

### **Create Input File**


> We are creating input_file.json as a blank json file and then writing the data in SQUAD format in the file.

> The context is obtained from the CORD-19 corpus. The steps were:
  1. Topic Modeling was done on abstracts, and once model converged to coherent topics, dominant topics were assigned to each abstract. 
  2. In this example, obtained an article that fell to the same topic cluster. 


In [None]:


!touch input_file.json

!pip install gcsfs
!pip install fastparquet

In [None]:
import fastparquet
import pandas as pd

# Load the processed documents table shipped with the uploaded dataset.
df = pd.read_parquet('../input/topiqal/documents_processed.parquet')

# Questions[3]
# Sub-questions of Task 3 (vaccines and therapeutics); build_qa() below turns
# each entry into one SQuAD-style question record.
# NOTE(review): the second entry contains the typo 'minocyclinethat that'
# (presumably 'minocycline that'); left untouched because the string is fed to
# the model as-is -- confirm before correcting.
questions = ['Effectiveness of drugs being developed and tried to treat COVID-19 patients.',
 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.',
 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.',
 'Exploration of use of best animal models and their predictive value for a human vaccine.',
 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.',
 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.',
 'Efforts targeted at a universal coronavirus vaccine.',
 'Efforts to develop animal models and standardize challenge studies',
 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers',
 'Approaches to evaluate risk for enhanced disease after vaccination',
 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]']


import uuid

def build_qa(question_list=None):
    """Build the SQuAD-style ``qas`` records for a list of questions.

    Parameters
    ----------
    question_list : iterable, optional
        Questions to convert. When omitted, the notebook-level ``questions``
        list is used, so existing ``build_qa()`` calls behave as before.

    Returns
    -------
    list of dict
        One dict per question, in input order, with the keys ``"question"``
        (the question as ``str``), ``"id"`` (a fresh random UUID4 string) and
        ``"is_impossible"`` (an empty string).
    """
    if question_list is None:
        # Backward-compatible default: fall back to the module-level list.
        question_list = questions

    return [
        {
            "question": str(question),
            "id": str(uuid.uuid4()),
            "is_impossible": "",
        }
        for question in question_list
    ]


## The contexts data resembles this, except that we read from the file 
## Below is a sample instance
# contexts = ['International Virus Classification Commission (ICTV) classified 2019-nCoV as Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) on February 11, 2020. At the same time, WHO named the disease caused by 2019-nCoV as COVID-19. Common symptoms of a person infected with coronavirus include respiratory symptoms, fever, cough, shortness of breath, and dyspnea. In more severe cases, infection can cause pneumonia, severe acute respiratory syndrome, kidney failure, and even death. There is currently no specific medicine or treatment for diseases caused by SARS-CoV-2 . In the fight against coronavirus, scientists have come up with three strategies for developing new drugs . As  Whether the screened anti-viral drugs really work on these targets needs further verification. We also do not recommend the application of new coronavirus pneumonia to compounds for which no target has been predicted. Nsp3b and E-channel. But we need to do further experiments to verify this conclusion. In ',
#  'Since December 2019, Wuhan and gradually other places of China have experienced an outbreak of pneumonia epidemic caused by the 2019 novel coronavirus (2019-nCoV, later named SARS-CoV-2). The World Health Organization has declared the current outbreak of COVID-19 in China as a Public Health Emergency of International Concern. As of 10:00 Feb 13, 2020, the epidemic has caused 1366 deaths out of 59 834 confirmed and 16 067 suspected cases. Some unprecedented measures were taken to stop the spread of the virus including cancelling of gatherings, extending the Chinese New Year holidays, and limiting the number of people in public places (e.g. train stations and airports). The outbreak itself and the control measures may lead to widespread fear and panic, especially stigmatization and social exclusion of confirmed patients, survivors and relations, which may escalate into further negative psychological reactions including adjustment disorder and depression. Sudden outbreaks of public health events always pose huge challenges to the mental health service system. Examples include the HIV/AIDS epidemic that captivated world attention in the 1980s and 1990s, the severe acute respiratory syndrome (SARS) in 2002 and 2003, the H1N1 influenza pandemic of 2009, the Ebola virus outbreak in 2013, and the Zika virus outbreak in 2016. During these epidemics, the consequences on the psychosocial wellbeing of at-risk communities are sometimes largely overlooked, especially in the Ebola-affected regions, where few measures were taken to address the mental health needs of confirmed patients, their families, medical staffs or general population. The absence of mental health and psychosocial support systems and the lack of well-trained psychiatrists and/or psychologists in these regions increased the risks of psychological distress and progression to psychopathology. The lack of effective mental health systems added to the poverty in Sierra Leone and Liberia. 
In China, the mental health service system has been greatly improved after several major disasters, especially the Wenchuan earthquake. In the process of dealing with group crisis intervention, various forms of psychosocial intervention services have been developed, including the intervention model of expert-coach-teacher collaboration after the Wenchuan earthquake 10 and the equilibrium psychological intervention on people injured in the disaster incident after the Lushan earthquake. With the support for remote psychological intervention provided by the development of Internet technology, especially the widespread application of 4G or 5G networks and smartphones, we developed a new intervention model to handle the present COVID-19 public health event. This new model, one of West China Hospital, integrates physicians, psychiatrists, psychologists and social workers into Internet platforms. We propose that the psychological crisis intervention should be dynamic, adapted to suit different stages of the epidemic, i.e., during and after the outbreak. During the outbreak, mental health professionals should actively participate in the overall intervention process for the disease, so that the mental health and psychosocial response can be mobilized in a timely fashion. Specifically, psychological crisis interventions should be integrated into the treatment of pneumonia and blocking of the transmission routes. In this stage, psychological crisis intervention should include two simultaneous activities: (1) intervention for fear of disease, carried out mainly by physicians and assisted by psychologists; During the epidemic, rapid integration of the government and social forces into the Internet framework can maximize effective management of the psychological crisis. We established a pyramid structure of psychological crisis management with government as the core leader. At the bottom of the pyramid are communities, which mainly provide psychosocial support. 
Psychological assistance (such as hotline, online consulting) is used to identify and help the target groups who need intervention. Through the Huayitong app and Psyclub applet (two integrated APPs for online registration, appointment, payment and other functions for West China Hospital and Sichuan psychological consultant platform), telephone hotline and WeChat platform, we quickly organized physicians at all levels of the West China Hospital (including retired professors) and psychologists from all over Sichuan Province to form psychological rescue teams to formulate solutions (e.g. developing technical guidelines and training programs, starting online consultation and setting up problem feedback mechanisms). Psychological rescue teams conduct crisis interventions for confirmed patients and front-line staff. The expert team at the top of the pyramid provide health education and training during the whole process . How to quickly identify the emotional and stress problems of individuals is an important part of basis for psychological intervention. We screened the mental health status of suspected cases, medical staffs and general population via WeChat platform and/or telephone by using questionnaires (e.g. Mood Index Questionnaire, Patient Health Questionaire-9) as the evaluation tool. Proper intervention strategies were chosen based on the screening results. Follow-up is performed regardless of whether the individual reports mental health problems or not. The process and content of psychological intervention is shown in and . After the epidemic outbreak, psychosocial support mainly focuses on the quarantined people and medical staffs working for them . Social support and psychological intervention are mostly provided by family members, social workers, psychologists, and psychiatrists to isolated patients, suspected patients, and close contacts, primarily through telephone hotline and Internet (e.g. WeChat, APPs). 
Medical staffs working for the quarantined are the special group who need a lot of social support, and they are also an important force to provide social support for the isolated patients. To guarantee their continued effective work, their mental health status should be monitored and a continuum of timely interventions should be made available to support them. The Anticipated, Plan and Deter (APD) Responder Risk and Resilience Model is an effective method for understanding and managing psychological impacts among medical staffs, including managing the full risk and resilience in the responder "hazard specific" stress. In the APD process, medical staffs receive a pre-event stress training focusing on the psychosocial impact of high-casualty events on the hospital and field disaster settings. During the training, participants are given the chance to develop a "personal resilience plan", which involves identifying and anticipating response challenges. After that they should learn to use it in real intervention response.',
#  'Coronavirus disease 2019 (COVID-2019) has been recognized as a global threat, and several studies are being conducted using various mathematical models to predict the probable evolution of this epidemic. These mathematical models based on various factors and analyses are subject to potential bias. Here, we propose a simple econometric model that could be useful to predict the spread of COVID-2019. We performed Auto Regressive Integrated Moving Average (ARIMA) model prediction on the Johns Hopkins epidemiological data to predict the epidemiological trend of the prevalence and incidence of COVID-2019. For further comparison or for future perspective, case definition and data collection have to be maintained in real time. © 2020 The Authors. Published by Elsevier Inc. This is an open access article under the CC BY license (http://creativecommons. org/licenses/by/4.0/). The daily prevalence data of COVID-2019 from January 20, 2020 to February 10, 2020 were collected from the official website of Johns Hopkins University (https://gisanddata.maps.arcgis.com/apps/ opsdashboard/index.html), and Excel 2019 was used to build a time-series database [1]. ARIMA model was applied to a dataset consisting of 22 number determinations. shows that the overall prevalence of COVID-2019 presented an increasing trend that is reaching the epidemic plateau. The difference between cases of one day and cases of the previous day D(Xn-Xn-1) showed a nonconstant increase in the number of confirmed cases. Descriptive analysis of the data was performed to evaluate the incidence of new confirmed cases of COVID-2019 and to prevent eventual bias. The ARIMA model includes autoregressive (AR) model, moving average (MA) model, and seasonal autoregressive integrated moving average (SARIMA) model . The Augmented Dickey-Fuller (ADF) unit-root test helps in estimating whether the time series is stationary. Log transformation and differences are the preferred approaches to stabilize the time series . 
Seasonal and nonseasonal differences were used to stabilize the term trend and periodicity. Parameters of the ARIMA model were estimated by autocorrelation function (ACF) graph and partial autocorrelation (PACF) correlogram. To determine the prevalence of COVID-2019, ARIMA (1,0,4) was selected as the best ARIMA model, while ARIMA (1,0,3) was selected as the best ARIMA model for determining the incidence of COVID-2019. Gretl2019d statistical software was used to perform Specifications Infectious Diseases Specific subject area Econometric models applied to infectious diseases epidemiological data to forecast the prevalence and incidence of COVID-2019 Type of data Chart Graph data were acquired Gretl 2019d http://gretl.sourceforge.net/win32/index_it.html Data format Data are in raw format and have been analyzed. An Excel file with data has been uploaded. Parameters for data collection Parameters used for ARIMA were model ARIMA (1,2,0) and ARIMA (1,0,4) Description of data collection The daily prevalence data of COVID-2019 from January 20, 2020 to February 10, 2020 were collected from the official website of Johns Hopkins university (https://gisanddata. maps.arcgis.com/apps/opsdashboard/index.html), and Excel 2019 was used to build a time-series database. Descriptive analysis of the data was performed, and to evaluate the incidence of new confirmed cases of COVID-2019 and to prevent eventual bias, the difference between the cases confirmed on that day and the cases confirmed on the previous day were calculated D(X n -X n-1 ). Raw data can be retrieved from the Github repository https://github.com/ CSSEGISandData/COVID-19 Value of the Data These data are useful because they provide a forecast for COVID-2019 epidemic, thus representing a valid and objective tool for monitoring infection control. 
All institutions involved in public health and infection control can benefit from these data because by using this model, they can daily construct a reliable forecast for COVID-2019 epidemic. The additional value of these data lies in their easy collection and in the possibility to provide valid forecast for COVID-2019 daily monitoring after the application of the ARIMA model. These data represent an easy way to evaluate the transmission dynamics of COVID-2019 to verify whether the strategy plan for infection control or quarantine is efficient. statistical analysis on the prevalence and incidence datasets, and the statistical significance level was set at 0.05. A previous study was considered as reference for the methodology of the analysis . Logarithmic transformation was performed to evaluate the influence of seasonality on the forecast. The correlogram reporting the ACF and PACF showed that both prevalence and incidence of COVID-2019 are not influenced by the seasonality. The forecast of prevalence and incidence data with relative 95% confidence intervals are reported in . Although more data are needed to have a more detailed prevision, the spread of the virus seems to be slightly decreasing. Moreover, although the number of confirmed cases is still increasing, the incidence is slightly decreasing. If the virus does not develop new mutations, the number of cases should reach a plateau ( . The forecast and the estimate obtained are influenced by the "case" definition and the modality of data collection. For further comparison or for future perspective, case definition and data collection must be maintained in real time. ',
#  "Using 2019 data from the International Air Transport Association (IATA), we identified all cities in China that received at least 100 000 airline passengers from Wuhan during February through April 2019. In a scenario where these cities might experience local epidemics, we analyzed the volumes of airline passengers to international destinations from February to April 2019. To generate these estimates, we used anonymized, passenger-level flight itinerary data from IATA, comprising both commercial flights and scheduled charter flights. These data account for ∼90% of global air travel volumes, with the remaining volumes modelled using market intelligence. We report the top 50 international destination cities of passengers arriving from nine cities in mainland China, plus Hong Kong, and present the corresponding infectious disease vulnerability index (IDVI) for each receiving country. The IDVI is a validated measure of a country's capacity to manage infectious disease threats, and utilizes multiple indicators including health, political and economic metrics. Scores range from 0 to 1 with higher scores representing a greater capacity to cope with epidemic threats. We analyzed international airline passenger trips from the following 10 cities: Wuhan, Beijing, Shanghai, Kunming, Chengdu, Xiamen, Haikou, Guangzhou, Shenzhen and Hong There currently are numerous unknowns including the presumed animal origins of the virus, the efficiency of humanto-human transmission, an understanding of the full spectrum of clinical illness and an incomplete epidemiological picture of disease activity in China. While our analysis does not account for the potential effects of the epidemic on changing travel behaviours, it reflects worldwide flows of airline travellers at the same time of year in 2019, including the Lunar New Year in February 2019. 
At the time of writing, flights and land transportation in and out of Wuhan have been suspended, and it is uncertain how this will impact disease transmission in China. Our findings could support public health planning and readiness for different scenarios should the current epidemic spread more widely across mainland China and neighbouring cities.",
#  "While specific vaccines and antiviral agents are the most effective methods to prevent and treat viral infection, there are not yet effective treatments that target the 2019-nCoV. Development of these treatments may require months or years, meaning that a more immediate treatment or control mechanism should be found if possible. Herbs used in traditional Chinese medicine present a potentially valuable resource to this end. The effectiveness of herbal treatment to control contagious disease was demonstrated during the 2003 severe acute respiratory syndrome (SARS) outbreak . As such, the Chinese government is encouraging the use of herbal plants in fighting this new viral pneumonia. However, the application of herbal treatment is mainly guided by the type of herb (based on the catalogue of classic literature on herbs) and the patient's symptoms or signs. There is often not enough information to predetermine whether the herbs in question can directly target the viral cause, in other words, herbal usage is generally not guided by viral pathology. We think more detailed knowledge about the direct antiviral effects of different plants would be greatly helpful to the doctors selecting them. It is a challenge to screen out the herbs containing anticoronavirus (2019-nCoV) compounds from the large number of those possibly being used for patients infected with this pathogen, especially in very short time. Here, we propose two principles to guide such work: oral effectiveness and traditional usage compatibility. The first principle refers to the fact that most Chinese herbal plants are orally ingested after boiling with water, meaning that the anti-coronavirus (2019-nCoV) ingredients in selected plants should be absorbable via oral preparation. The second principle recognizes that candidate plants should be consistent with the type classifications for traditional herbal usage, since type-guided applications are integral to herbal use, as mentioned above. 
Following these two principles, we used a 6-step selection process (3 for each principle), including drug-likeness, evaluation of oral bioavailability, molecular docking, network pharmacology analysis and other methods to identify herbs that have both a high possibil-ity of containing effective anti-coronavirus (2019-nCoV) compounds and are classified as treating virus-caused respiratory infection. Since Chinese herbal treatments are always taken orally after boiling with water, an in silico integrative model of absorption, distribution, metabolism and excretion (ADME) was used to screen for natural compounds that may be bioactive via oral administration. The indices used for the screening include evaluation of oral bioavailability, Caco-2 permeability, drug-like value, and drug half-life. The threshold values indicating effectiveness for these four indices were > 30%, > À0.4, > 0.18 and > 3 h, respectively, as recommended by Hu et al . The values of these four indices can be obtained from the TCMSP database. Herbs were selected through three steps. (1) Primary selection: molecules chosen from the above steps were used as input for the TCMSP, ETCM and SymMap to search for plants containing that input and the plants were filtered by the numbers of antiviral compounds they contain. Those containing 2 or more antiviral compounds were selected for the next step. (2) Classic usage catalogue cross-reference: only herbs traditionally used to treat viral respiratory infection were retained for further study. The TCMSP provided the main components of each herb and the protein targets for each component. We identified the reported chemical constituents for each plant in the final analysis and used the ADME indices listed above to find the orally absorbable and drug-like compounds for the plant. The protein targets of these compounds were downloaded from the TCMSP database. 
All protein targets for each individual plant were used as input for the String online server (https://string-db.org/) to perform proteinprotein interaction analysis and pathway enrichment. Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways enriched (with P < 0.01) by the input were downloaded. All data were processed using the statistical language R (3.6.2), unless otherwise specified. We received 261 hits from conducting our search in the PubMed database. After careful evaluation of the abstracts from these citations we downloaded and carefully analyzed the full text of 23 highly relevant papers. The natural compounds reported to have biologically confirmed anti-coronavirus activity were identified and then compared to the ingredients listed in three Chinese herbal databases. The result was 115 overlapping ingredients, which we used for further testing ). Step 2 Step 3 Step 2 Step 3 Step 1 Each of the potentially effective herbal remedies contains many ingredients in addition to the antiviral ones found here. Thus, the general effects of each plant should be examined by combining the effects of all of the orally absorbable and biologically active ingredients in it. To evaluate the possible general in vivo effects of each of our identified herbs, we used the ADME indices listed above to examine each of the orally absorbable and drug-like ingredients recorded in the TCMSP database for each plant. We then extracted the target proteins for each ingredient which had passed the screening process. All proteins belonging to a single plant were combined as input on the online protein-protein interaction analysis server, String, to find the pathway enrichment. For the 26 herbs, about 1/3 of the top 30 KEGG-enriched pathways (mean = 11) were related to regulating viral infection, immune/inflammatory reactions and hypoxia response, indicating that they are potentially effective treatments for viral respiratory infection ( ). 
Note that some of the herbal plants selected here had been reported to be effective for SARS-CoV infection in ). Two principles guided our screening work. The first is that the anti-coronavirus (2019-nCoV) components contained in the source plants should be absorbable via oral prescription. This principle requires that the herbs selected should contain biologically proven anti-coronavirus (2019-nCoV) ingredients, and that these natural compounds should pass the drug-likeness and oral bioavailability evaluations. Therefore, we conducted a three-step screening process. First, we extracted natural compounds verified in PubMed as being effective in treating SARS or MERS coronavirus and then cross-checked these compounds in the Chinese herbal databases. There were 115 overlapping compounds. This method was an expeditious way to identify natural components both preexisting in Chinese herbal treatment and having a high possibility of anti-coronavirus (2019-nCoV) activity. This is important, as the anti-coronavirus effects of the selected compounds have been biologically confirmed, and the genetic similarities between coronavirus (2019-nCoV) and SARS or MERS coronavirus are high . The anti-coronavirus effects of the natural compounds screened by the above method have been mainly confirmed in vitro by direct loading onto cultured cells, thus it does not guarantee their effectiveness in vivo, especially with oral preparation-the principal way in which Chinese herbals are administered. Therefore, to meet the first principle, we ran ADME filters on the natural compounds selected by 4 indices, as used by Hu et al. . Among the 115 compounds highlighted by our first step, only 13 passed this screening, showing the necessity of such a test. Our second principle for screening should also be emphasized and elaborated upon. It states that the selected herbal plants must conform to traditional usages. There are many kinds of Chinese herbs that have been used for thousands of years. 
Based on this rich history and experience, Chinese herbal medicines are divided into different types, each type dedicated to certain kinds of diseases. Ignoring these grouping guidelines can lead to serious side effects. Therefore, as a further condition for the medicine screened here, we verified that they have been routinely used to treat viral pneumonia. To meet this principle, we conducted another threestep screening process for the herbal plants. Of course, it should be pointed out that Chinese herbs that have not been identified through this screening process may still have beneficial effects. Further, considering that the biologically validated natural compounds reported in the literature cannot cover all antiviral natural compounds, and the natural compounds included in the Chinese medicine database are not complete, the process that we have followed may have excluded herbs that would be well suited to this treatment. Nevertheless, the purpose of this screening was to provide a rational approach for selecting Chinese herbal medicines with a high potential efficacy in treating 2019-nCoV and related viruses. The specific dosage and usage of each herb should be determined based on patients' manifestations. Finally, the key step in this screening was molecular docking. The 3D structures of the proteins used here are based on reported gene sequences. If the virus mutates during transmission, a new screening is recommended. In conclusion, this work has identified several Chinese medicinal plants classified as antiviral/pneumonia-effective that might directly inhibit the novel coronavirus, 2019-nCoV. Additionally, we propose screening principles and methods which may provide guidance in screening antiviral drugs from other natural drug databases. DZ conceived the study, participated in its design, coordination, and all the work processes. KW participated in herbal selection. XZ participated in data collection and network pharmacology analysis. 
SD helped to collect data. BP helped to draft the manuscript. ",
#  "This study was conducted in accordance with the Declaration of Helsinki and was approved by the National Health Commission of China and Ethics Commission of the Jin Yin-tan Hospital of Wuhan (No. KY-2020-01.01). The requirement for written informed consent was waived given the context of emerging infectious diseases. Bronchoalveolar lavage fluid (BAL) samples were collected from five patients hospitalized with pneumonia in Jin Yintan Hospital of Wuhan, Wuhan, Hubei province, China from December 18 to 29, 2019. Information was gathered, including clinical data, demographic characteristics, underlying medical conditions, clinical signs and symptoms, chest radiographic findings, clinical laboratory testing results, traveling history, recent animal exposure, and outcomes. The data collected for the cases were deemed by the National Health Commission of the People's Republic of China as the contents of a public health outbreak investigation. Spot slides were prepared by applying 20 mL of the virusinfected or non-infected cell suspension onto 12-well Teflon-coated slides. The cells were fixed with 4% paraformaldehyde in 1Â phosphate-buffered saline (PBS) for 30 min, washed three times with PBS, blocked, and stained with serum from a convalescent patient or serum from a healthy person for 30 min at 37°C at a dilution of 1:200. Goat anti-human immunoglobulin G conjugated with fluorescein isothiocyanate was used as the secondary antibody (Jackson Immuno Research Laboratories, Inc., West Grove, PA, USA). Nuclei and the cytoplasm were counterstained with 4 0 ,6 0 -diamidino-2phenylindole and Evans blue (Sigma-Aldrich, St. Louis, MO, USA). Fluorescent images were obtained and analyzed using laser-scanning confocal microscopy (Airyscan LSM880, Zeiss, Berlin, Germany). Patient 1 was a 65-year-old man who reported a high fever and cough, with little sputum production, at the onset of illness. He had a continuous fever and developed severe shortness of breath 16 days later. 
He was a vendor at the Huanan Seafood Market, Wuhan, Hubei Province, China. Patient 2, a 49-year-old woman, presented with high fever and dry cough. Five days later, she developed dyspnea and was admitted to the hospital. She was also a worker in the Huanan Seafood Market. Patient 3 was a 52-year-old woman who did not report any market exposure. She was admitted to hospital because of fever, cough, and groundglass opacity in the chest computed tomography scan. Patient 4 was a 41-year-old man who also presented with high fever and dry cough at the onset of the illness. He developed acute respiratory distress syndrome 7 days later. This patient had no known history of exposure to the Huanan Seafood Market. Patient 5, a 61-year-old man, was admitted to a local hospital with a 7-day history of fever, cough, and dyspnea. He also worked in the market. With regards to medical history, Patient 4 had hypertension, and Patient 5 had chronic liver disease and abdominal myxoma, whereas none of the other patients had a record of underlying diseases. The demographic and clinical characteristics of the five patients are summarized in . A substantial proportion of all sequencing reads mapped to the newly reported CoV genome (BWA mem, version: 0.7.12), ranging from 71,883 (0.27% among all reads) in Patient 4 to 37,247,818 (85%) in Patient 5. In addition, very few reads mapped to known bacterial pathogens, including Streptococcus, Acinetobacter baumannii, and Pseudomonas [ -E]. The clinical features and laboratory test results of the five patients are summarized in . Fever, cough, and dyspnea were the most common symptoms. The white blood cell counts varied among these patients, but the lymphocyte counts were generally low. The alanine aminotransferase and serum creatine levels were normal or only slightly increased. 
Bilateral ground-glass opacities and consolidation were observed on chest radiography from two representative patients, Patient 2 based on aortic arch scan [ ] and pulmonary vein scan [ ] on day 10 after symptoms onset and Patient 5 taken on day 12 [ ] and 13 [ ] after symptoms onset. Several complications were observed in these patients. Four of the five patients (except for Patient 3) developed acute respiratory distress syndrome requiring oxygen therapy, and two patients were given extracorporeal membrane oxygenation. Two patients (Patients 1 and 5) experienced secondary infections, and Patient 5 later developed septic shock as well as acute kidney injury, and ultimately died of multi-organ failure. Patient 3 was discharged on January 8, 2020 (day 17 after symptoms onset). The other three patients were still hospitalized at the time of manuscript preparation. The treatments for these patients were shown in . Owing to the lack of epidemic information at present, the transmission modes of the novel CoV remain obscure. It is notable that three of the five patients had a history of recent exposure to a seafood market in Wuhan. However, the origin of infection is unknown at the time of manuscript preparation. It is assumed that the zoonotic CoV jumped to humans through an intermediate host; for example, camel is suspected as the intermediate host of MERS-CoV, whereas the palm civet may have contributed to the interspecies transmission of SARS-CoV to humans. Bat CoVs may evolve to adapt to using humans as a host during their circulation in a mammalian host, thereby enabling them to effectively infect humans. However, two of our patients did not have a history of exposure to the seafood market. Therefore, further investigation will be needed to determine the potential of multiple infection sources responsible for this uncommon outbreak. One of the most striking and concerning features of this virus is its ability to cause severe respiratory syndrome. 
The disease progressed rapidly with a major presentation of lower respiratory pathology. Notably, no obvious upper respiratory tract symptoms such as a sore throat and rhinorrhea were present in these patients. Therefore, further exploration is needed on the distribution of the viral receptor in the organs to potentially account for pathogenesis development. In addition, the possibility of unrecognized mild infections or subclinical infections should be clarified, as identification of such infections is critical to control spread of the disease. Development of serological assays would be largely beneficial to detect such types of infection at the population level. In conclusion, we identified a novel bat-borne CoV associated with a severe and fatal respiratory disease in humans. The emergence of this virus poses a potential threat to public health. Therefore, clarification of the source and transmission mode of these infections is urgently needed to prevent a potential epidemic.",
#  'Building on our previous experience collating news reports to monitor transmission of Ebola virus, here we present an effort to compile individual patient information and subnational epidemic curves on COVID-19 from a variety of online resources. Data were made publicly available in real time and were used by the infectious disease modelling community to generate and compare epidemiological estimates relevant to interventions. We describe the data generation process and provide an early analysis of age patterns of COVID-19, case counts across China and inter nationally, and delays between symptom onset, admissions to hospital, and reporting, for cases reported until Jan 31, 2020. In this population-level observational study, we used crowdsourced reports from DXY.cn, a social network for Chinese physicians, health-care professionals, pharmacies, and health-care facilities established in 2000. This online platform is providing real-time coverage of the COVID-19 outbreak in China, obtained by collating and curating reports from news media, government television, and national and provincial health agencies. The information reported includes time-stamped cumulative counts of COVID-19 infections, outbreak maps, and realtime streaming of health authority announcements in Chinese (directly or through state media). Every report is linked to an online source, which can be accessed for more detailed information on individual cases. These are publicly available, de-identified patient data reported directly by public health authorities or by state media. No patient consent was needed and no ethics approval was required. We closely monitored updates on DXY.cn between Jan 20, 2020, and Jan 31, 2020, to extract key information on individual patients in near real-time, and reports of daily case counts. 
For individual-level patient data, we used descriptions from the original source in Chinese to retrieve age, sex, province of identification, travel history, reporting date, dates of symptom onset and seeking care at a hospital or clinic, and discharge status, when available. Individual-level patient data were formatted into a line-list database for further quantitative analysis. Individual-level patient data were entered from DXY.cn by a native Chinese speaker (KS), who also generated an English summary for each patient. Entries were checked by a second person (JC). Since DXY.cn primarily provides  Evidence before this study An outbreak of coronavirus disease 2019 (COVID-19) was recognised in early January, 2020, in Wuhan City, Hubei province, China. The new virus is thought to have originated from an animal-to-human spillover event linked to seafood and live-animal markets. The infection has spread locally in Wuhan and elsewhere in China, despite strict intervention measures implemented in the region where the infection originated on Jan 23, 2020. More than 500 patients infected with COVID-19 outside of mainland China have been reported between Jan 1 and Feb 14, 2020. Although laboratory testing for COVID-19 quickly ramped up in China and elsewhere, information on individual patients remains scarce and official datasets have not been made publicly available. Patient-level information is important to estimate key time-to-delay events (such as the incubation period and interval between symptom onset and visit to a hospital), analyse the age profile of infected patients, reconstruct epidemic curves by onset dates, and infer transmission parameters. We searched PubMed for publications between Jan 1, 1990, and Feb 6, 2020, using combinations of the following terms: ("coronavirus" OR "2019-nCoV") AND ("line list" OR "case description" OR "patient data") AND ("digital surveillance" OR "social media" OR "crowd-sourced data"). 
The search retrieved one relevant study on Middle East respiratory syndrome coronavirus that mentioned FluTrackers in their discussion, a website that aggregates epidemiological information on emerging pathogens. However, FluTrackers does not report individual-level data on COVID-19. To our knowledge, this is the first study that uses crowdsourced data from social media sources to monitor the COVID-19 outbreak. We searched DXY.cn, a Chinese health-care-oriented social network that broadcasts information from local and national health authorities, to reconstruct patient-level information on COVID-19 in China. We also queried international media sources and national health agency websites to collate data on international exportations of COVID-19. We describe the demographic characteristics, delays between symptom onset, seeking care at a hospital or clinic, and reporting for 507 patients infected with COVID-19 reported until Jan 31, 2020. The overall cumulative progression of the outbreak is consistent between our line list and an official report published by the Chinese national health authorities on Jan 28, 2020. The estimated incubation period in our data aligns with that of previous work. Our dataset was made available in the public domain on Jan 21, 2020. Crowdsourced line-list data can be reconstructed from social media data, especially when a central resource is available to curate relevant information. Public access to line lists is important so that several teams with different expertise can provide their own insights and interpretations of the data, especially in the early phase of an outbreak when little information is available. Publicly available line lists can also increase transparency. The main issue with the quality of patient-level data obtained during health emergencies is the potential lack of information from locations overwhelmed by the outbreak (in this case, Hubei province and other provinces with weaker health infrastructures). 
Future studies based on larger samples of patients with COVID-19 could explore in more detail the transmission dynamics of the outbreak in different locations, the effectiveness of interventions, and the demographic factors driving transmission. For an example of an online source see https://ncov.dxy.cn/ ncovh5/view/pneumonia information on patients reported in China, we also compiled additional information on internationally exported cases of COVID-19. We obtained data for 21 countries outside of mainland China (Australia, Cambodia, Canada, France, Germany, Hong Kong, India, Italy, Japan, Malaysia, Nepal, Russia, Singapore, South Korea, Sri Lanka, Taiwan, Thailand, United Arab Emirates, the UK, the USA, and Vietnam). We gathered and cross-checked data for infected patients outside of China using several sources, including global news media (Kyodo News, Straits Times, and CNN), official press releases from each country\'s Ministry of Health, and disease control agencies. In addition to detailed information on individual patients, we reconstructed the daily progression of reported patients in each province of China from Jan 13, until Jan 31, 2020. We used the daily outbreak situation reports com municated by provincial health authorities, covered by state television and media, and posted on DXY.cn. All patients in our databases had a laboratory confirmed SARS coronavirus 2 (SARS-CoV-2) infection. Our COVID-19 database was made publicly available as a Google Sheet, disseminated via Twitter on Jan 21, 2020, and posted on the website of Northeastern University, (Boston, MA, USA) on Jan 24, 2020, where it is updated in real time. Data used in this analysis, frozen at Jan 31, 2020, are available online as a spreadsheet. We assessed the age distribution of all patients with COVID-19 by discharge status. We adjusted the age profile of Chinese patients by the population of China. 
We used 2016 population estimates from the Institute for Health Metrics and Evaluation 9 to calculate the relative risk (RR) of infection with COVID-19 by age group. To calculate the RR, we followed the method used by Lemaitre and colleagues 10 to explore the age profile of influenza, where RR for age group i is defined as where C i is the number of cases in age group i and N i is the population size of age group i. To estimate trends in the strength of case detection and interventions, we analysed delays between symptom onset and visit to a health-care provider, at a hospital or clinic, and from seeking care at a hospital or clinic to reporting, by time period and location. We considered the period before and after Jan 18, 2020, when media attention and awareness of the outbreak became more pronounced. We used non-parametric tests to assess differences in delays between seeking care at a hospital or clinic and reporting between locations (Wilcoxon test to compare two locations and Kruskall-Wallis test to compare three or more locations). We estimated the duration of the incubation period on the basis of our line list data. We analysed a subset of patients returning from Wuhan who had spent less than a week in Wuhan, to ensure a narrowly defined exposure window. The incubation period was estimated as the midpoint between the time spent in Wuhan and the date of symptom onset. We did all analyses in R (version 3.5.3). We considered p values of less than 0·05 to be significant. The funder had no role in study design, data compilation, data analysis, data interpretation, or writing of the report. All authors had access to the data, and had final responsibility for the decision to submit for publication. 
Our line list comprised 507 patients reported from Jan 13, to Jan 31, 2020, including 364 (72%) from mainland China and 143 (28%) from outside of China The age distribution of COVID-19 cases was skewed towards older age groups with a median age of 45 years (IQR 33-56) for patients who were alive or who had an unknown outcome at the time of reporting (figure 1). The median age of patients who had died at the time of reporting was 70 years (IQR 65-81). Few patients (13 [3%]) were younger than 15 years. Adjustment for the age demographics of China confirmed a deficit of infections among children, with a RR below 0·5 in patients younger than 15 years (figure 1). The RR measure indicated a sharp increase in the likelihood of reported COVID-19 among people aged 30 years and older. A timeline of cases in our crowdsourced patient line list is shown by date of onset in , indicating an acceleration of reported cases by Jan 13, 2020. The outbreak progression based on the crowdsourced patient line list was consistent with the timeline published by China Center for Disease Control and Prevention (CDC) on Jan 28, 2020, 12 which is based on a more comprehensive database of more than 6000 patients with COVID-19. Since Jan 23, 2020, the cumulative number of cases has slowed down in the crowdsourced and China CDC curves (figure 2), which probably reflects the delay between disease onset and reporting. The median reporting delay was 5 days (IQR 3-8) in our data. Province-level epidemic curves are shown by reporting date in . As of Jan 31, 2020, 16 (52%) of 30 provinces in mainland China had reported more than 100 confirmed cases. The apparent rapid growth of newly reported cases between Jan 18, and Jan 31, 2020, in several provinces outside of Hubei province is consistent with sustained local transmission. Across the study period, the median delay between symptom onset and seeking care at a hospital or clinic was 2 days (IQR 0-5 days) in mainland China ( ). 
This delay decreased from 5 days before Jan 18, 2020, to 2 days thereafter (Wilcoxon test p=0·0009). Some provinces, such as Tianjin and Yunnan had shorter delays (data by province not shown), while the early cases from Hubei province were characterised by longer delays in seeking care (median 0 days [IQR 0-1]). The median delay between seeking care at a hospital or clinic and reporting was 2 days (IQR 2-5 days) in mainland China and decreased from 9 days before Jan 18, 2020, to 2 days thereafter (Wilcoxon test p<0·0001; ). Similarly to delays in seeking care at a hospital or clinic, reporting was quickest in Tianjin and Yunnan (median 1 day [IQR 0-1]) and slowest in Hubei province (median 12 days [IQR 7-16]). The median delay between symptom onset and seeking care at a hospital or clinic was 1 day (IQR 0-3) for international travellers, and shorter than for patients in Hubei province or the rest of mainland China (Kruskal-Wallis test p<0·0001; . Even in the period after Jan 18, 2020, when awareness of the outbreak increased, a shorter delay between symptom onset and seeking care at a hospital or clinic was seen for international patients than for those in mainland China (Wilcoxon test p<0·0001). For international cases, the delay between seeking care at a hospital or clinic and reporting was 2 days (IQR 1-4), also shorter than for mainland China (Wilcoxon test p<0·0001; . On the basis of 33 patients with a travel history to Wuhan, we estimated the median incubation period for COVID-19 to be 4·5 days (IQR 3·0-5·5; appendix p 2).  Information from patient line lists is crucial but difficult to obtain at the beginning of an outbreak. Here we have shown that careful compilation of crowdsourced reports curated by a long-standing Chinese medical social network provides a valuable picture of the outbreak of COVID-19 in real time. The outbreak timeline is consistent with aggregated case counts provided by health authorities. 
For comparison, China CDC published the first epidemic curve by symptom onset on Jan 28, 2020. Line lists provide unique information on the delays between symptom onset and detection by the health-care system, reporting delays, and travel histories. This information cannot be extracted from aggregated case counts published by official sources. Line list data can help assess the effectiveness of interventions and the potential for widespread transmission beyond the initial foci of infection. In particular, shorter delays between symptom onset and admission to hospital or seeking care in a hospital or clinic accelerate detection and isolation of cases, effectively shortening the infectious period. A useful feature of our crowdsourced database was the availability of travel histories for patients returning from Wuhan, which, along with dates of symptom onset, allowed for estimation of the incubation period here and in related work. A narrow window of exposure could be defined for a subset of patients who had a short stay in Wuhan, at a time when the epidemic was still localised to Wuhan. Several teams have used our dataset and datasets from others to estimate a mean incubation period for COVID-19 to be 5-6 days (95% CI 2-11). Our own estimate (median 4·5 days [IQR 3·0-5·5]) is consistent with previous work that used other modelling approaches. The incubation period is a useful parameter to guide isolation and contact tracing; based on existing data, the disease status of a contact should be known with near certainty after a period of observation of 14 days. Availability of a public dataset enables independent estimation of important epidemiological parameters by several teams, allowing for confirmation and cross-checking at a time when information can be conflicting and noisy. An interesting finding in our data relates to the age distribution of patients. We found a heavy skew of infection towards older age groups, with substantially fewer children infected. 
This pattern could indicate agerelated differences in susceptibility to infection, severe outcomes, or behaviour. However, a substantial portion of the patients in our database are travellers, a population that is usually predominantly adults (although does not exclude children). Furthermore, because patient data in our dataset were captured by the health system, they are biased towards the more severe spectrum of the disease, especially for patients from mainland China. Clinical reports have shown that severity of COVID-19 is associated with the presence of chronic conditions, which are more frequent in older age groups. Nevertheless, we would also expect children younger than 5 years to be at risk of severe outcomes and to be reported to the healthcare system, as is seen for other respiratory infections. Biological differences could have a role in shaping these age profiles. A detailed analysis of one of the early COVID-19 clusters by Chan and colleagues 19 revealed symptomatic infections in five adult members of the same household, while a child in the same household aged 10 years was infected but remained asymptomatic, potentially indicating biological differences in the risk of clinical disease driven by age. Previous immunity from infection with a related coronavirus has been speculated to potentially protect children from SARS, and so might also have a role in COVID-19. In any case, if the age distribution of cases reported here was to be confirmed and the epidemic were to progress globally, we would expect an increase in respiratory mortality concentrated among people aged 30 years and older. This mortality pattern would be substantially different from the profile of the 2009 influenza pandemic, for which excess mortality was concentrated in those younger than 65 years. In our dataset, we saw a rapid increase in the number of people infected with COVID-19 in several provinces of China, consistent with local transmission outside of Hubei province. 
As of Jan 31, 2020, province-level epidemic curves are only available by date of reporting, rather than date of symptom onset, which usually inflates recent case counts if detection has increased. D e c 9 , 2 0 1 9 D e c 1 6 , 2 0 1 9 D e c 2 3 , 2 0 1 9 D e c 3 0 , 2 0 1 9 J a n 6 , 2 0 2 0 J a n 1 3 , 2 0 2 0 J a n 2 0 , 2 0 2 0 J a n 2 7 , 2 0 2 0 Furthermore, province-level data include both returning travellers from Hubei province (ie, importations) and locally acquired cases, which also usually inflate the apparent risk of local transmission. Notably, other lines of evidence suggest that local transmission is now well established outside of Hubei province, because travel increased just before the Chinese New Year on Jan 25, 2020, and before implementation of the travel ban in Wuhan. Accordingly, our own data include evidence of transmission clusters in non-travellers, with, for instance, a second-generation transmission event reported in Shaanxi on Jan 21, 2020. Our study had several limitations, one of which was the data we used. Although all provinces in mainland China provide aggregated information on infections and deaths, individual-level patient descriptions are only available for a subset of provinces. Geographical coverage is heterogeneous in our line list, and we have a notable deficit of cases from Hubei province, the foci of the COVID-19 outbreak. We expect that little patient-level information is shared on social media by province-level and city-level health authorities in Wuhan and Hubei province because health systems are overwhelmed. For similar reasons, provinces with a large total case count at the end of January, 2020, or with a weaker health infrastructure, were under-represented in our line list, with the exception of Beijing. Other limitations in our data include severity (only patients who had severe enough symptoms to seek care were captured) and changes in case definition. 
A series of epidemiological criteria were required for COVID-19 testing, including travel history to Wuhan within the past 2 weeks; residence in Wuhan within the past 2 weeks; contact with individuals from Wuhan (with fever and respiratory symptoms) within the past 2 weeks; and being part of an established disease cluster. Some of these criteria (eg, relation to Wuhan) were relaxed over time (appendix). As a result, we have an overrepresentation of travel-related cases in our database. The reproduction number is an important quantity for outbreak control. We refrained from estimating this parameter because reporting changes could bias estimates relying on epidemic growth rates. Furthermore, our dataset captured cases all over China and does not reflect transmission patterns in any particular location. A mean reproduction number of 2·5-2·7 has previously been estimated on the basis of the volume of importations of international cases in the pre-intervention period in Wuhan. We recognise that, although our data source is useful and timely, it should not replace official statistics. Manual compilation of detailed line lists from media sources is highly time consuming and is not sustainable when case counts reach several thousands. Here we provide detailed data on 507 patients when the official case count was over 9000 by Jan 31, 2020, representing a sample of approximately 5% of reported cases and a much smaller proportion of the full spectrum of COVID-19 cases, which include mild infections. A crowd sourced system would not be expected to catch all cases, especially if many cases are too mild to be captured by the health-care system, digital surveillance, or social media. Notably, DXY.cn does not generate data outside of traditional surveillance systems but rather provides a channel of rapid communication between the public and health authorities. 
In turn, our approach has helped extract and repackage information from health authorities into an analytical format, which was not available elsewhere. At the time of writing, efforts are underway to coordinate compilation of COVID-19 data from online sources across several academic teams. Ultimately, we expect that a line list of patients will be shared by government sources with the global community; however, data cleaning and access issues might take a prohibitively long time to resolve. For the west African Ebola outbreak, a similarly coordinated effort to publish a line list took 2 years. Given the progression of the COVID-19 outbreak, such a long delay would be counterproductive. Overall, the novelty of our approach was to rely on a unique source for social media and news reports in China, which aggregated and curated relevant information. This approach facilitated entry of robust and standard data on clinical and demographic information. Reassuringly, DXY.cn maintains a special section dedicated to debunking fake news, myths, and rumours about the COVID-19 outbreak. Looking to the future, collection of patient data in the context of emergencies could include information on whether patients are identified through contact tracing or because they seek care on their own. Furthermore, data interpretability could be improved by gathering more quantitative information on how case definitions are used in practice. In conclusion, crowdsourced epidemiological data can be useful to monitor emerging outbreaks, such as COVID-19 and, as previously, Ebola virus. These efforts can help generate and disseminate detailed information in the early stages of an outbreak when little other data are available, enabling independent estimation of key parameters that affect interventions. Based on our small sample of patients with COVID-19, we note an intriguing age distribution, reminiscent of that of SARS, which warrants further epidemiological and serological studies. 
We also report early signs that the response is strengthening in China on the basis of a decrease in case detection time, and rapid management of travel-related infections that are identified internationally. This is an early report of a rapidly evolving situation and the parameters discussed here could change quickly. In the coming weeks, we will continue to monitor the epidemiology of this outbreak using data from news reports and official sources. KS and CV contributed to the study design. KS and JC contributed to the data compilation. KS, JC, and CV contributed to data analysis. KS and JC contributed to the design and drawing of figures. KS, JC, and CV contributed to the writing of the manuscript. We declare no competing interests. All data used in this report have been made publicly available on the Laboratory for the Modeling of Biological + Socio-technical systems website of Northeastern University. The available data include daily case counts of COVID-19 by reporting date and Chinese province, and a de-identified line list of patients with COVID-19. The line list includes geographical location (country and province), reporting date, dates of symptom onset and seeking care at a hospital or clinic, relation to Wuhan, discharge status when known, an English summary of the case description from media sources, and a link to the original source of data. Seeking care at hospital or clinic to report Symptom onset to seeking care at hospital or clinic',
#  'The novel Coronavirus outbreak, (previously known as the 2019-nCoV and later renamed COVID-19 during the writing of this manuscript) is leading to the closure of entire cities in China, and causing stringent measures to be taken in others. While in distant different continents, far from China where the virus was first reported, places are being placed on high alert. In Wuhan, where the virus broke, schools, roads and markets have been shut down . The same is true in Hong Kong, Beijing and Hubei Province amongst surrounding areas, as precautionary measures are being emphasized to ensure that the spread of the virus is minimized, and complete and accurate information on the virus is being obtained . However, the rate of spread of the virus and the uncertainties surrounding the entire situation has led the World Health Organization (WHO) on 30 January 2019 to declare the Coronavirus outbreak a \'Global Public Health Emergency\'. WHO determined, however, not to declare the outbreak a \'Public Health Emergency of International Concern\' (PHEIC) which is a higher level of declaration. A PHEIC is defined as "an extraordinary event which is determined to constitute a public health risk to other States through the international spread of disease and to potentially require a coordinated international response" whose scope may include: serious, sudden, unusual or unexpected; carries implications for public health beyond the affected State\'s national border; and may require immediate international action . With the world having experienced some notable influenza pandemics in the past, a Global Initiative on Sharing All Influenza Data (GISAID) platform was established and was instrumental in the rapid sharing of information by the Chinese scientists regarding the emergence of the COVID-19 virus. 
Through this platform, scientists from other regions were observed to gain access to information and are, subsequently, able to act in a much faster capacity; like in the case of scientists from the Virus Identification Laboratory based at Doherty Institute, Australia, who managed to grow a similar virus in the laboratory after accessing the data shared by the Chinese scientists . Beyond the aspect of pandemic preparedness and response, the case of COVID-19 virus and its spread provide a fascinating case study for the thematics of urban health. Here, as technological tools and laboratories around the world share data and collectively work to devise tools and cures, similar efforts should be considered between smart city professionals on how collaborative strategies could allow for the maximization of public safety on such and similar scenarios. This is valid as smart cities host a rich array of technological products that can assist in early detection of outbreaks; either through thermal cameras or Internet of Things (IoT) sensors, and early discussions could render efforts towards better management of similar situations in case of future potential outbreaks, and to improve the health fabric of cities generally. While thermal cameras are not sufficient on their own for the detection of pandemics -like the case of the COVID-19, the integration of such products with artificial intelligence (AI) can provide added benefits. The fact that initial screenings of temperature is being pursued for the case of the COVID-19 at airports and in areas of mass convergence is a testament to its potential in an automated fashion. Kamel Boulos et al. supports that data from various technological products can help enrich health databases, provide more accurate, efficient, comprehensive and real-time information on outbreaks and their dispersal, thus aiding in the provision of better urban fabric risk management decisions. 
The above improvements in the healthcare sector can only be achieved if different smart city products are fashioned to support standardized protocols that would allow for seamless communication between themselves. Weber and Podnar Žarko suggest that IoT devices in use should support open protocols, and at the same time, the device provider should ensure that those fashioned uphold data integrity and safety during communication and transmission. Unfortunately, this has not been the case and, as Vermesan and Friess explain, most smart city products use proprietary solutions that are only understood by the service providers. This situation often creates unnecessary fragmentation of information rendering only a partial integrated view on the dynamics of the urban realm. With restricted knowledge on emergent trends, urban managers cannot effectively take decisions to contain outbreaks and adequately act without compromising the social and economic integrity of their city. This paper, inspired by the case of the COVID-19 virus, explores how urban resilience can be further achieved, and outlines the importance of seeking standardization of communication across and between smart cities. With the advent of the digital age and the plethora of Internet of Things (IoT) devices it brings, there has been a substantial rise in the amount of data gathered by these devices in different sectors like transport, environment, entertainment, sport and health sectors, amongst others . To put this into perspective, it is believed that by the end of 2020, over 2314 exabytes (1 exabyte = 1 billion gigabytes) of data will be generated globally from the health sector. Stanford Medicine acknowledges that this increase, especially in the medical field, is witnessing a proportional increase due to the increase in sources of data that are not limited to hospital records. 
Rather, the increase is being underpinned by drawing upon a myriad and increasing number of IoT smart devices, that are projected to exponentially increase the global healthcare market to a value of more than USD $543.3 billion by 2025 . However, while the potential for the data market is understood, such issues like privacy of information, data protection and sharing, and obligatory requirements of healthcare management and monitoring, among others, are critical. Moreover, in the present case of the Coronavirus outbreak, this ought to be handled with care to avoid jeopardizing efforts already in place to combat the pandemic. On the foremost, since these cut across different countries, which are part of the global community and have their unique laws and regulations concerning issues mentioned above, it is paramount to observe them as per the dictate of their source country\'s laws and regulations; hence, underlining the importance of working towards not only the promoting of data through its usage but also the need for standardized and universally agreed protocols. While the significance of such data in advancing efficiency, productivity and processes in different sectors is being lauded, there are criticisms arising as to the nature of data collection, storage, management and accessibility by only a small group of users. The latter particularly includes select ICT corporations that are also located in specific geographies . These criticisms are justified, as in recent years, big data is seen as the new \'gold rush\' of the 21st century and limiting its access means higher economic returns and increased influence and control at various scales to those who control data. These associated benefits with big data are clearly influencing geopolitical standings, in both corporate and conventional governance realms, and there is increased competition between powerful economies to ensure that they have the maximum control of big data. 
As case in point is the amount of \'push and pull\' that has arisen from Huawei\'s 5G internet planned rollout . Though the latter service offers unprecedented opportunities to increase internet speeds, and thereby influence the handling of big data, countries like the U.S. and some European countries that are key proponents and players in global political, economic and health landscapes, are against this rollout, arguing that it is a deceptive way of gathering private data under the guise of espionage. On this, it has been noted that the issue of data control and handling by a few corporations accords with their principles of nationalism, and that these work for their own wellbeing as well as to benefit the territories they are registered in. Therefore, geopolitical issues are expected on the technological front as most large data-rich corporations are located in powerful countries that have influence both economically, health-wise and politically . Such are deemed prized tokens on the international landscape, and it is expected that these economies will continue to work towards their predominant control as much as possible. On the health sector, the same approach is being upheld where critical information and data are not freely shared between economies as that would be seen to be benefiting other in-competition economies, whereas different economies would cherish the maximization of benefits from such data collections. In addition to the obvious deep-rooted social issues related to nationalism, other challenges include the increasing movement of people globally that is being enhanced by reduced costs and higher speed. In particular, these challenges are more pronounced when it comes to public health. This is because most of the health-related data collected not only can compromise local nations, but also captures those of travelers. 
In such cases, in a bid to improve the health status of a nation, it becomes paramount to factor in data from other regions necessitating unhindered sharing of this data. Such data-sharing truth is emphasized in situations like the recent case of Coronavirus outbreak threatening the global health environment, facilitated by air transportation. The virus was first reported in Wuhan, China, and in a matter of three weeks (by 17th January 2020) over 300 cases were confirmed in that region, and 10 days later (26th January 2020), a total of 2014 cases of Coronavirus have been reported, with 684 of those being confirmed, and with 29 reported outside China. The fatalities from the virus stands at 56 as of 26th January 2020 . The virus had then been confirmed in various countries including Taiwan, South Korea, Japan, Thailand, France, the United States, Singapore and Vietnam . In the above case, though major cities are known to prepare themselves for potential outbreaks, their health policies and protocols are observed to diverge from one another. Thus, without a global collaborative approach, progress towards working for a cure and universally acceptable policy approach can take longer. Such fears, of a lack of international collaboration, were highlighted by the World Health Organization (WHO) during an emergency meeting in Geneva on 22nd January 2020 to determine whether the virus outbreak had reached a level warranting international emergency concern. However, WHO was satisfied that China was being proactive in this case, unlike in 2002, when China withheld information on the outbreak for far too long, causing delays in addressing the epidemic . As in this instance, it is the opinion in this paper that if there was seamless collaboration and seamless sharing of data between different cities, it would not warrant such a high-level meeting to result in action, and instead, a decision could have been made much earlier. 
On this, the saddest part is that some global cities are less prepared to handle the challenges posed by this type of outbreak for lack of information on issues like symptoms of the virus, the protective measures to be taken, and the treatment procedures that an infected person should be processed through, amongst other issues. With the Coronavirus , it took only 17 days (31st December 2019 to 17th January 2020) to be identified. The sharing of data has also been quicker, as immediately after the virus\' genetic sequence was discovered, Chinese scientists were able to share the information with the WHO, thus helping in its identification and enabling the auctioning of precautionary measures in other countries. Latest technological tools have also allowed for the receipt of information in realtime, in contrast to traditional epidemiological approaches that would have required months to identify the outbreak type . Similarly, though substantial data and information on the disease has been shared, Wetsman acknowledges that there is a lack of some vital information, like the ease of spread of the virus from person-to-person, and this is a key to containing the disease as interactions between people from different parts of the globe are still active. This hindrance can be made further possible as many cities advance in their smart and safe city model implementation towards constructing sufficient soft and hard urban infrastructures equipped with, for example, thermal imagery sensors to allow for early detections. However, while that is the case, data access to many is a challenge because the information is often seen as being sensitive for national security reasons, whilst at the same time, acknowledging that a virus outbreak is an equal threat to both national security and the economy. The outbreak of any disease has significant impacts on local economies across the globe. 
For instance, when SARS (Severe Acute Respiratory Syndrome) (SARS-CoV) broke in China in 2002, it was estimated, that the Asian region incurred tremendous negative impacts socially, health-wise and economically, potentially amounting to Asian regional economy losses of between USD $12-18 billion from tourism, travel and retail sales industries alone . The Zika virus outbreak, spread by daytime-active Aedes mosquitoes, is estimated to have cost equator-belt local economies in affected areas between USD $7 and USD $18 billion . The Ebola virus (or Ebola hemorrhagic fever (EHF)) caused an estimated loss of USD $2.2 billion in GDP in three West African economies (Guinea, Liberia and Sierra Leone) in 2015 alone . In regard to the current epidemic of Coronavirus, though it is too early to quantify or project its impacts on the global economy, there are fears that it may take the precedent of other outbreaks where billions of dollars will be lost. The foundations for this escalating loss can be witnessed in the rapid growth of travel bans being enacted by some countries and their international airports, especially specifically restricting people from visiting the affected regions in China and their growth into general non-Chinese travel movements. On this, noting that the outbreak came almost on the eve of the Lunar New Year celebrations, and that it had been estimated that over 400 million people were expected to travel in different parts of the world and China to observe this festivity, the majority have had to reconsider their options as to flights, hotels and entertainment events due to service provider cancellations . Those who had already booked their flights are expected to receive their refunds following the directive by the Civil Aviation Administration of China, however, this move has already affected the share value of Chinese airline companies . 
The above impacts demonstrate that the issues of virus outbreaks transcend urban safety and impacts upon all other facets of our urban fabric. Therefore, it becomes paramount to ensure that the measures taken to contain a virus transcend nationalist agendas where data and information sharing is normally restricted, to a more global agenda where humanity and global order are encouraged. With such an approach, it would be easier to share urban health data across geographies to better monitor emerging health threats in order to provide more economic stability, thereby ensuring no disruptions on such sectors like tourism and travel industries, amongst others. This is possible by ensuring collaborative, proactive measures to control outbreak spread and thus, human movements. This would remove fears on travelers, and would have positive impacts upon the tourism industry, that has been seen to bear the economic brunt whenever such outbreaks occur. This can be achieved by ensuring that protocols on data sharing are calibrated to remove all hurdles pertaining to sharing of information. On this, Lawpoolsri et al. posits that such issues, like transparency, timelessness of sharing and access and quality of data, should be upheld so that continuous monitoring and assessment can be pursued. Virus outbreaks in recent years have shown that, in the urban realm, data, including health data, can be sourced from diverse places. Presently, in the case of Coronavirus (COVID-19) outbreak, data is being collected from airports through screening and monitoring, through the use of smart sensors installed in airport infrastructures and from personnel working in those air/seaports. For instance, it has been reported that in the U.S.A., screening is being carried out at 20 different airports to ensure that possible affected people are intercepted for quarantine at the point of entry. 
Beside airports, as reported by Buckley and May , data is also being collected at bus terminals, market places (in Wuhan), subways, and also in health facilities where patients are taken for further medical attention. Such is prevalent especially in China, and other Asian regions where cases of the virus have been recorded and confirmed. In addition to these methods, other smart city data sources include the application of terminal tracking systems that are mostly emphasized in Safe City concepts, where, at the point of entry or departure, relevant data is collected and analyzed. Li et al. highlights that sensors installed in such locations have the potential to receive and distribute data in real-time to digital infrastructures within the network, and their interconnectedness in the network renders them extremely efficient in providing real-time updates on different issues. Urban areas are also known to be amassed with numerous Urban Health sensors, some of which are wearable. Though these are not specifically fashioned to track the present case of virus outbreak, they are able to track other related parameters like heartbeat, blood pressure, body temperature and others variables, that when analyzed can offer valuable insights. Loncar-Turukalo et al. hail these devices for their role in transforming the health care sector especially by allowing for Connected Health (CH) care, where data collected from them can be analyzed and provide insightful information on the health scenario in any given area. Vashist et al. further highlight how emerging features such as spatiotemporal mapping, remote monitoring and management, and enhanced cloud computing capabilities can emanate from such endeavours, leading to better urban management potential. 
While it is true that the basic source of medical data is generally sourced from general practitioners or medical laboratories-a fact that has also been affirmed in the case of the current epidemic-this paper explores how data sourced from an urban perspective can contribute to the medical narrative. The conviction to dwell on the urban realm in this manuscript is based on the fact that the current epidemic (COVID-19) is transmitted majorly through human-to-human contact, and in most cases, especially where the spread is reported in a different country, the first point of contact is an urban area, where large groups of people convene, like airports or subway stations. In most cases, such facilities, which are mostly based in urban areas, are observed to have installed surveillance technologies to ensure that anyone showing any symptoms of the disease are identified and quarantined. However, even in such cases, as underlined in the present manuscript, the need for anonymizing medical data is emphasized to ensure that the use of current technologies does not breach data privacy and security requirements, across different geographies. In this case, novel technologies like Blockchain technologies and quantum cryptography can aid in the discussion and be made to integrate with data collecting technologies. This would render an increased wealth of data from both the medical field and smart city operators, while ensuring privacy and security; hence, aiding in providing relevant information for better informed decisions. However, despite the indisputable roles that installed devices play in providing relevant health information, their data communication aspect needs to be reviewed. First, communications are seen to be geography-restricted (restricted to a given location), such that they seldom expand or communicate with their like, installed beyond their restricted areas. 
Secondly, these devices are usually sourced and installed by separate corporations that maintain unique and specific standards for data processing and sharing, and accordingly, tying cities to the sole usage of their product(s). Such strategies are adopted as private corporations try to maximize their economic gains, since the digital solution market is a lucrative one and is expected to continue growing and expanding . For its current application, the standardization of protocols as elaborated in this manuscript need to be pursued to ensure that there is seamless sharing of information and data. By doing this, it is expected that issues like burdens of collecting data, accuracy and other complexity that are experienced (when systems are fragmented) are reduced or eliminated altogether. The standardization can be achieved by, for example, ensuring that all the devices and systems are linked into a single network, like was done in the U.S., where all the surveillance of healthcare were combined into the National Healthcare Safety Network (NHSH) . The fact that cities are increasingly tuning on the concept of Smart Cities and boasting an increased adoption rate of technological and connected products, existing surveillance networks can be re-calibrated to make use of those new sets of databases. Appropriate protocols however have to be drafted to ensure effective actions while ensuring privacy and security of data and people. With scenarios like the present Coronavirus (COVID-19) outbreak, that not only impacts upon the economic status of cities, but also affects their social standing, it becomes imperative to emphasize the adoption of universal standards for data sharing. Such a move could have far reaching impact across cities and territories especially in positively combating outbreaks and disasters in a quicker, safer and standardized way, such that when the cure is discovered, the results can be replicated in various parts of the globe. 
With a collaborated data sharing protocol, it would be possible to have a larger dataset resulting in increased processing capabilities especially with technologies that are powered by artificial intelligence (AI) tools. Through this way, as noted by Jiang et al. and Allam , it would be possible to facilitate early detection, achieve better diagnosis and provide better urban management decisions for increased efficiency for virus containment. An example of how beneficial collaboration and sharing of data can be occurred during the 2014 Ebola outbreak in West Africa where scientists, health workers and clinicians, amongst other stakeholders from around the world, openly worked together and were able to contain the spread of this pandemic . On this front, Boué et al. highlight that levels of trust and transparency need to be reviewed and enhanced to facilitate unfettered data generation and sharing. Such could lead to an even earlier detection scenario of future virus outbreaks, and in the better curative management of the same, without minimal compromise on urban functions and on an urban economy. Furthermore, in cases of emergencies like the current outbreak of COVID-19 and any other, the need for observance of regulatory practices and international healthcare guidelines are paramount. This would ensure that both healthcare professionals and the general populace are informed, protected and remain within the prescribed rules and regulations. As noted by the WHO , the healthcare guidelines and regulatory practices are advanced to also ensure that the health risk in question is reduced together with its consequences. In the current era of technological advancement, such regulations and guidelines are paramount as they have potential to lead to positive or negative outcomes. 
The position of this paper is to advance that it is now possible to integrate technologies like the use of smart devices through IoT networks and wearable devices, data from mobile apps and others to help users to share information with accredited and certified health professionals, and in this case, improve the outcomes for better cross disciplinary and more resilient protocols and policies.',
#  'Organization have issued interim guidelines in order to protect the population, and to attempt to prevent the further spread of COVID-19 from infected individuals . In order to reduce the time to identification of a person under investigation (PUI) for the COVID-19 infection, and the rapid isolation of this individual, we propose to collect the basic travel history along with the more common manifestations using a phone-based online survey. Such collected data can be used to assist in the preliminary screening and early identification of possible COVID-19 infected individuals. Thousands of data points are able to be collected and processed through an artificial intelligence (AI) framework which can ultimately evaluate individuals that may be infected and stratify them into no-risk, minimal-risk, moderate-risk, and high-risk of being infected with the virus. The identification of the high-risk cases can then be quarantined earlier, thus decreasing the chance of spread. is inserted here. See Appendix I for the details on the steps involved in data collection on all the respondents independent of whether or not they think they are infected. The AI algorithm described in Appendix II is to identify possible case identifications and send alerts to the nearest health clinic as well as to the respondent for an immediate health visit, we call this as an "alert for health check recommendation for COVID-2019". In case the respondent is unable to commute to the health center, the health department can then send an alert to a mobile health unit so they can then do door-to-door assessments and even testing for the virus. This generates alert for mobile health check recommendation for 2019-nCoV (MHCRC). If a respondent does not have an immediate risk of having symptoms or signs related to the viral infection, then the AI-based health alert will be sent to the respondent to notify them that there is no current risk of COVID-2019. 
summarizes the outcomes of data collection and identification of possible cases. The data recorded in step 5 of the algorithm using signs and symptoms will be collected prior to both the groups who have received alerts HCRC or MHCRC (for possible identification and assessment) and NCRC (for non-identified respondents). These are explained in steps (iii) and (iv) in the Appendix II. The extended analysis proposed will help to understand if there is any association with different sociodemographic variables and the manifestations such as fever and signs and lower respiratory infections, including cough and SOB in individuals defined as either with and without possible infection. Applications of AI and deep learning argued to be useful tools in assisting diagnosis and treatment decision making . There were studies which promoted disease detection through AI models . Use of mobile phones and web based portals have been tested successfully in health related data collection. However, one need to apply such techniques in a timely way for faster results. Apart from cost-effectiveness, the proposed modeling will be of great assistance in identifying and controlling when populations are closed due to virus spread. In addition to these, our proposed algorithm can be easily extended to identify individuals who might have any mild symptoms and signs. We have developed our data collection criteria based on CDC\'s Flowchart to Identify and Assess 2019 Novel Coronavirus and added additional variables for the extended utility of our efforts in identifying infected and controlling the spread (see ). Let be the outputs recorded during the data collection steps 1 (ii) If the set of identifiers, , for is equal to one of the elements of the set then send HCRC or MHCRC to that respondent, else proceed to the test criteria (iv). If is equal to one of the elements of the set , for then the respondent will be sent an NCRC alert. 
(iv) If is equal to one of the elements of the set , then the respondent will be sent an NCRC alert. Comparison of test criteria results of (iii) and (iv) with their corresponding geographic and socio-demographic details will yield further investigations of signs and symptoms Suppose we define two events and using the sets and as below:',
#  "Infectious diseases are responsible for 25% of the annual global deaths . Epidemics arising from these diseases are unpredictable, carry uncertain, varying risks, and narratives in different contexts . It is important that the scholarly literature represents the diverse and sometimes competing, narratives from all affected particularly the most vulnerable . Arguably, national and global responses to epidemics are inherently political. The experts selected for consultation, the evidence used to inform response pathways, and narratives of blame, vulnerability, and responsibility are politically driven, and require analysis. In this paper, epidemics refer to a spike, above normal, in the prevalence of a specific disease in a specific population (Centre for Disease Control and Prevention 2012). The literal definition of politics is the “activities that relate to influencing the actions and policies of a government or getting and keeping power in a government” . However, in the “politics of epidemics” literature, politics often refers to the extensive and diverse influence of local, national, and international governments and organizations, on the health outcomes of communities during disease outbreaks. For the purposes of this paper, politics will also refer to how scholars talked about epidemics, and whose narratives are reported in the reviewed literature. Several key themes emerge when examining scholarly narratives about the politics of epidemics namely the socioeconomic distribution of disease, decision-making in research and development, the credibility of evidence that informs response pathways, and attribution of responsibility for causing the outbreak and determining who is responsible for responding. While the literature presents obviously competing narratives that explain disease outbreaks, a close examination of these reveals a high prevalence of certain narratives, which reflect the influence of power and privilege. 
Hence, some of the literature has called for increased representation of the narratives from the most marginalized populations who bear the brunt of epidemics, in the scholarly literature . Furthermore, the decisions with regard to the type of research, the study population and the research beneficiaries often lack transparency, are also dominated by the most powerful. The case of the swine flu, dubbed the Bpandemic that never really was^ , demonstrates the potential consequences of private and uncommunicated decision-making processes in research and development. In the case of the swine flu, allegedly scientists conducting research and advising the World Health Organization on the virus were compensated by drug companies. This may have introduced bias . By increasing fear of a global pandemic, scientific research funded by pharmaceutical companies justified the stockpiling of antiviral supplies, thus increasing their vaccine sales . Fear mongering has been criticized in the literature, since it has the potential to distort the evidence, which should guide credible decision-making. It is therefore critical to assess the credibility and quality of the evidence that is being used to inform the response to epidemics around the world, particularly in light of the complicated and often problematic relationship between the academic and industry . In determining the credibility of evidence, consideration should be given to the legitimacy and authority of its producer(s). Given these sociopolitical issues that characterize epidemics, a comparative analysis of the narratives on the politics of epidemics is relevant. To date, most of the narratives in the scholarly literature on the politics of epidemics have considered one epidemic in one context . We hypothesize that the narratives may vary depending on the kind and context of the epidemic. 
The overall aim of this review paper is to discuss the scholarly narratives on the politics of disease epidemics by diseases and income level; Ebola in a low-income setting, Zika in a middle-income setting, and SARS in a high-income setting ). This analysis allowed for the consideration of the role that socioeconomic, geographical, and cultural context might play in the narratives on the attribution of blame and response to disease outbreaks. The paper is based on a review of the peer-reviewed published medical, social, and political literature, which was accessed using four electronic databases-PubMed, Sociological Abstracts, Scholars Portal, and Web of Science. The search was limited to full text articles published between 2002 and 2017. A similar approach was used in identifying the relevant articles. For each epidemic, the search terms included the name of the disease outbreak and the country of interest: BSARS AND Toronto,^BZika AND Brazil,^and BEbola AND Liberia.^It was vital that each search included the income setting to ensure that the findings were contextually relevant. These terms were then combined with the terms Bsociet*,^Bsociol*,^and Bpolitic*.^Truncation was used to ensure inclusion of all terms, including Bsociety,^Bsocietal,B sociological,^Bsociology,^Bpolitics,^Bpolitical,B politician,^and Bpoliticization.^The search terms must have Tables 2, 3, and 4 summarize the search results for the three outbreaks within the specified contexts. The titles and abstracts of the search results were reviewed for relevance to the politics of epidemics. All biomedical articles describing biomedical research were excluded. All articles that were relevant to the study topic were retrieved and reviewed. The initial review involved RA, grouping the search results according to the disease outbreak. For each outbreak, RA first scanned through a couple of papers, identifying the emerging themes. 
Once these were identified, subsequent reviews were structured along these themes, although an open stance was maintained throughout to enable the reviewer to identify any additional relevant themes. This review focused on the SARS epidemic in Canada, specifically Toronto, Ontario, to represent an epidemic in a high-income country. summarizes the search results. The literature on SARS spoke to two of the four themes of the politics of epidemics, namely the credibility of evidence informing response pathways and the attribution of infectious disease responsibility. The use of quarantine as a control measure, although considered to be highly effective, is controversial. A telephone-based survey in the Greater Toronto Area aiming to ascertain public perceptions of the use of quarantine found that while quarantine was perceived to be a necessary and effective strategy, its ethical implementation should involve the collaboration of policy-makers, public health organizations, and the general population, and should be closely regulated to ensure appropriate use and protection of individual rights. Despite these recommendations, Toronto quarantined significantly more people during the SARS outbreak compared to the other affected cities, including Hong Kong and Shanghai . Given the psychological distress reported by those quarantined, Toronto might have considered other strategies, such as the use of face masks to better “distribute the burden of containment measures” . Critics note that the extensive quarantining in Toronto lacked proper policies and procedures to guide its implementation . Others highlight a lack of public record detailing any consultation between public health officials and the Ontario Human Rights Commission . 
There was little public scrutiny, which was suggested to be the result of effective conditioning of the public consciousness to believe that quarantining recommendations would be made fairly and legitimately by senior public health officials . Ultimately, it was not quarantining that was problematic, but the lack of apparent or sufficient evidence to guide its implementation. Beyond extensive quarantining, the World Health Organization issued travel advisories as an additional control measure to contain further national and international spread of SARS from Toronto . This travel advisory cost Toronto $1.1 billion and restricted the international right to freedom of movement . criticizes these travel advisories for various reasons. For example, the advisories were not made by the WHO in consultation with Toronto authorities and led to an uneven global distribution of the burden of SARS . Furthermore, the travel advisories were based on old data resulting from delayed communication between the federal government of Canada and the WHO, as information had to first travel from municipal to provincial to federal health authorities . Once again, the evidence used to inform the response was outdated and therefore considered unjustified. As such, there were problems with both the lack of quality evidence and the travel advisory as a response. Quarantining and travel advisories reflect the profound ethical and political implications inherent in responding to infectious disease outbreaks. The evidence on the efficacy of the two primary responses-quarantining and travel advisories-was inadequate to justify the extent of their implementation. In the case of SARS in Toronto, the literature reported on both forms of responsibility. 
While some of the literature attributed the responsibility to the Canadian health care system for being unprepared to manage SARS, the other literature tended to report on how the public attributed blame to the Asian-Canadian community for “bringing” the outbreak. The latter narratives ultimately led to the racialization of the epidemic. The narratives on attribution of responsibility for managing the SARS outbreak are most prevalent in the literature, with the limited capacity of the Canadian Health Care System to prepare for and respond to an emerging infectious disease as a main narrative. Many scholars pointed to flaws in the funding and organization of Canada's health care system-federally, provincially, and municipally-to explain the SARS outbreak in Toronto. Some argued that information was communicated inefficiently and was often incorrect . Personal protective equipment was in short supply , and the use of occupational health and safety in Toronto's hospital system was inefficient, leading to further spread. Toronto's hospital system was also blamed for lacking admission policies and public laboratories : an inability to supply health services when demand suddenly increased and a lack of quality leadership. Toronto was also ill-prepared to address the unique vulnerability of the homeless to SARS . Inadequate collaboration between the various levels of government in Canada was blamed for the apparent inefficiencies and inadequacies in the functioning of the health care system and the response to public health crises. This led to disorganized contact tracing, quarantining, and communication to the public (MacDougall 2007). Financial challenges within the Canadian Health Care System further enhanced Toronto's vulnerability to the SARS epidemic, including a lack of resourcing towards public health infrastructure and acute care . 
Similar to MacDougall (2007), point to a lack of cooperation and collaboration between the three levels of government to explain this public health crisis-ownership of responsibility and duty to respond was deflected between each level and remained unclear . According to the literature, SARS quickly became a profoundly racialized disease and inflamed racial tensions in the Greater Toronto Area ultimately leading to the social exclusion of a racial minority-the Asian-Canadian group . Such avoidance and stigmatization is reported to have played out in several spaces, such as on public transit and other public spaces, and families advising children to avoid Chinese peers in school . Some of the literature posits that this racialized stereotyping could have been prevented with denunciation from leaders in government and public health .On the other hand, according to , individualized health behaviors aimed at preventing SARS contraction-for instance, wearing a face mask-may have justified the avoidance of the stigmatized of the Asian-Canadians . Culture and ethnicity functioned not only as a risk factor for discrimination but also as a facilitator in the response to the outbreak. The Chinese-Canadian community in Toronto employed numerous strategies to combat SARS and ease social anxieties, including fundraising for research, the dissemination of health promotion materials, and launching a SARS support line, among other activities . The mobilization of spiritual leaders was also found to be an effective means of disseminating public health information ). While it is important to recognize the contributions of cultural and ethnic groups, we assert that cultural and ethnic minority groups are more often targets of blame, as was the case for SARS in Toronto. summarizes the total number of hits, and the number of papers that were retrieved and reviewed. 
The reviewed literature spoke to all of the themes identified in the politics of epidemics literature (socioeconomic distribution of disease, credibility of evidence, and the attribution of infectious disease responsibility), with the exception of decision-making in research and development. Similar to the above outbreaks, the people who were most affected by Zika were in some way socially marginalized, the poor, and more specifically, poor women. At the global level, it was the poorer countries and communities within those regions that were most impacted: those with precarious and/or inconsistent access to health care services, lacking the resources and infrastructure to prevent, diagnose, and treat the virus . No wonder the impact of the outbreak was more devastating in Brazil, which was already financially strained prior to the emergence of the Zika virus, with limited human resources: doctors, nurses, and other specialists , as compared to higher income countries who were more protected from the effects of the Zika virus given effective prevention programs, funding, and infrastructure . Easy and extensive access to mosquito repellants, air conditioning, effective waste management programs, and low rates of urban crowding protect more economically developed countries, such as the USA from Zika transmission . Consequently, Zika virus has aptly been labeled an Binfectious disease of poverty^ 3) . Some of the literature attributed the lack of public health infrastructure and resources to respond to the Zika outbreak in Brazil to these inequalities with regard to who is most affected. Contrasting Zika with HIV, since HIV/AIDS initially affected the prestigious population, such as celebrities, doctors, scientists , they were able to advocate and secure increased public funding of HIV interventions from the Ministry of Health and State Departments of Health. 
However, this seems to have happened at the expense of funding for vector control programs, such as those controlling mosquito vectors responsible for the transmission of dengue and Zika viruses, which mostly affected poorer communities . Among the poor populations, women who were either pregnant or considering pregnancy were also more vulnerable to the effects of Zika, as the virus is considered to be a teratogen . Government programs in Brazil that intend to provide free mosquito repellent to pregnant women do not consistently reach some of the poorest communities, such communities are remote and often lack accessible quality health care facilities due to distance and poor physical infrastructure . Access to quality reproductive health care is essential given the accumulating evidence linking the Zika virus to a rise in cases of infants born with microcephaly-an unusually small head for age and sex (World Health Organization 2017a). Conditions related to poverty, such as poor sanitation and increased exposure to larvicides and insecticides, which may cause mutation, have also been implicated . While the climate of South America is particularly suitable for replication of the Aedes mosquito-the vector responsible for transmission of the Zika virus ), climate change is thought to have resulted in extremely heavy rainfall and droughts which can support this proliferation of mosquitos, hence facilitating the spread of the Zika epidemic . Both the puddles created during heavy rainfall and the open barrel water storage during droughts, create ideal breeding places for mosquitos . Since the causes of climate change are complex, global, and political, the responses are political, often emphasizing the symptoms of climate change at a local level (in especially the poor countries), rather than addressing the causes at a global level. 
A combination of an ecosystem-focused perspective and a social-anthropological lens is vital Bbecause a pathogen requires a receptive population in order to cause disease . Ultimately, the need for a cross-disciplinary response pathway cannot be understated . Comprehending the diversity of causal explanations and associated responses allows for the politics of evidence to be more deeply appreciated. The production and evaluation of the evidence used to provide a causal explanation and promote a response is also political. In the case of the Zika epidemic, most of the literature on the attribution of infectious disease responsibility focused primarily on the cause, rather than the response, with most of the literature suggesting that globalization was responsible for the spread of Zika. Although typically the blame of the origins of infectious diseases are typically placed on a cultural minority group ; World Health Organization 2017c), with response action emphasizing individual behavior, prevention of mosquito bites and sexual transmission (World Health Organization 2017d; Brym and Lie 2014), however, arguably, in the case of the Zika epidemic, considerable responsibility was given to the effects of globalization. Globalization has been defined in numerous ways. For example, Bryn and Lie (2014) defined it as the Brapid increases in the volume of international trade, travel, and communication [which has] broken down the isolation and independence of most countries and people^(pg. 27) . This interpretation of globalization will be used of the purposes of this review. According to the literature, the interconnectedness between countries through travel facilitated the spread of Zika beyond the Zika Forest of Uganda-where it was initially discovered in the 1940s-to South East Asia in the 1960s, the Island of Yap in 2007, French Polynesia in 2013, and South America in 2015 . 
In light of the profound influence of globalization, international human travel, climate change, and urbanization on the spread of the Zika virus, some of the literature has called for research that assesses the feasibility of a more global response to preventing spread. summarizes the literature included in the review of Ebola in Liberia. Liberia was the focus for the purposes of this systematic review because of its high prevalence of Ebola in comparison to other affected areas. Similar to Zika, the reviewed literature spoke to all of the themes identified in the politics of epidemics literature (socioeconomic distribution of disease and the attribution of infectious disease responsibility), with the exception of decision-making in research and development. Ebola virus disease has been predominantly reported in low-income countries, with the last outbreak in 2014 reported to have caused 11,323 deaths worldwide and 4809 deaths in Liberia alone . The most affected countries typically had poor health infrastructure . It is only during the most recent outbreak that the virus spread to high-income countries . Within these countries, and as was exemplified in Liberia, the outbreak mostly affected those populations that were very poor, remote, and lacked proper physical infrastructure, including roads, proper sanitation, and health facilities. The burden of the Ebola epidemic fell disproportionately on the most disadvantaged Liberians, fundamentally politicizing the epidemic. The socioeconomic distribution of Ebola was political, as defined for the purposes of this review, in that many scholars attribute its emergence to the socioeconomic conditions of Liberia. For example, poverty-and the subsequent limited investment in the health system, which led to weak public health infrastructure, insufficient information technologies, a lack of trained personnel, and inadequate case reporting-was often cited as the key vulnerability of Liberia . 
Additional challenges included mobility of particular populations, authority distrust , economic instability, and a lack of governance (McNamara 2016). These challenges intersected with other stressors, such as climate change and food insecurity to intensify the effects of the Ebola epidemic ). Many rural communities in Liberia were particularly vulnerable, extremely poor, and lacking secure access to food and health clinics . Similar to the other epidemics, people living in poverty were most vulnerable to the effects of the Ebola epidemic. Poverty facilitated the spread of the Ebola virus; thus, interventions aimed at addressing poverty would be most effective in containing further transmission . Socioeconomic inequality was additionally evident in the Binequitable management of the dead^(Pellecchia, Crestani, Decroo, Van en Bergh, Al-Kourdi, 2015, p. 1) . While cremation was a less acceptable cultural practice , it was mandated to limit further Ebola transmission . The new practice seemed to have impacted the economically disadvantaged who could not afford to pay for private burial services instead of cremation, aggravating socioeconomic divides . The disorganized and delayed response has been labeled a global health governance failure by Roemer-Mahler and Rushton (2016) who argue that Bthe outbreak was not only a global health problem but also a global political problem^(p. 374) . echoes a similar criticism in noting that development aid was used in contexts with insufficient infrastructures for the aid to be effective and focused too heavily on issues unrelated to Ebola . The international responses are argued to be far too short term, framing the Ebola epidemic as an African, and therefore racialized problem , leading to global neglect of the disease. 
This scholarly evaluation of global responses to the Ebola virus politicizes the epidemic, calling into question the evidence used to inform response pathways, which were largely insufficient and inappropriate for the Liberian context. More credible sources of evidence would have considered the urgency of Ebola and the unique sociopolitical context in which it was spreading. The Liberian culture was blamed not only for causing the epidemic but for interfering with control measures. Jones (2014) criticizes this “culturalist epidemiology” (pg. 1) that overlooks the wider global forces that promote the spread of Ebola, instead exoticizing Liberian culture to attribute responsibility . For example, traditional burial practices and the consumption of bush meat were identified as key etiological factors in the Ebola epidemic . Some analysts even suggest that these cultural practices, in addition to local distrust of authorities, may have obstructed interventions . Therefore, population behaviors, such as education and safe burials and cremations, were proposed as targets for intervention . However, other proposed causes of the Ebola epidemic included seasonal triggers, infection of nonhuman primates, landscape modification by humans, poverty, inadequate public health infrastructure, conflict, and population growth . Failure to focus on these and narrowly focusing on cultural practices politicizes the Ebola epidemic; yet, public health authorities, governments, and academics have largely attributed disease responsibility to local culture. There were similarities and differences in the narratives about the different epidemics. Broadly, none of the epidemics had narratives relating to all four themes of the politics of epidemics. However, both Zika and Ebola had narratives on three of the four themes. The SARS literature addressed only two of the four themes. Notably, there was a lack of relevant literature on the research and development theme. 
The finding that the literature on how decisions about what research is funded and conducted during disease epidemics almost exclusively focused on the ethical implications (and did not question the potential power imbalances with regard to who identified the research issue/question and who led the research for example), was surprising, since this was a key theme in the broad politics of epidemics literature. This could, in part, be a reflection of the limitations of the search engines and strategy used in the study which excluded publications, which were deemed biomedical. Conversely, it may be a reflection of limited support for critical research and related publications. A case-specific review of the literature has demonstrated the influence of power and privilege on the experience of an epidemic. In the case of Zika in Brazil, the communities most vulnerable to the virus are those with insufficient resources and infrastructure . Consequently, Zika has been socially distributed to exacerbate conditions of poverty. However, the voices of these most affected people are drowned out by more powerful and prestigious groups. This is seen in the comparison that makes between the politics of HIV/AIDS and the politics of Zika . HIV/AIDS research in Brazil procured greater funding because those affected tended to be very notable Brazilians with more dominant social and politics voices . The same pattern was reported in relationship to the Ebola outbreak in Liberia, where weak public health infrastructure aggravated Liberian experiences of Ebola , while interventions targeted cultural practices, ultimately disempowering the economically disadvantaged . This review found that infectious disease outbreaks disproportionately affect the poor, specifically communities with poor physical infrastructure and limited access to quality public health services. 
The link between income and politics of epidemics has been discussed in the social science literature, where poverty is perceived to be the greatest risk factor. For example, argued that disease outbreaks, e.g., Ebola systematically affecting poor people and are tied to regional trade networks . Building on this literature, Marcella (2011) uses the term structural violence to highlight the institutional biases, inequalities, and economic policies that emanate from global centers of power and privilege, which tend to marginalize poor people during outbreaks . These linkages highlight social and economic inequalities (within communities, societies and countries), which are complex and often ignored by the medical (and political) communities . Indeed, some of the narratives criticized the (epidemiological) evidence, which tends to overlook the role of poverty in the facilitation of disease spread. For instance, focusing on pregnancy in the case of Zika in Brazil was criticized for overlooking the conditions of poverty that might also/instead be responsible for the spike in cases of microcephaly . Furthermore, responding to Ebola in Liberia with developmental aid that is not designed for contexts with insufficient infrastructure was criticized again for overlooking the role of poverty . This limited focus on the role of poverty in the peer-reviewed medical literature calls into question the politics of the research process itself. For example, what institutions are funding the research and what are the interests of the stakeholders in the research process? Who gets funding to conduct the research? What are the advantages of overlooking poverty for those producing the evidence? By disregarding the role of poverty and income inequality, epidemic responses will remain insufficient, and may, instead worsen the situation of poor populations . Perhaps poverty remains unaddressed in epidemic responses as its origins in a neoliberalist society feel too deep to uproot. 
Ironically, disease outbreaks facilitated the development of the Global Public Health Intelligence Network (GPHIN), an information-sharing platform whose aim is to improve the credibility and authority of public health specialists to manage an outbreak. The platform is thought to have reduced the time between the outbreak and reporting and is thought to contribute to 40% of WHO's early warnings; it also played a role in the notification of SARS and subsequent outbreaks . The review clearly showed the consistency in the scholarly literature, that it is common to find that complex issues are oversimplified, whereby culture is used to justify the assigning of blame to minority groups . The review highlighted the role of politics and power in shaping different narratives, whereby powerful institutions assert particular narratives (often marginalizing the populations), which are “pushed” to frame policies, publications, interventions, and funding agendas, while the narratives of the marginalized populations (those voiced by or representing marginalized people) are marginalized . This type of outbreak narrative was evident in the three epidemics discussed in this paper. In Toronto, SARS became a racialized disease, ultimately victimizing and excluding the Asian-Canadian community . The spread of the Zika virus in Brazil is largely attributed to the consequences of globalization, including the widening habitat of the Aedes mosquito vector and increased human and air travel . Cultural explanations are prominent in the outbreak narrative speaking to Ebola in Liberia, specifically the human consumption of bush meat and local/traditional burial practices that involve the touching and kissing of the deceased (World Health Organization 2017c). 
When responsibility for the origin of the epidemic is reduced to cultural and ethnic minority groups, for example, the Asian community during the SARS epidemic, South American women living in poverty during the Zika epidemic, and communities engaging in traditional Liberian cultural practices during the Ebola epidemic, this further marginalizes the already vulnerable populations. It is important that emphasis is placed on the extreme vulnerability of these groups to an infectious disease outbreak, rather than placing blame and ultimately exacerbating experiences of oppression. The findings in this paper should be interpreted with caution. First, this systematic review relied upon published peer-reviewed literature. This overlooked documents, such as WHO reports, government documents, and books, which might have contained relevant information. For example, we are aware of critical texts in the form of books, which may have enriched this manuscript-specifically, and provide insight into how other cities affected by SARs attributed responsibility, talked about attribution of risk and responsibility for the disease; point to Singapore's attribution of responsibility and credibility of evidence; , while also note the racialization of the SARS epidemic in Toronto, reflecting on the stigmatization of Toronto's Chinese and South Asian communities . However, work positions globalization as both responsible for causing and responding to infectious disease outbreaks , such narratives are thought to create space to better understand how such processes might be repurposed as public health solutions. Finally, ground the Ebola outbreak in Liberia in the context of colonial legacies, specifically emphasizing that global public health responses were political in that the establishment of the public health infrastructure tasked with responding to Ebola was influenced by social inequality, colonialism, and racism (). 
By adopting a social science perspective, unpack the diverse factors-social, political, environmental, medical, and legal-that facilitated the escalation of the Ebola crisis. However, since the scope of this paper was limited to peer-reviewed journal publications and one outbreak from each income context as an illustration, such information (from books and from other contexts), although relevant, was beyond the paper's scope. Another limitation is the time frame of the study. Scholarly literature is consistently evolving, and in the specific case of the Zika epidemic, which was considered a public health emergency of international concern at the time of data collection, new and relevant research was being produced after data collection ended. For feasibility purposes, data collection ended in June 2017. It is also important to note that research and research publications sometimes tend to be biased and may marginalize the narratives by or representing the most vulnerable poor populations and political topics. Furthermore, social science literature that did not fit the definition of politics as articulated for the purposes of this review was excluded. A future review might seek to unpack the themes that emerge from this additional literature. This systematic review of the politics of three different outbreaks in three different socioeconomic contexts revealed that the politics of epidemics are-to an extent-universal. However, the manner in which the politics are played out varies by income setting; the political themes that speak to general epidemics were found to be uniquely enacted during the SARS outbreak in Toronto, the Zika outbreak in Brazil, and the Ebola outbreak in Liberia. Perhaps the most universal finding of this systematic review is the role of social and economic inequality, including poverty, during an epidemic. Regardless of the national income setting, minority and marginalized communities are the most devastated by an epidemic. 
If organizations and governments are to adequately respond to these individuals and communities, it is critical that narratives of those most vulnerable to an epidemic-specifically poor communities-are represented in the mainstream media as well as in the peer reviewed published literature-especially, the epidemiological and medical literature that tends to influence health programming and policy-making. Funding Information LK's research is funded by the Canadian Institutes for Health Research. AR is a PhD candidate. Ethical Considerations This is a review paper. No human subjects were involved in the study. The authors declare that they have no conflicts of interest.",
#  'It is amazing that, within a short time span of less than 17 years, two similar epidemic outbreaks occurred in China: SARS-1 in 2002 and SARS-2 in 2019. Although identification of viral origin(s) is very critical for understanding these epidemics, a study comparing a wide variety of natural and social factors potentially influencing the progression and the trajectory of these epidemics is also important. Through a comparative analysis of environmental factors and human activities in these two serious public health events, we wish to find some common ground for the occurrence of SARS-1 and SARS-2. The environmental situation of another coronavirus outbreak also seems to support the above-mentioned theory. MERS-CoV was first detected in a patient living in Jeddah, Saudi Arabia, in June of 2012 . The annual rainfall in Jeddah is low at 61mm, and there was no rain at all in June of that year in Jeddah . Therefore, relative to temperature, low humidity seems to be a more critical environmental factor influencing outbreak of human coronavirus disease. Thus, humans might become unfortunate hosts for SARS-CoVs as a result of some inappropriate interactions with wildlife and thus exposure to unfriendly viruses (  In agreement with these natural characteristics, bats have been found to inhabit locations near Yangtze River Bridge, which has rows of green lights that are tuned on for all of the night-time. Incidentally, Huanan Seafood Market is only 20 minutes away from this bridge. Bats gathered near the Yangtze River Bridge might have released the virus and even infected intermediate hosts for some time. The cold and dry winter helped viruses to survive in the environment and eventually found some ways to cross the species barrier, a phenomenon known as "viral chatter" . The increased vulnerability of human beings in winter time and the increased human exposure to wild animals during holidays made infection to SARS-COV-2 more likely. 
With so many bats concentrated into a local area, the spreading of viruses by bats might be much wider than just being restricted to one wildlife trading place such as the Huanan Seafood Market. The viruses might have lived in this big "incubation bed" for some time and achieved some mutations before jumping on to the final hosts-human beings. Although the origins and the occurrences of SARS-CoV-2 are both unclear, the control measures for the current epidemic should focus on immediate cut-off of transmission of the disease and thorough disinfection of infected locations. Quarantine of patients (both confirmed and suspected), isolation of susceptible population, and protection of high-risk professions are necessary measures for reducing exposure to the viruses and eliminating the risk of getting infected by the viruses. At the same time, infected locations must be adequately disinfected. Areas that will be open to the public should be carefully surveilled for the existence of SARS-CoV-2 and be cleaned of the virus if it is found. Modern communication methods should be effectively used for passing reliable information on the epidemic status, the treatment measures, and the self-protection skills, among others. As a matter of fact, if fine-tuned and highly-effective internet control for "public opinions" can be turned into beneficial use of monitoring the "epidemic situation", fighting against an even larger outbreak of any infection would be much easier and cost-effective. SARS-CoV-2 has entered human communities, and eliminating virus from human bodies does not mean its eradication in nature. The risk of SARS-CoV-2 infection will remain for a long time. Thus, adequate cautions must be taken for safe-guarding against future outbreaks of SARS. The prevention can be achieved by implementing a multi-facet system that considers both natural and social aspects of the SARS epidemiology discussed earlier. 
For example, regular surveillance of viral status in nature should be carried out to monitor the variation/evolution and abundance/localization of the virus. This information may be served as an early warning and used for preparation of potential vaccines. The government should issue laws and policies to tighten protection of wildlife and prohibit consumption of wild animals. A grass-roots and transparent reporting system should be established and put into public use for reporting any case of confirmed or suspected human infection. The disease-reporting system should be organically synchronized with the meteorological system so that adverse environmental conditions conducive for viral infection on human beings can be forecasted and macro-scale preparations can be made in case an emergency occurs. Finally, but not lastly, in developing human society including building massive constructions for residence and transportation, potential ecological impact on wildlife and possible consequences of breaking natural balance of the ecosystems should be carefully evaluated. Author Contributions: All authors have made a contribution to this manuscript. Z.S. designed, drafted, and edited the initial manuscript. K.T. reviewed and edited the initial manuscript. S.S.K. edited the initial manuscript. G.H. conceptualized and designed the framework of the manuscript. S.V.L. wrote the revision of the manuscript and brought many of his independently originated ideas into the revised manuscript. All authors have read and agreed to the published version of the manuscript. Funding: This work is supported in part by the National Natural Science Foundation of China under grant no. 71964020.',
#  'The aim of the current study was to explore the effect of sustained transmission from the four Chinese cities of Wuhan, Beijing, Shanghai and Guangzhou on international disease importation risk to 168 countries and territories, with a specific focus on Africa where current levels of healthcare infrastructure could provide a significant challenge for managing this novel epidemic. The current situation is extremely dynamic and since then some countries have instigated flight restrictions and closed borders (e.g. Russia). These decisions were relevant for these locations but not based on probabilities. WHO has not recommended a cessation of transportation to free countries but suggested preventive measures. This would seem appropriate for Africa and South America with the caveat that only one case is needed to initiate a local epidemic without proper biosecurity and quarantine measures, whilst other regions will need to decide on a case-by-case basis through appropriate risk assessment.',
#  'In December 2019, China detected many cases of viral pneumonia-like disease similar to SARS that were confirmed to be caused by novel Betacoronavirus, provisionally called 2019 novel coronavirus (2019-nCoV). Since then, the novel coronavirus outbreak has raised attention throughout the world. Although the potential cause of the disease is still unknown, initial reports predicted that the virus is possibly of zoonotic origin. 2019-nCoV is the causative agent for severe respiratory infection in humans termed as novel coronavirus-infected pneumonia (NCIP) . nCoV is the third known coronavirus that causes fatal respiratory diseases in humans after highly pathogenic viruses SARS-CoV and MERS-CoV. Chinese researchers isolated the novel coronavirus from the infected patient in early 2020. As the virus is closely related to other bat coronaviruses, it is suspected that the bats are the primary reservoir for the virus. However, it is still unclear that, if the virus transmitted to humans directly from the bats or whether through an intermediate host. Detailed understanding of the enzootic patterns of the virus, its evolution, and surveillance are essential to control the disease and possibly to prevent the future epidemics of similar viruses. The transmission of 2019-nCoV is often spread from person to person through the respiratory droplets generated during coughs or sneezes from an infected person. Human-to-human transmission is reported in countries such as Germany, Japan, Vietnam, and the United States . The confirmed cases through inter-human transmission have increased the fear and panic accompanying the 2019-nCoV outbreak. It is still unknown whether the virus spreads only through human contact or if there is possible transmission through oral-fecal contact as well. The incubation time varies from 2-14 days after infection. 
The clinical presentation of this infection resembles SARS-CoV characterized with fever, dry cough, and shortness of breath in most of the cases, whereas non-respiratory symptoms such as headache, muscle ache, dyspnoea, rhinorrhoea, sneezing, sore throat, diarrhea, nausea, and vomiting are also reported in few patients. The affected persons also develop acute respiratory distress syndrome. Cases with critical illness showed respiratory failure, septic shock, and organs failure, which require intensive care support . At this time, the knowledge about this virus is limited. New cases and mortalities are increasing daily. As a newly emerging viral infection, there is no vaccine or anti-viral therapeutics to treat human coronavirus infection till now. As of now, preventing infection is the current priority for disease control. The current protocol for infected patients is to quarantine and provide supportive management and palliative care. The best way to avoid the virus infection is to keep oneself away from infected people and the utmost personal hygienic care is essential. Quarantine measures shall be taken to separate, restrict the movement of infected people, and also the normal population from the regions where there is an epidemic outbreak. The WHO recommended precautionary measures to the general public, such as frequently cleaning hands, wearing a face mask, avoiding close contact with the infected persons or farm animals, and avoiding consumption of raw or half-cooked meat/eggs and following good food safety practices . There is an urgent need to develop rapid diagnostic tools and vaccines or post-exposure prophylaxis to treat this infection. Reliable, timely laboratory diagnosis and an effective vaccine are crucial for effective disease management and public health intervention. An effective vaccine should be affordable, and also the production platform should produce suitable vaccine candidates rapidly at low cost, especially during a disease outbreak. 
The advantages and disadvantages of the current expression systems for recombinant protein production are given in . Currently, plant expression system offers many advantages over other conventional systems that have the potential to tackle the production of vaccine candidates rapidly at affordable cost facilitating the global vaccination programs, especially in resource-poor nations where the vaccines are needed most . The coronavirus outbreak has been declared a global health emergency and represents one of the greatest risks to global health, as the virus has a tendency to infect a large number of human populations, and the outbreak can cause severe medical complications with economic impact, particularly in middle-income countries where resources are limited for early diagnosis and preventive measures. Human mobility, air travel, and international trade can likely increase the number of cases in other regions as well. Continued surveillance along with the robust response of government agencies, medical practitioners, and researchers, is highly essential for the effective management of this emerging pathogen. Public health officials need to identify the source and virus reservoir, transmission cycle, pathogenesis, inter-human transmission, and clinical manifestations, which might be helpful to develop animal models, diagnostic reagents, anti-viral therapies, and vaccines against this pathogen. As the virus emerged suddenly and became a serious global concern, there is a need for rapid vaccine development. Although classical expression systems for biopharmaceutical proteins are still amenable, the development of transient expression in plants has deeply influenced the pharmaceutical sector to produce affordable vaccines and biologics rapidly at low cost. Hence, the plant expression platform shall be employed for biopharmaceutical production to accelerate the fight against this deadly infectious disease. 
The collaborative efforts of researchers are highly desirable to use a plant expression platform for producing an efficient cost-effective vaccine to control this epidemic. The continuous effort of research in this direction might be helpful in producing high-value biologics and pharmaceuticals on a large scale in a short time, especially during epidemics. ',
#  "Since December 2019, a new type of coronavirus called novel coronavirus (2019-nCoV, or COVID-19) was identified in Wuhan, China. The COVID-19 has then rapidly spread to all over China and the world. It can cause symptoms including fever, difficulty in breathing, cough, and invasive lesions on both lungs of the patients . It can spread to the lower respiratory tract and cause viral pneumonia. In severe cases, patients suffer from dyspnea and respiratory distress syndrome. The pandemic has a big number of infected patients that far exceeded the equivalents of Severe Acute Respiratory Syndromes (SARS) and Middle East respiratory syndrome (MERS), though with a lower fatality rate. According to the surveillance statistics reported by the Chinese government, by February 19, 2020, the number of confirmed infection cases increased to 44,412 for Wuhan and 74,280 for whole China, with 1497 and 2009 deaths respectively. Moreover, the pandemic has caused 919 confirmed infection cases and 3 deaths globally. Therefore, Wuhan city and Hubei Province are the targets for intensive interventions. Otherwise, the spread would have been much faster to all China and the world. Wuhan is a transportation hub of China, it is a highly dense city and has a large population of more than 14 million in 2019. The World Health Organization (WHO) had a meeting on January 30, 2020 and they declared the coronavirus outbreak from China a public health emergency of international concern. Further, there are lots of concerns and debates all over the world, indicating a need for more understanding of China's systems in responding to the outbreak. Therefore, based on our firsthand experience of working with few of the COVID-19 cases, the purpose of this article is to have a brief report of current development, challenges, and future directions of the coronavirus outbreak in Wuhan. 
It is recognized by the international community that China has made remarkable progress in responding effectively to the outbreak . What made China address the epidemic faster is its ability to finance and mobilize resources combined with its strong governance structure, efficient execution, and solidarity of the whole society. It just took 1 month for China to recognize the existence of a novel coronavirus after the first case was reported, followed with a series mandatory actions in both Wuhan and all over China. In contrast, it took more than 4 months for SARS. On December 31, 2019, delegates of the Chinese Center for Disease Control and Prevention (CDC) went to Wuhan for field investigations, and the sample of new virus was isolated and further identified as a pathogen of unexplained pneumonia on January 6, 2020. The genome-wide sequence of the virus was decoded in the next few days . After recognizing it as an emergency epidemic on January 22, 2020, strong measures have been adopted immediately by Wuhan local authorities to characterize and control the epidemic, including isolation of suspected cases for treatment, close monitoring of contacts, epidemiological and clinical data collection from patients, and development of diagnostic and treatment procedures. More and more hospitals have been designated by the government to treat infected patients. Thousands of people have been quarantined in the new built hospitals such as Huoshenshan Hospital, Leishenshan Hospital, and Fangcang Hospital to provide care for the confirmed infection patients in Wuhan. In the meantime, patients with different severity are being treated in different hospitals. Thousands of medical professionals nationwide came to Wuhan and other cities in Hubei Province for assistance. Many kinds of guidelines have been developed, and useful information about risk factors and preventive measures are recommended to the public by various means . 
It is found that the COVID-19 can be transmitted through droplets, contact, aerosol, etc. . A person will not be infected if he washes his hands before touching the conjunctiva. Accordingly, measures such as washing hands, wearing masks and goggles are very effective to prevent potential infections. Further, Wuhan has implemented closed management of communities. Inhabitants are not allowed to go out of their communities and they are very supportive to this regulation. COVID-19 detection kits have been developed and the test results can be generated within 6 h, which is helpful for early diagnosis, treatment and judgment of the treatment effect. Although the number of patients with COVID-19 infections is large in Wuhan, the fatality rate is much lower (3.37%, by February 19, 2020) compared with that of SARS (11%, 2003). By February 19, 2020, 4895 people have been recovered after treatments and most of them are mild cases. It is the first time for the COVID-19 to infect humans and can be transmitted from person to person . The incubation period can be 2 weeks and even longer. Besides, the virus can spread during the incubation period or recessive infection, which makes it difficult to identify those suspected cases without clinical symptoms for prompt control. By far, the numbers of new infections and deaths have already exceeded the equivalent numbers of cases with SARS. If the situation cannot be fully controlled in Wuhan and Hubei province, the situation may deteriorate in other places of China and the world. The government of all levels have been taking strong leadership to combat the outbreak and in recent days there is a decline trend of new cases. However, some challenges still remain and need to be addressed: (1) The large number of confirmed and suspected cases in Wuhan make people staying with them in high risk of getting infected because of the contagiousness of the new virus . This is specially the case for medical professionals. 
By February 11, 2020, 1716 medical personnel have been infected and six of them died from all over China. Moreover, it is very difficult to identify those people without obvious symptoms, making their families in high risk of getting infected. (2) Hospitals have drastically constrained other services to meet the hospitalization needs of the outbreak in Wuhan. At the early stage, with more people getting infected and less of them recovered, medical facilities, personnel and protective supplies were increasingly insufficient. In many cases, patients cannot be quarantined and treated in time, and many medical staff cannot get fully protected. More efficient logistic services are expected to deliver donated materials from both China and the international society to medical professionals and communities. However, these are all operating problems and the situation has much alleviated with the strong leadership of government of all levels. ",
#  "Long non-coding RNAs (lncRNAs), which are transcripts larger than 200 nt in length that lack protein-coding ability, have previously been described in mammalian cells . Most of them have a structure similar to mRNA; they have a 5′ methylguanosine cap and are usually spliced and polyadenylated at their 3′ termini. Notably, lncRNA expression shows significant cell and tissue specificity . Emerging evidence shows that non-coding RNAs have a regulatory role in multiple cellular processes, such as genomic imprinting, chromatin modification, and alternative splicing of RNA . Moreover, some diseases such as cancer and neurological disorders are also related to the dysregulated expression of lncRNA . Numerous studies have been conducted to ascertain their functional role during viral infection. For example, NRAV can promote influenza virus replication and virulence through negatively regulating the initial transcription of varieties of interferon-stimulated genes (ISGs) . lncRNA-ACOD1, named by its neighboring coding gene aconitate decarboxylase 1, significantly reduces virus multiplication by directly interacting with the metabolic enzyme glutamic-oxaloacetic transaminase . Neat1, one of the lncRNAs induced by HIV-1 infection, is retained in the nucleus and serves as a scaffold for the nuclear paraspeckle substructure. Importantly, Neat1 deficiency enhances HIV-1 replication . Although large amounts of data have proved that several lncRNAs are involved in different kinds of virus infection, the mechanisms by which they act are still largely unknown. For RNA-seq, ST cells were infected with PDCoV at a multiplicity of infection (MOI) of 10; the medium for PDCoV infection was DMEM containing 0.2 ug/ml Trypsin that had been TPCK-treated (Millipore Sigma, St. Louis, MO, United States) for 11 h. Mock-infected cells were placed in the same volume of DMEM, with the same concentration of TPCK-treated Trypsin. 
Total RNA was isolated from each group using SuPerfecTRI TM Total RNA Isolation Reagent (Pufei, Shanghai, China) according to the manufacturer's instructions. The RNA quality was checked by 1% agarose gel electrophoresis. The purity and concentration of RNA were measured by NanoPhotometer R spectrophotometer (IMPLEN, München, Germany) and Qubit R RNA Assay Kit in Qubit R 2.0 Fluorometer (Life Technologies, Camarillo, CA, United States). RNA integrity was assessed using the RNA Nano6000 Assay Kit of the Bioanalyzer 2100 system (Agilent Technologies, Santa Clara, CA, United States). For quantitative RT-PCR (RT-qPCR), ST and IPEC-J2 cells were infected or mock-infected with PDCoV at an MOI of 10 and harvested at the indicated time. All experiments were conducted in triplicate. Gene Ontology (GO) enrichment analysis of differentially expressed genes or lncRNA target genes was conducted with respect to biological process, molecular function, and cellular component with the GOseq R package, in which gene length bias was corrected. Kyoto Encyclopedia of Genes and Genomes (KEGG) was used to perform pathway enrichment analysis 1 . KOBAS software was used to test the level of statistical significance of enrichment of differentially expressed genes and/or lncRNA target genes in KEGG pathways . Sequence (5 -3 ) Amplicon For each lncRNA, the Pearson correlation coefficient of its expression value with that of each protein-coding gene was calculated. Under the conditions of an absolute value of the Pearson correlation coefficient >0.998 and p < 0.00001, the interaction network of the differentially expressed lncRNAs and protein-coding gene co-expression pairs was then constructed using Cytoscape (v3.5.1) . Correlation analysis of DE lncRNA and protein-coding genes identified a number of DE lncRNA-DE protein-coding gene pairs. The main enriched KEGG pathways of these protein-coding genes were in metabolism and oxidative phosphorylation. 
In a recent report, 5-day-old neonatal pigs were infected with PDCoV, and transcriptome profile and KEGG pathway enrichment analysis were performed at different stages of infection . In our study, we found that the lncRNA targeted genes enriched in those pathways that were perturbed during the late stage of infection. In addition, the expression level of transglutaminase 3 (TGM3) and apolipoprotein A-2 (APOA2) in a study were significantly changed. Similarly, we also found that TGM1 was up-regulated, and APOA1, APOA4, and APOA5 were down-regulated during PDCoV infection (data not shown). Moreover, our data show that many cytokines and chemokines, which elicit an inflammatory response, were differentially expressed in the infected cells compared to mock cells. The inflammation causes injury to the intestinal tissues, resulting in diarrhea or even death. Raised CCL and CXCL10 levels were associated with the severity of virus infection . Here, we identified a number of lncRNAs that may regulate the expression of these inflammatory molecules. In the present study, the expression profiles of lncRNAs were determined in PDCoV-infected ST cells. In total, 1,190 novel lncRNAs were identified. A total of 830 lncRNAs were differentially expressed between PDCoV-infected and mock-infected ST cells. KEGG pathway analysis of DE lncRNA co-expressed genes revealed that they might be primarily involved in regulating metabolism and TNF signaling pathways. Our study systematically characterizes lncRNA expression during PDCoV infection and provides a useful resource for identifying and functionally characterizing the cognate gene products of those lncRNAs. This study will also be useful for assigning lncRNAs as potential biomarkers of PDCoV infection and designing better preventive and therapeutic measures against the virus infection, which would be economically beneficial for the pig farming community. 
The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation, to any qualified researcher. JLL, JG, and JZ conceived and designed the experiments. FW, LD, and JL performed the experiments. JLL, YY, YJ, and TY analyzed the data. JLL drafted the manuscript. All authors read and approved the final manuscript. ",
#  "On Nov. 27th, 2002, a respiratory illness erupted in Guangdong Province, China . In Feb, 2003, the Chinese Ministry of Health announced that this acute respiratory syndrome had thus far resulted in 305 cases and five deaths . The following month, there were clusters of atypical pneumonia reported in other parts of mainland China, Hong Kong , Canada , and Singapore . In Jul, 2003, SARS-CoV spread across 26 countries in six continents, and caused a cumulative 8,096 cases and 774 deaths (9.6%) . In particular, a higher mortality (21%) was found in hospital personnel . On Dec. 29th, 2019, the health departments of Hubei Province received a report that four employees of the South China Seafood Wholesale Market were diagnosed with unknown-caused pneumonia in a local hospital, which was the first report of SARS-CoV-2 . On Dec. 31st, 2019, the National Health Commission of People Republic of China and Chinese Center for Disease Control and Prevention (China CDC) participated in the investigation and case-searching work . On the same day, the government of Wuhan released information about the disease outbreaks to society . Nowadays, the number of patients infected with SARS-CoV-2 continues to climb worldwide. By the date of this paper's submission, a cumulative 67,081 cases and 1,526 deaths (2.1%) were reported worldwide. In Wuhan, China, the number is 37,914. The main timeline of SARS and COVID-19 epidemic development were shown in ,b, respectively. Glucocorticoid and interferon Lopinavir/ritonavir (in testing) On Nov. 27th, 2002, a respiratory illness erupted in Guangdong Province, China . In Feb, 2003, the Chinese Ministry of Health announced that this acute respiratory syndrome had thus far resulted in 305 cases and five deaths . The following month, there were clusters of atypical pneumonia reported in other parts of mainland China, Hong Kong , Canada , and Singapore . 
In Jul, 2003, SARS-CoV spread across 26 countries in six continents, and caused a cumulative 8,096 cases and 774 deaths (9.6%) . In particular, a higher mortality (21%) was found in hospital personnel . On Dec. 29th, 2019, the health departments of Hubei Province received a report that four employees of the South China Seafood Wholesale Market were diagnosed with unknown-caused pneumonia in a local hospital, which was the first report of SARS-CoV-2 . On Dec. 31st, 2019, the National Health Commission of People Republic of China and Chinese Center for Disease Control and Prevention (China CDC) participated in the investigation and case-searching work . On the same day, the government of Wuhan released information about the disease outbreaks to society . Nowadays, the number of patients infected with SARS-CoV-2 continues to climb worldwide. By the date of this paper's submission, a cumulative 67,081 cases and 1,526 deaths (2.1%) were  The initial symptoms of SARS patients were fever (100%), cough (61.8%), myalgia (48.7%), dyspnea (40.8%), and diarrhea (31.6%) , and the prognosis of patients was associated with host characteristics (including age, gender, etc.) . During hospitalization, respiratory distress occurred in 90.8% of SARS patients . The duration from disease onset to severe respiratory distress was an average of 9.8 ± 3.0 days . During the disease course, some patients developed leukopenia, lymphopenia, and thrombocytopenia with an upregulation of aspartate transaminase (AST), alanine aminotransferase (ALT), lactic dehydrogenase (LDH), and C-reactive protein (CRP) . In comparison, COVID-19 showed similar trends with SARS patients . Fever, fatigue, and dry cough are the main manifestations of the patients, while nasal congestion, runny nose, and other symptoms of the upper respiratory tract are rare. Beijing Centers for Diseases Control and Prevention indicated that the typical case of COVID-19 has a progressive aggravation process. 
COVID-19 can be classified into light, normal, severe, and critical types based on the severity of the disease : (1) Mild cases-the clinical symptoms were mild, and no pneumonia was found on the chest computed tomography (CT); (2) normal cases-fever, respiratory symptoms, and patients found to have imaging manifestations of pneumonia; (3) severe cases-one of the following three conditions: Respiratory distress, respiratory rate ≥ 30 times/min (in resting state, refers to oxygen saturation ≤ 93%), partial arterial oxygen pressure (PaO2)/oxygen absorption concentration (FiO2) ≤ 300 mmHg (1 mmHg = 0.133 kPa); (4) critical cases-one of the following three conditions: Respiratory failure and the need for mechanical ventilation, shock, or the associated failure of other organs requiring the intensive care unit . The current clinical data shows that the majority of the deaths occurred in the older patients. However, severe cases have been documented in young adults who have unique factors, particularly those with chronic diseases, such as diabetes or hepatitis B. Those with a long-term use of hormones or immunosuppressants, and decreased immune function, are likely to get severely infected. According to the demographic information of SARS patients, infection occurred in all age groups (the average age was dyspnea (40.8%), and diarrhea (31.6%) , and the prognosis of patients was associated with host characteristics (including age, gender, etc.) . During hospitalization, respiratory distress occurred in 90.8% of SARS patients . The duration from disease onset to severe respiratory distress was an average of 9.8 ± 3.0 days . During the disease course, some patients developed leukopenia, lymphopenia, and thrombocytopenia with an upregulation of aspartate transaminase (AST), alanine aminotransferase (ALT), lactic dehydrogenase (LDH), and C-reactive protein (CRP) . In comparison, COVID-19 showed similar trends with SARS patients . 
Fever, fatigue, and dry cough are the main manifestations of the patients, while nasal congestion, runny nose, and other symptoms of the upper respiratory tract are rare. Beijing Centers for Diseases Control and Prevention indicated that the typical case of COVID-19 has a progressive aggravation process. COVID-19 can be classified into light, normal, severe, and critical types based on the severity of the disease : (1) Mild cases-the clinical symptoms were mild, and no pneumonia was found on the chest computed tomography (CT); (2) normal cases-fever, respiratory symptoms, and patients found to have imaging manifestations of pneumonia; (3) severe cases-one of the following three conditions: Respiratory distress, respiratory rate ≥ 30 times / min (in resting state, refers to oxygen saturation ≤ 93%), partial arterial oxygen pressure (PaO2)/oxygen absorption concentration (FiO2) ≤ 300 mmHg (1 mmHg = 0.133 kPa); (4) critical cases-one of the following three conditions: Respiratory failure and the need for mechanical ventilation, shock, or the associated failure of other organs requiring the intensive care unit . The current clinical data shows that the majority of the deaths occurred in the older patients. However, severe cases have been documented in young adults who have unique factors, particularly those with chronic diseases, such as diabetes or hepatitis B. Those with a long-term use of hormones or immunosuppressants, and decreased immune function, are likely to get severely infected. According to the demographic information of SARS patients, infection occurred in all age groups (the average age was ≦45) . There was a proportional difference between male and female (female predominance) , with a male-to-female ratio of 1:1.25 . In addition, hospital staff had a higher risk due to the proximal interactions with large numbers from the infected population. For example, hospital staff accounted for 22% of all cases in Hong Kong and 22.8% in Guangdong . 
The mortality caused by SARS increased with age (> 64 years) , and the overall mortality rate during the outbreak of SARS was estimated at 9.6% . Li et al. reported that people who have not been exposed to SARS-CoV-2 are all susceptible to COVID-19 . Among the 8,866 patients who have been confirmed with COVID-19, nearly half of the patients have been aged 50 years or older (47.7%) . The male-to-female ratio is about 2.7:1 and the average incubation period is 5.2 days . However, severe COVID-19 cases and deaths have mostly been in the middle-aged adults and the elderly with long smoking histories or other 45) . There was a proportional difference between male and female (female predominance) , with a male-to-female ratio of 1:1.25 . In addition, hospital staff had a higher risk due to the proximal interactions with large numbers from the infected population. For example, hospital staff accounted for 22% of all cases in Hong Kong and 22.8% in Guangdong . The mortality caused by SARS increased with age (> 64 years) , and the overall mortality rate during the outbreak of SARS was estimated at 9.6% . Li et al. reported that people who have not been exposed to SARS-CoV-2 are all susceptible to COVID-19 . Among the 8,866 patients who have been confirmed with COVID-19, nearly half of the patients have been aged 50 years or older (47.7%) . The male-to-female ratio is about 2.7:1 and the average incubation period is 5.2 days . However, severe COVID-19 cases and deaths have mostly been in the middle-aged adults and the elderly with long smoking histories or other basic diseases, such as heart disease and hypertension . At the time that this paper was been submitted, COVID-19 patients mortality rate was 2.1% . According to the WHO data on Jul. 31th, 2003 , a total of 8,096 clinically diagnosed cases of SARS were reported worldwide, with 774 deaths and 26 countries and regions affected ). Most cases were in Asia, Europe, and America. 
The main countries in Asia were China (including mainland, Macao, Hong Kong, and Taiwan), Singapore, and so on. The total number of cases in mainland China was 5,327, with 349 deaths . The cases were mainly concentrated in Beijing, Guangdong, and Shanxi ( ) . In total, 2,102 patients were from Hong Kong, Macao, and Taiwan, with 336 deaths . Viruses 2020, 12, x FOR PEER REVIEW 5 of 18 basic diseases, such as heart disease and hypertension . At the time that this paper was been submitted, COVID-19 patients mortality rate was 2.1% . The new emerging SARS-CoV-2 shares about 80% of the gene sequence of SARS-CoV, released by the Military Medical Research Institute of Nanjing Military Region in 2003 . Recently, Shi et al. reported that the sequence similarity of coronavirus between SARS-CoV-2 and the coronavirus isolated from Rhinolophus affinis is 96.2%, and suggested that bats may be the source of the virus . So far, the intermediate hosts of SARS-CoV-2 are elusive and have been reported to be snakes, minks, or variable others . Recently, a research group of South China Agricultural University reported that pangolins may be one of the intermediate hosts for SARS-CoV-2, by analyzing more than 1,000 metagenomic samples, because they found that 70% of pangolins are positive for the coronavirus. Moreover, the virus isolate from pangolin shared 99% sequence similarity with the current infected human strain SARS-CoV-2 . Taking this recent research into consideration, we agreed that pangolin is more likely to be one of intermediate hosts of SARS-CoV-2. According to the latest data on Feb. 14th, 2020 , there have been a total of 67,081 clinically diagnosed cases of COVID-19 in worldwide, with 1,526 deaths. A total of 25 countries and regions have infected people. Due to the Spring Festival transportation peak, the disease has been spread more rapidly across China ). 
As the origin area of COVID-19, Hubei province has been the most severely infected area, with 54,406 cumulative diagnosis cases. Wuhan city has 37,914 cases. Guangdong, Henan, and Zhejiang province have 1,294 cases, 1,212 cases, and 1,162 cases, respectively ). At present, the COVID-19 outbreak has been spread to all parts of China and around the world, including the United States, Thailand, and Japan. It has been noticed that most of these patients have ever been to Wuhan or contacted with people who had been in Wuhan. The distribution of COVID-2019 patients in China (including Hong Kong, Macao and Taiwan) and Hubei Province is shown in . SARS were reported worldwide, with 774 deaths and 26 countries and regions affected . Most cases were in Asia, Europe, and America. The main countries in Asia were China (including mainland, Macao, Hong Kong, and Taiwan), Singapore, and so on. The total number of cases in mainland China was 5,327, with 349 deaths . The cases were mainly concentrated in Beijing, Guangdong, and Shanxi ) . In total, 2,102 patients were from Hong Kong, Macao, and Taiwan, with 336 deaths . According to the latest data on Feb. 14th, 2020 , there have been a total of 67,081 clinically diagnosed cases of COVID-19 in worldwide, with 1,526 deaths. A total of 25 countries and regions have infected people. Due to the Spring Festival transportation peak, the disease has been spread more rapidly across China ). As the origin area of COVID-19, Hubei province has been the most severely infected area, with 54,406 cumulative diagnosis cases. Wuhan city has 37,914 cases. Guangdong, Henan, and Zhejiang province have 1,294 cases, 1,212 cases, and 1,162 cases, respectively ). At present, the COVID-19 outbreak has been spread to all parts of China and around the world, including the United States, Thailand, and Japan. It has been noticed that most of these patients have ever been to Wuhan or contacted with people who had been in Wuhan. 
The distribution of COVID-2019 patients in China (including Hong Kong, Macao and Taiwan) and Hubei Province is shown in .  As the number of COVID-19 patients in China has been growing rapidly, preventing the spread of SARS-CoV-2 is the most important and urgent task . It was shown that human-to-human transmission of SARS-CoV-2 has spread via droplets or close contacts , but aerosol and fecal-oral transmission still need further study . To reduce virus transmission, early detection and isolation are essential. In addition, close monitoring in crowded places is also important . The possible pathogens of SARS and COVID-19 are both derived from wild animals . Therefore, hunting, selling, and eating wild animals not only seriously damage the ecosystem, but also lead to the spread of epidemic diseases . Thus, banning all wildlife trade is an effective measure to prevent viral prevalence. Wearing level-D protective clothing can protect medical staff from infection of respiratory viruses . A vaccine against SARS-CoV has not been described in any published articles . However, on Jan. 26th, 2020, the China CDC started to develop a new vaccine for SARS-CoV-2. The virus has been successfully isolated and seed strains have been screened . The early symptoms of SARS and COVID-19 are very similar to winter influenza, and the most important way to distinguish flu and pneumonia is to take throat swabs for viral testing . Current diagnostic tests for coronavirus include RT-PCR, real-time reverse transcription PCR (rRT-PCR), reverse transcription loop-mediated isothermal amplification, as well as real-time RT-LAMP . National Medical Products Administration has approved seven new nucleic acid test reagents for coronavirus, which were developed based on fluorescence PCR by Feb. 1st, 2020 . Suspected infections can be detected accurately and quickly for timely isolation and treatment to avoid infecting others by using these test reagents. 
Both SARS-CoV and SARS-CoV-2 are CoVs; hence, the treatment strategies of SARS could be relevant for COVID-19 . In 2003, SARS was mainly treated by isolation of the patients, hormones treatment, antiviral and symptomatic treatments, and many drugs such as glucocorticoid and interferon . Now, isolation, antiviral, and symptomatic treatments are still mainly adopted for COVID-19 treatment. As effective drugs for SARS, hormones and interferons can also be used to treat COVID-19 . Lopinavir is one kind of protease inhibitor used to treat HIV infection, with ritonavir as a booster. Lopinavir and/or ritonavir has anti coronavirus activity in vitro. Hong Kong scholars found that, compared with ribavirin alone, patients treated with lopinavir/ritonavir and ribavirin had lower risk of acute respiratory distress syndrome (ARDS) or death caused by SARS-CoV . Lopinavir/ritonavir has also been clinically tested in treatment of COVID-19, and showed wonderfully effective treatment for some patients, but the general clinical effect has not been determined . More effective treatments are still under continuing exploration: On Jan. 25th, 2020, a joint research team from the Shanghai Institute of Materia Medica, Chinese Academy of Sciences, and Shanghai Tech University screened and identified 30 potential drugs that are reported to be effective against SARS-CoV-2 . A high-resolution crystal structure of SARS-CoV-2 coronavirus 3CL hydrolase (Mpro) was announced after the outbreak of COVID-19 in the world , and human coronaviruses (HCoVs) have been treated as severe pathogens in respiratory tract infections. Nelfinavir was predicted to be a potential inhibitor of SARS-CoV-2 main protease . The first patient in the US had been trial-treated with intravenous remdesivir (a novel nucleotide analogue prodrug in development) due to a severe infection . No adverse reactions were observed during the administration, and the patient's condition was effectively improved . 
Clinical trials of remdesivir for treatment of COVID-19 just started on Feb. 5th and 12th, 2020 in Wuhan and Beijing, respectively, and the experimental results remain unclear . Many studies have been performed to study the pathogenesis of SARS-CoV . The spike (S) protein and N protein confer stability to the viral particle . The N protein is a structural protein involved in virion assembly, and plays a pivotal role in virus transcription and assembly efficiency . S protein can bind to the cellular receptors of sensitive cells and mediate infection of their target cells, after which it begins to replicate in the cytoplasm . SARS-CoV mainly targets the lungs, immune organs, and small systemic blood vessels and causes systemic vasculitis and decrease of immune function . More seriously, the infection leads to extensive pulmonary consolidation, diffuse alveolar damage, and the formation of a transparent membrane, finally deteriorating to respiratory distress . As a large number of people have left Wuhan, the control of the epidemic situation is extremely urgent, and the treatments of COVID-19 are imminent. On Feb. 14th, 2020, there were more than 54,000 confirmed patients in Hubei province, China . Due to the lack of effective antiviral drugs, the prognosis of patients solely depends on their age and physical condition . Although it was reported that the clinically recovered patients exceed the number of dead, the majority of the patients are still not cured in hospital. In addition, the potential adaptive mutation of SARS-CoV-2 makes it difficult for vaccine development. Therefore, it is urgent for us to develop more sensitive inspection methods and effective drugs. In addition to the well-known SARS-CoV, MERS-CoV, as one Merbecovirus subgenus of β-CoVs, is also extremely invasive. MERS-CoV is the pathogen of the Middle East Respiratory Syndrome, which can infect both humans and animals, and can be transmitted through camels . 
It mainly occurs in Saudi Arabia and has a high mortality rate . Studies had demonstrated that the clinical course of SARS and MERS was highly similar, and SARS and MERS may have similar pathogenesis . The genome sequence of SARS-CoV-2 also shows some similarities to that of MERS-CoV. It will be very interesting to study the relationship among SARS-CoV, MERS-CoV, and SARS-CoV-2 that may be exploited for future developing broad-spectrum antiviral therapies. Although more and more studies for SARS-CoV-2 have sprung up since the outbreak of this epidemic COVID-19, based on our comparison, we propose some key questions to be clarified in future studies ). In-depth understanding the underlying pathogenic mechanisms of SARS-CoV-2 will reveal more targets for better therapy of COVID-19. . Proposed questions to study SARS-CoV-2 for future studies. Can the susceptibility of asymptomatic carriers be judged by detecting the serum reactivity level of N protein? Apart from droplet transmission and contact transmission, are there other methods to transmit SARS-CoV-2? What is the percentage of COVID-19 patients have been infected with SARS and produced antibodies? Does traditional Chinese medicine have any effect on the treatment of COVID-19 caused by SARS-CoV-2? Do any environmental factors, such as regional conditions or climate, affect SARS-CoV-2 transmission? Author Contributions: Conceptualization, L.X., Y.W. and X.G.; methodology, J.X. and W.Z.; software, S.Z. and W.Z.; validation, J.X., S.Z. and T.T.; formal analysis, J.X. and S.Z.; writing-original draft preparation, J.X. and S.Z.; writing-review and editing, L.X., Y.W., W.Z. and X.G.; visualization, J.X., S.Z., and A.E.A.; funding acquisition, L.X., Y.W. and X.G. All authors have read and agreed to the published version of the manuscript. ",
#  'China officially declared the epidemic as an outbreak on January 20 when obvious human-to-human transmissions were ascertained with reagent probes and primers distributed to local agencies on that day. Immediately following the declaration, massive actions were taken the next day to curb the epidemic at Wuhan, and soon spread to the whole country from central to local government, including all sectors from business to factories and to schools. On February 23, 2020, Wuhan City and other cities along with the main traffic lines around Wuhan were locked down. Rigorous efforts were devoted to 1) identify the infected and bring them to treatment in hospitals for infectious diseases, 2) locate and quarantine all those who had contact with the infected, 3) sterilize environmental pathogens, 4) promote mask use, and 5) release to the public of number of infected, suspected, under treatment and deaths on a daily basis. The sudden escalation of the control and the spread of the number of infected and deaths, however, ignited strong emotional responses of fear and panic among people in Wuhan. The negative emotional responses soon spread from Wuhan to other parts of China, and further to the world via almost all communication channels, particularly social media. The highly emotional responses of the public were fueled by (1) sudden increases in the number of detected new cases after the massive intervention measures to identify the infected; (2) massive growing needs for masks; (3) a large number of suspected patients waiting to confirm their diagnose; (4) a large number of diagnosed COVID-19 patients for treatment; and (5) a growing number of deaths, despite national efforts to improve therapy, including the decision to build two large hospitals within a period of days. The emotional responses, mostly stimulated by the daily release of data have created a big barrier for effective control of the epidemic as has been observed in other epidemics of similar nature . 
It is a paradox that during the early period of an epidemic, little is known or available about the new infections; while the need for such information is at the highest level. This is particularly true for the COVID-19. The occurrence of this epidemic may follow a nonlinear, chaotic and catastrophic process, rather similar to the epidemic of SARS that occurred in Hong Kong in 2003 , the Ebola epidemic in West Africa during 2013-16 , the pandemic of 2009 H1N1 epidemic started and the recent measles outbreaks in the United States (US) . Similar to an eruption of a volcano or occurrence of an earthquake, no matter how closely it is monitored, how much research we have done, how much we know about it, no one knows for sure if and when the virus infection will become an outbreak. Therefore, there is no so-called rational responses, no standardoperating-procedure (SOP) to follow, no measures to take without negative consequences . However, defining the COVID-19 as nonlinear and chaotic does not mean that we cannot do anything after we knew it was an outbreak, but simply waiting. On the contrary, defining it as nonlinear and chaotic will better inform us to make right decisions and to take appropriate actions. (1) During the early stage of an infection, which we cannot tell whether it will be growing into an outbreak, we must closely monitor it using limited data and to find the early signs of change and to predict if and when it will become an outbreak; (2) After it is declared as an outbreak, it is better to take actions as soon as possible since infectious diseases can be controlled even without knowledge of the biology ; and evaluate if the control measures work. The ultimate goal of this study is to attempt to provide some solutions to this paradox by providing early messages to inform control measures, to be optimistic and not panic, to ask right questions, and to take right actions. 
In theory, the true number of persons with COVID-19 infection can never be known no matter how we try to detect it. In practice, of all the infected cases in a day, there are some who have passed the latent period when the virus reaches a detectable level. These patients can then be detected if: a) detection services are available to them, b) all the potentially infected are accessible to the services and are tested, and c) the testing method is sensitive, valid and reliable. When reading the daily data, we must be aware that the detected and diagnosed cases in any day can be great, equal, or below the number of detectable. For example, a detectable person in day one can be postponed to next day when testing services become available. This will result in reduction in a detection rate < 100% in the day before the testing day and a detection rate > 100% in the testing day. Results of F(x) provide information most useful for resource allocation to support the prevention and treatment; however F(x) is very insensitive to changes in the epidemic. To better monitor the epidemic, the first derivative of F(x) can be used: We used these estimated P i in this study in several ways. The modeling analysis was completed using spreadsheet. As a reference to assess the level of severity of the COVID-19 epidemic, the natural mortality rate of Wuhan population was obtained from the 2018 Statistical Report of Wuhan National Economy and Social Development. The dynamic changes based on the observed F(x) in were presented in using the first derivatives F ′ (x) (top panel of the figure) and the second derivative F ′ ′ (x) (bottom panel of the figure), respectively. Before the declaration of outbreak, information provided by the two dynamic measured was similar: not much variations were revealed relative to the changes after the outbreak. These findings suggest the nonlinear and chaotic character of the COVID-19 outbreak. 
In this study, we used a novel approach to distill information from the cumulative number of diagnosed cases of COVID-19 infection. Among various types of surveillance data, this data often reported the earliest and on a continuous basis with high completeness and are most widely available. In addition, patients with a diagnosed infection are those with high likelihoods to spread the virus to others. Findings from this study provided useful information in a real time manner to monitor, evaluate and forecast the COVID-19 epidemic in China. The methods used in this study although somewhat mathematical, are easy to follow while information extracted from the commonly used data with the methods are highly useful and more sensitive than the daily new and cumulative cases. Although an analytical demonstration of the COVID-19 outbreak as nonlinear, chaotic and catastrophic requires more time to wait till the epidemic ends, evidence in the first 2 months suggests that the COVID-19 outbreak in China is nonlinear and chaotic. The epidemic emerged suddenly after a long latent period without dramatic changes as revealed from the cumulative cases, and their first and second derivatives. The high responsiveness of the epidemic to interventions adds additional evidence supporting the chaotic and catastrophic nature, and demonstrating the selection of a good timing to start intervention. Many of these characters are similar to those observed in the 2003 SARS epidemic started in Hong Kong , the 2013-16 Ebola spread in the West Africa , the 2009 pandemic of H1N1 started in the US , and the measles outbreaks over 80 cities in the US recently . Even the seasonal common flu has been proved to have a nonlinear component . The significance of nonlinear and chaotic nature of COVID-19 means that no methods are available to predict exactly at what point in time the epidemic will emerge as an outbreak, just like volcanoes and earthquakes. 
Therefore, practically there is no so-called a best time or missed the best time to take actions. There will also no so-called rational analysis and rational responses. There is no silver bullet to use, no standard-operating-procedure (SOP) to follow, and no measures without negative consequences to control the epidemic . For example, it took more than 6 months for both the US and the WHO to determine the 2009 H1N1 pandemic as an outbreak . Therefore, knowing the nonlinear and chaotic nature of an epidemic outbreak, like COVID-19, for all stockholders will be essential to the mobilization of resources, working together, taking all actions possible to control the epidemic, and minimizing the negative consequences. Specifically, what we can do to deal with an outbreak like COVID-19 would be to (1) collect information as early as possible, (2) monitor the epidemic as close as possible just like we do for an earthquake and make preparations for a hurricane and (3) communicate with the society and use confirmed data appropriately reframed not causing or exacerbating fear and panic in the public, stress and distress among medical and public health professionals, as well as administrators to make right decisions and take the right strategies at the right time in the right places for the right people. Knowing the nonlinear and chaotic nature is also essential for taking actions to control the outbreak of an epidemic like the COVID-19 infection. As soon as an outbreak is confirmed, the follow measures should be in position immediately 1) closely and carefully monitor the epidemic; 2) take evidence-based interventions to control the epidemic, 3) actively assess responses of the epidemic to the interventions; 4) allow errors in the intervention, particularly during the early period of the epidemic, 5) always prepare for alternatives. Another confusion is, when an epidemic starts, everyone asks what it is? How does it happen? How should I do to avoid infection? 
Is there any effective treatment? Answering these questions takes time, but there is no need to wait till all these questions are resolved before taking actions. We can take actions to prevent COVID-19 immediately while waiting for answers to these questions. This is because we have the evidence-based strategy for control and prevention of any infectious disease without complete understanding of an infection. That is so-called Tri-Component Strategy: locating and controlling the sources of infection, identifying and blocking the transmission paths, and protecting those who are susceptible . This was just what China has done, is doing, and will continue to do this time. Typical examples of control and prevention measures include locking down of cities, communities, and villages with potential of large scale transmission, massive environment sterilization, promotion of mask use, efforts to locate, isolate and treat the infected. More importantly, most of these actions are initiated, mobilized, coordinated and supported by the government from central to local, and enhanced by volunteers and international support. There are a number of advantages of methods we developed and used in this study. First, framing the diagnosed cases as the cumulative, the first and the second derivative constructs a system to gauge the epidemic, with the cumulative cases showing the overall level of the epidemic, the first derivative to reflect the change of the epidemic, and the second derivative to monitor the speed of change. By inclusion of the mortality rate as a reference, results from our approach will be (1) comprehensive to inform the public to be prepared, not scared and not to blame others; (2) useful for administrators to make decisions; (3) valuable for medical and health professionals to take actions. 
Second, we conceptually separated (1) the true number of infections, which will never be practically detected, from (2) the infections that are practically detectable if services are available and accessible and detection technologies are sensitive and reliable, and (3) the actually detected cases of infections. This classification greatly improved our understanding of the observed data as well as findings from the two derivatives, and aided us in assessing the responsiveness to the massive interventions, and predicting of the epidemic over time. The clarification also enhanced our analytical approach by adding an exponential model to evaluate the detection rate and to bring more data assessing the responsiveness of the epidemic to the massive interventions. We highly recommend the inclusion of the methods as a part of routine surveillance in disease control and prevention institutions. There are limitations. First, this study covered only the first 2 months of the epidemic. We will continue to evaluate the utility of this method as we follow the development of the epidemic. Second, the methods used in this study was based on a close population. This hypothesis may not be true because of a large number of people with potential history of exposure in China traveled to other countries. Up to February 8, 2020, the total cases diagnosed were 37,552 worldwide (Worldometer on Coronavirus) with 37,198 in China, which accounted for 99.1% of the total number of the world. Therefore, the impact of close-population assumption would be rather limited. Third, there was a lack of individual patientlevel data for detailed analysis. Fourth, our model can be further improved with other data, such as cases by severity, number of the suspected, number of those who received treatments and treatment results. We will follow the epidemic closely and prepare for further research on the topic when more data become available. 
Despite the limitations, this study provided new data to encourage those who are infected to better fight against the infections; to inform and encourage the general public, the medical and health professionals and the government to continue their current measures and to think of more measures that are innovative and effective to end the COVID-19 epidemic. One of the greatest motivations for this study is to attempt to provide right information at the population level in a real manner to complement the data from micro-organism centered and laboratory-based biological, molecular, pharmacological and clinical information in both the academic and the mass media that often scare rather than encourage people, even health professionals. Of the diagnosed COVID-19 cases, less than 20% are severe. Findings from our study indicated that there is no need to be panic from a public health population perspective. Although the total cases COVID-19 reached to big numbers, but the 2-month incidence rate was about a half of the natural death rate for Wuhan residents.',
#  "Upon a viral outbreak, it is important to rapidly establish whether the outbreak is caused by a new or a previously known virus (Box 1), as this helps decide which approaches and actions are most appropriate to detect the causative agent, control its transmission and limit potential consequences of the epidemic. The assessment of virus novelty also has implications for virus naming and, on a different timescale, helps to define research priorities in virology and public health. For many human virus infections such as influenza virus 1 or norovirus 2 infections, well-established and internationally approved methods, standards and procedures are in place to identify and name the causative agents of these infections and report this information promptly to public health authorities and the general public. In outbreaks involving newly emerged viruses, the situation may be different, and appropriate procedures to deal with these viruses need to be established or refined with high priority. Virus nomenclature is a formal system of names used to label viruses and taxa. The fact that there are names for nearly all viruses within a species is due to the historical perception of viruses as causative agents of specific diseases in specific hosts, and to the way we usually catalogue and classify newly discovered viruses, which increasingly includes viruses that have not been linked to any known disease in their respective hosts (Box 1). The WHO, an agency of the United Nations, coordinates international public health activities aimed at combating, containing and mitigating the consequences of communicable diseases-including major virus epidemics-and is responsible for naming disease(s) caused by newly emerging human viruses. In doing so, the WHO often takes the traditional approach of linking names of specific diseases to viruses (Box 1) and assessing virus novelty by an apparent failure to detect the causative agent using established diagnostic assays. 
Researchers studying coronaviruses-a family of enveloped positive-strand RNA viruses infecting vertebrates 8 -have been confronted several times with the need to define whether a newly emerged virus causing a severe or even life-threatening disease in humans belongs to an existing or a new (yet-to-be-established) species. This happened with SARS 9-12 and with Middle East respiratory syndrome (MERS) 13,14 a few years later. Each time, the virus was placed in the taxonomy using information derived from a sequencebased family classification . However, the host of a given virus may be uncertain, and virus pathogenicity remains unknown for a major (and fast-growing) proportion of viruses, including many coronaviruses discovered in metagenomics studies using next-generation sequencing technology of environmental samples . These studies have identified huge numbers of viruses that circulate in nature and have never been characterized at the phenotypic level. Thus, the genome sequence is the only characteristic that is known for the vast majority of viruses, and needs to be used in defining specific viruses. In this framework, a virus is defined by a genome sequence that is capable of autonomous replication inside cells and dissemination between cells or organisms under appropriate conditions. It may or may not be harmful to its natural host. Experimental studies may be performed for a fraction of known viruses, while computational comparative genomics is used to classify (and deduce characteristics of) all viruses. Accordingly, virus naming is not necessarily connected to disease but rather informed by other characteristics. 
In view of the above advancements and when confronted with the question of whether the virus name for the newly identified human virus should be linked to the (incompletely defined) disease that this virus causes, or rather be established independently from the virus phenotype, the CSG decided to follow a phylogeny-based line of reasoning to name this virus whose ontogeny can be traced in the figure in Box 1. Year 2012 First name Name origin the place of new viruses through their relation to known viruses in established taxa, including placements relating to the species Severe acute respiratorysyndrome-related coronavirus. In the classification of nidoviruses, species are considered biological entities demarcated by a genetics-based method 21 , while generally virus species are perceived as man-made constructs . To appreciate the difference between a nidoviral species and the viruses grouped therein, it may be instructive to look at their relationship in the context of the full taxonomy structure of several coronaviruses. Although these viruses were isolated at different times and locations from different human and animal hosts (with and without causing clinical disease), they all belong to the species Severe acute respiratorysyndrome-related coronavirus, and their relationship parallels that between human individuals and the species Homo sapiens ). This evaluation is usually conducted in silico using phylogenetic analysis, which may be complicated by uneven rates of evolution that vary across different virus lineages and genomic sites due to mutation, including the exchange of genome regions between closely related viruses (homologous recombination). 
However, given that the current sampling of viruses is small and highly biased toward viruses of significant medical and economic interest, group composition varies tremendously among different viruses, making decisions on virus novelty group-specific and dependent on the choice of the criteria selected for this assessment. Initially, the classification of coronaviruses was largely based on serological (cross-) reactivities to the viral spike protein, but is now based on comparative sequence analyses of replicative proteins. The choice of proteins and the methods used to analyse them have gradually evolved since the start of this century . The CSG currently analyses 3CLpro, NiRAN, RdRp, ZBD and HEL1 (ref. 52 ) , two domains less than previously used in the analyses conducted between 2009 and 2015 (refs. . According to our current knowledge, these five essential domains are the only ones conserved in all viruses of the order Nidovirales . They are thus used for the classification by all ICTV nidovirus study groups (coordinated by the NSG). The available yet limited epidemiological and clinical data for SARS-CoV-2 suggest that the disease spectrum and transmission efficiency of this virus 31-35 differ from those reported for SARS-CoV 9 . To accommodate the wide spectrum of clinical presentations and outcomes of infections caused by SARS-CoV-2 (ranging from asymptomatic to severe or even fatal in some cases) , the WHO recently introduced a rather unspecific name (coronavirus disease 19, also known as COVID-19 (ref. )) to denote this disease. Also, the diagnostic methods used to confirm SARS-CoV-2 infections are not identical to those of SARS-CoV. This is reflected by the specific recommendations for public health practitioners, healthcare workers and laboratory diagnostic staff for SARS-CoV-2 (for example, the WHO guidelines for SARS-CoV-2 (ref. ). 
By uncoupling the naming conventions used for coronaviruses and the diseases that some of them cause in humans and animals, we wish to support the WHO in its efforts to establish disease names in the most appropriate way (for further information, see the WHO's guidelines for disease naming ). The further advancement of naming conventions is also important because the ongoing discovery of new human and animal viruses by next-generation sequencing technologies can be expected to produce an increasing number of viruses that do not (easily) fit the virus-disease model that was widely used in the pregenomic era (Box 1). Having now established different names for the causative virus (SARS-CoV-2) and the disease , the CSG hopes that this will raise awareness in both the general public and public health authorities regarding the difference between these two entities. The CSG promotes this clear distinction because it will help improve the outbreak management and also reduces the risk of confusing virus and disease, as has been the case over many years with SARS-CoV (the virus) and SARS (the disease). Intra-SARS-CoV distances example, SARS-CoV-2/human/Wuhan/X1/2019. This complete designation along with additional and important characteristics, such as pathogenic potential in humans or other hosts, should be included in the submission of each isolate genome sequence to public databases such as GenBank. In publications, this name could be further extended with a sequence database ID-for example, SARS-CoV-2/human/Wuhan/X1/2019_XYZ12345 (fictional example)-when first mentioned in the text. We believe that this format will provide critical metadata on the major characteristics of each particular virus isolate (genome sequence) required for subsequent epidemiological and other studies, as well as for control measures. 
Historically, public health and fundamental research have been focused on the detection, containment, treatment and analysis of viruses that are pathogenic to humans following their discovery (a reactive approach). Exploring and defining their biological characteristics in the context of the entire natural diversity as a species has never been a priority. The emergence of SARS-CoV-2 as a human pathogen in December 2019 may thus be perceived as completely independent from the SARS-CoV outbreak in 2002-2003. Although SARS-CoV-2 is indeed not a descendent of SARS-CoV , and the introduction of each of these viruses into humans was likely facilitated by independent unknown external factors, the two viruses are genetically so close to each other , panel c of the figure in Box 4) that their evolutionary histories and characteristics are mutually informative. The currently known viruses of the species Severe acute respiratory syndrome-related coronavirus may be as (poorly) representative for this particular species as the few individuals that we selected to represent H. sapiens in . It is thus reasonable to assume that this biased knowledge of the natural diversity of the species Severe acute respiratory syndrome-related coronavirus limits our current understanding of fundamental aspects of the biology of this species and, as a consequence, our abilities to control zoonotic spillovers to humans. Future studies aimed at understanding the ecology of these viruses and advancing the accuracy and resolution of evolutionary analyses 41 would benefit greatly from adjusting our research and sampling strategies. This needs to include an expansion of our current research focus on human pathogens and their adaptation to specific hosts to other viruses in this species. To illustrate the great potential of species-wide studies, it may again be instructive to draw a parallel to H. sapiens, and specifically to the impressive advancements in personalized medicine in recent years. 
Results of extensive genetic analyses of large numbers of individuals representing diverse populations from all continents have been translated into clinical applications and greatly contribute to optimizing patient-specific diagnostics and therapy. They were instrumental in identifying reliable predictive markers for specific diseases as well as genomic sites that are under selection. It thus seems reasonable to expect that genome-based analyses with a comparable species coverage will be similarly insightful for coronaviruses. Also, additional diagnostic tools that target the entire species should be developed to complement existing tools optimized to detect individual pathogenic variants (a proactive approach). Technical solutions to this problem are already available; for example, in the context of multiplex PCR-based assays . The costs for developing and applying (combined or separate) species-and virus-specific diagnostic tests in specific clinical and/or epidemiological settings may help to better appreciate the biological diversity and zoonotic potential of specific virus species and their members. Also, the further reduction of time required to identify the causative agents of novel virus infections will contribute to limiting the enormous social and economic consequences of large outbreaks. To advance such studies, innovative fundraising approaches may be required. Although this Consensus Statement focuses on a single virus species, the issues raised apply to other species in the family and possibly beyond. A first step towards appreciation of this species and others would be for researchers, journals, databases and other relevant bodies to adopt proper referencing to the full taxonomy of coronaviruses under study, including explicit mentioning of the relevant virus species and the specific virus(es) within the species using the ICTV naming rules explained above. 
This naming convention is, regretfully, rarely observed in common practice, with mixing of virus and species names being frequently found in the literature (including by the authors of this Consensus Statement on several past occasions). The adoption of accurate virus-naming practices should be facilitated by the major revision of the virus species nomenclature that is currently being discussed by the ICTV and is being planned for implementation in the near future . With this change in place, the CSG is resolved to address the existing significant overlap between virus and species names that complicates the appreciation and use of the species concept in its application to coronaviruses."]


# Load the candidate passages: readlines() yields one list entry per line of
# squad.txt, each still carrying its trailing newline.
# The encoding is pinned so decoding does not depend on the platform's default
# codec (assumes squad.txt is UTF-8 -- TODO confirm against how it was written).
with open('../input/topiqal/squad.txt', 'r', encoding='utf-8') as fp:
    data = fp.readlines()

# build_para() reads the passages through this module-level name.
contexts = data

def build_para():
    """Build one SQuAD-style paragraph entry per item of ``contexts``.

    Each entry is a dict whose ``"qas"`` value is the result of a separate
    ``build_qa()`` call and whose ``"context"`` value is the passage text.
    Entries are returned in the same order as ``contexts``.

    Returns:
        list[dict]: one paragraph dict per passage.
    """
    return [
        {"qas": build_qa(), "context": passage}
        for passage in contexts
    ]

# Assemble the SQuAD v2.0 payload: every paragraph from build_para() sits
# under a single article titled "Abstract".
abstract_article = {"title": "Abstract", "paragraphs": build_para()}
squad = {"version": "v2.0", "data": [abstract_article]}

# Serialise the payload to input_file.json in the current working directory.
with open('input_file.json', 'w') as f:
    json.dump(squad, f)



### **Prediction**

In [None]:

%%time
# Time a single inference pass. %%timeit would re-execute the whole cell body
# for every timing repeat, i.e. launch the BioBERT prediction several times.
# LOCAL_OUTPUT_DIR/ is taken literally by the shell here; point --output_dir at
# a real directory before running.

!python run_squad.py \
  --vocab_file='/biobert_large/vocab_cased_pubmed_pmc_30k.txt' \
  --bert_config_file='/biobert_large/bert_config_bio_58k_large.json' \
  --init_checkpoint='/model.ckpt-10859' \
  --do_train=False \
  --max_query_length=30  \
  --do_predict=True \
  --predict_file=input_file.json \
  --predict_batch_size=8 \
  --n_best_size=10 \
  --max_seq_length=384 \
  --doc_stride=128 \
  --output_dir=LOCAL_OUTPUT_DIR/

The ID-to-question conversion happens below, given the predictions.json file produced by the BERT-SQuAD inference.

In [None]:
squad_task3_CZI = {'data': [{'paragraphs': [{'context': 'International Virus Classification Commission (ICTV) classified 2019-nCoV as Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) on February 11, 2020. At the same time, WHO named the disease caused by 2019-nCoV as COVID-19. Common symptoms of a person infected with coronavirus include respiratory symptoms, fever, cough, shortness of breath, and dyspnea. In more severe cases, infection can cause pneumonia, severe acute respiratory syndrome, kidney failure, and even death. There is currently no specific medicine or treatment for diseases caused by SARS-CoV-2 . In the fight against coronavirus, scientists have come up with three strategies for developing new drugs . As  Whether the screened anti-viral drugs really work on these targets needs further verification. We also do not recommend the application of new coronavirus pneumonia to compounds for which no target has been predicted. Nsp3b and E-channel. But we need to do further experiments to verify this conclusion. In ',
     'qas': [{'id': '468c2151-6187-4ce1-9536-93cb71306e03',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'c8af20d0-4476-438f-8e83-d0804c52dcab',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '8bf87c71-0f97-4b9c-9083-f6c10c5bc10d',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '1d0d7701-f592-4c9d-99cb-3d2e0413f66a',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '7af64f18-8ef1-48a6-9f0c-5682dfceab1f',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'f2f2d3ae-0c70-49c2-8981-35936e218682',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'a8134d01-1109-4990-b8c5-f10fc1d0770e',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '0c1d43ff-1af4-4ca0-a506-957f953d5863',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '5b782552-c91b-4682-83b1-1911cd08c348',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'bcfdecb4-fd7a-4501-b203-3dcf486c919f',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'b9b7218d-0341-46d2-8840-c8231db232ef',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'Since December 2019, Wuhan and gradually other places of China have experienced an outbreak of pneumonia epidemic caused by the 2019 novel coronavirus (2019-nCoV, later named SARS-CoV-2). The World Health Organization has declared the current outbreak of COVID-19 in China as a Public Health Emergency of International Concern. As of 10:00 Feb 13, 2020, the epidemic has caused 1366 deaths out of 59 834 confirmed and 16 067 suspected cases. Some unprecedented measures were taken to stop the spread of the virus including cancelling of gatherings, extending the Chinese New Year holidays, and limiting the number of people in public places (e.g. train stations and airports). The outbreak itself and the control measures may lead to widespread fear and panic, especially stigmatization and social exclusion of confirmed patients, survivors and relations, which may escalate into further negative psychological reactions including adjustment disorder and depression. Sudden outbreaks of public health events always pose huge challenges to the mental health service system. Examples include the HIV/AIDS epidemic that captivated world attention in the 1980s and 1990s, the severe acute respiratory syndrome (SARS) in 2002 and 2003, the H1N1 influenza pandemic of 2009, the Ebola virus outbreak in 2013, and the Zika virus outbreak in 2016. During these epidemics, the consequences on the psychosocial wellbeing of at-risk communities are sometimes largely overlooked, especially in the Ebola-affected regions, where few measures were taken to address the mental health needs of confirmed patients, their families, medical staffs or general population. The absence of mental health and psychosocial support systems and the lack of well-trained psychiatrists and/or psychologists in these regions increased the risks of psychological distress and progression to psychopathology. The lack of effective mental health systems added to the poverty in Sierra Leone and Liberia. 
In China, the mental health service system has been greatly improved after several major disasters, especially the Wenchuan earthquake. In the process of dealing with group crisis intervention, various forms of psychosocial intervention services have been developed, including the intervention model of expert-coach-teacher collaboration after the Wenchuan earthquake 10 and the equilibrium psychological intervention on people injured in the disaster incident after the Lushan earthquake. With the support for remote psychological intervention provided by the development of Internet technology, especially the widespread application of 4G or 5G networks and smartphones, we developed a new intervention model to handle the present COVID-19 public health event. This new model, one of West China Hospital, integrates physicians, psychiatrists, psychologists and social workers into Internet platforms. We propose that the psychological crisis intervention should be dynamic, adapted to suit different stages of the epidemic, i.e., during and after the outbreak. During the outbreak, mental health professionals should actively participate in the overall intervention process for the disease, so that the mental health and psychosocial response can be mobilized in a timely fashion. Specifically, psychological crisis interventions should be integrated into the treatment of pneumonia and blocking of the transmission routes. In this stage, psychological crisis intervention should include two simultaneous activities: (1) intervention for fear of disease, carried out mainly by physicians and assisted by psychologists; During the epidemic, rapid integration of the government and social forces into the Internet framework can maximize effective management of the psychological crisis. We established a pyramid structure of psychological crisis management with government as the core leader. At the bottom of the pyramid are communities, which mainly provide psychosocial support. 
Psychological assistance (such as hotline, online consulting) is used to identify and help the target groups who need intervention. Through the Huayitong app and Psyclub applet (two integrated APPs for online registration, appointment, payment and other functions for West China Hospital and Sichuan psychological consultant platform), telephone hotline and WeChat platform, we quickly organized physicians at all levels of the West China Hospital (including retired professors) and psychologists from all over Sichuan Province to form psychological rescue teams to formulate solutions (e.g. developing technical guidelines and training programs, starting online consultation and setting up problem feedback mechanisms). Psychological rescue teams conduct crisis interventions for confirmed patients and front-line staff. The expert team at the top of the pyramid provide health education and training during the whole process . How to quickly identify the emotional and stress problems of individuals is an important part of basis for psychological intervention. We screened the mental health status of suspected cases, medical staffs and general population via WeChat platform and/or telephone by using questionnaires (e.g. Mood Index Questionnaire, Patient Health Questionaire-9) as the evaluation tool. Proper intervention strategies were chosen based on the screening results. Follow-up is performed regardless of whether the individual reports mental health problems or not. The process and content of psychological intervention is shown in and . After the epidemic outbreak, psychosocial support mainly focuses on the quarantined people and medical staffs working for them . Social support and psychological intervention are mostly provided by family members, social workers, psychologists, and psychiatrists to isolated patients, suspected patients, and close contacts, primarily through telephone hotline and Internet (e.g. WeChat, APPs). 
Medical staffs working for the quarantined are the special group who need a lot of social support, and they are also an important force to provide social support for the isolated patients. To guarantee their continued effective work, their mental health status should be monitored and a continuum of timely interventions should be made available to support them. The Anticipated, Plan and Deter (APD) Responder Risk and Resilience Model is an effective method for understanding and managing psychological impacts among medical staffs, including managing the full risk and resilience in the responder "hazard specific" stress. In the APD process, medical staffs receive a pre-event stress training focusing on the psychosocial impact of high-casualty events on the hospital and field disaster settings. During the training, participants are given the chance to develop a "personal resilience plan", which involves identifying and anticipating response challenges. After that they should learn to use it in real intervention response.',
     'qas': [{'id': '1d60331d-c558-4014-ae77-da7bb5efd0af',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '9d6c75d8-3e8e-4fd9-8977-e72b461ecc63',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': 'd0bd4d3e-11bd-433b-bace-35926edc1025',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'a526e3e3-1efd-404f-b488-efd5ecd6f536',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': 'c4cc943e-7eb6-47bd-8eec-18f9744336f9',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'ddb2d5c0-030d-4756-a28c-29c1d6b3799c',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '9721e098-60cd-4576-ad4b-193762fe667c',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': 'a728e088-bb4a-4a6a-a73b-03ee7444c278',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '0e1998c0-7d0a-4dec-83bc-0adea5da5518',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '58d2cca5-2f76-44ae-b781-b0eeda6c0dde',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '53f150e8-e68b-4f89-86e4-ac4bb6af2317',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'Coronavirus disease 2019 (COVID-2019) has been recognized as a global threat, and several studies are being conducted using various mathematical models to predict the probable evolution of this epidemic. These mathematical models based on various factors and analyses are subject to potential bias. Here, we propose a simple econometric model that could be useful to predict the spread of COVID-2019. We performed Auto Regressive Integrated Moving Average (ARIMA) model prediction on the Johns Hopkins epidemiological data to predict the epidemiological trend of the prevalence and incidence of COVID-2019. For further comparison or for future perspective, case definition and data collection have to be maintained in real time. © 2020 The Authors. Published by Elsevier Inc. This is an open access article under the CC BY license (http://creativecommons. org/licenses/by/4.0/). The daily prevalence data of COVID-2019 from January 20, 2020 to February 10, 2020 were collected from the official website of Johns Hopkins University (https://gisanddata.maps.arcgis.com/apps/ opsdashboard/index.html), and Excel 2019 was used to build a time-series database [1]. ARIMA model was applied to a dataset consisting of 22 number determinations. shows that the overall prevalence of COVID-2019 presented an increasing trend that is reaching the epidemic plateau. The difference between cases of one day and cases of the previous day D(Xn-Xn-1) showed a nonconstant increase in the number of confirmed cases. Descriptive analysis of the data was performed to evaluate the incidence of new confirmed cases of COVID-2019 and to prevent eventual bias. The ARIMA model includes autoregressive (AR) model, moving average (MA) model, and seasonal autoregressive integrated moving average (SARIMA) model . The Augmented Dickey-Fuller (ADF) unit-root test helps in estimating whether the time series is stationary. 
Log transformation and differences are the preferred approaches to stabilize the time series . Seasonal and nonseasonal differences were used to stabilize the term trend and periodicity. Parameters of the ARIMA model were estimated by autocorrelation function (ACF) graph and partial autocorrelation (PACF) correlogram. To determine the prevalence of COVID-2019, ARIMA (1,0,4) was selected as the best ARIMA model, while ARIMA (1,0,3) was selected as the best ARIMA model for determining the incidence of COVID-2019. Gretl2019d statistical software was used to perform Specifications Infectious Diseases Specific subject area Econometric models applied to infectious diseases epidemiological data to forecast the prevalence and incidence of COVID-2019 Type of data Chart Graph data were acquired Gretl 2019d http://gretl.sourceforge.net/win32/index_it.html Data format Data are in raw format and have been analyzed. An Excel file with data has been uploaded. Parameters for data collection Parameters used for ARIMA were model ARIMA (1,2,0) and ARIMA (1,0,4) Description of data collection The daily prevalence data of COVID-2019 from January 20, 2020 to February 10, 2020 were collected from the official website of Johns Hopkins university (https://gisanddata. maps.arcgis.com/apps/opsdashboard/index.html), and Excel 2019 was used to build a time-series database. Descriptive analysis of the data was performed, and to evaluate the incidence of new confirmed cases of COVID-2019 and to prevent eventual bias, the difference between the cases confirmed on that day and the cases confirmed on the previous day were calculated D(X n -X n-1 ). Raw data can be retrieved from the Github repository https://github.com/ CSSEGISandData/COVID-19 Value of the Data These data are useful because they provide a forecast for COVID-2019 epidemic, thus representing a valid and objective tool for monitoring infection control. 
All institutions involved in public health and infection control can benefit from these data because by using this model, they can daily construct a reliable forecast for COVID-2019 epidemic. The additional value of these data lies in their easy collection and in the possibility to provide valid forecast for COVID-2019 daily monitoring after the application of the ARIMA model. These data represent an easy way to evaluate the transmission dynamics of COVID-2019 to verify whether the strategy plan for infection control or quarantine is efficient. statistical analysis on the prevalence and incidence datasets, and the statistical significance level was set at 0.05. A previous study was considered as reference for the methodology of the analysis . Logarithmic transformation was performed to evaluate the influence of seasonality on the forecast. The correlogram reporting the ACF and PACF showed that both prevalence and incidence of COVID-2019 are not influenced by the seasonality. The forecast of prevalence and incidence data with relative 95% confidence intervals are reported in . Although more data are needed to have a more detailed prevision, the spread of the virus seems to be slightly decreasing. Moreover, although the number of confirmed cases is still increasing, the incidence is slightly decreasing. If the virus does not develop new mutations, the number of cases should reach a plateau ( . The forecast and the estimate obtained are influenced by the "case" definition and the modality of data collection. For further comparison or for future perspective, case definition and data collection must be maintained in real time. ',
     'qas': [{'id': '89743c94-3ede-4a21-9a95-fa8ce7ad18ea',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'ad84a5f1-0482-42c3-af76-1cc465ff393c',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '202c83da-012d-4d41-83a2-01f215ba18f2',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '2514870a-9383-48a3-87ab-f7db3828bab7',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': 'bdb27ccf-b1c2-44d8-bbb5-d454e686e75f',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '25457424-7632-4a66-91ba-754aa40c2126',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'e7d55420-8c3b-4c70-ba9a-77b2c8b4de77',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '0eac5863-5a8e-492c-b904-0548ad522139',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '781c629d-9215-47a8-8624-2f7d0b777195',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '38bfa97a-87ae-4dd3-9092-f1f4e201bb47',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '629d899e-1368-46cb-9983-dde1be1bc608',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "Using 2019 data from the International Air Transport Association (IATA), we identified all cities in China that received at least 100 000 airline passengers from Wuhan during February through April 2019. In a scenario where these cities might experience local epidemics, we analyzed the volumes of airline passengers to international destinations from February to April 2019. To generate these estimates, we used anonymized, passenger-level flight itinerary data from IATA, comprising both commercial flights and scheduled charter flights. These data account for ∼90% of global air travel volumes, with the remaining volumes modelled using market intelligence. We report the top 50 international destination cities of passengers arriving from nine cities in mainland China, plus Hong Kong, and present the corresponding infectious disease vulnerability index (IDVI) for each receiving country. The IDVI is a validated measure of a country's capacity to manage infectious disease threats, and utilizes multiple indicators including health, political and economic metrics. Scores range from 0 to 1 with higher scores representing a greater capacity to cope with epidemic threats. We analyzed international airline passenger trips from the following 10 cities: Wuhan, Beijing, Shanghai, Kunming, Chengdu, Xiamen, Haikou, Guangzhou, Shenzhen and Hong There currently are numerous unknowns including the presumed animal origins of the virus, the efficiency of humanto-human transmission, an understanding of the full spectrum of clinical illness and an incomplete epidemiological picture of disease activity in China. While our analysis does not account for the potential effects of the epidemic on changing travel behaviours, it reflects worldwide flows of airline travellers at the same time of year in 2019, including the Lunar New Year in February 2019. 
At the time of writing, flights and land transportation in and out of Wuhan have been suspended, and it is uncertain how this will impact disease transmission in China. Our findings could support public health planning and readiness for different scenarios should the current epidemic spread more widely across mainland China and neighbouring cities.",
     'qas': [{'id': '52116598-8fee-4d4d-945f-fbdcc57978d6',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'ca669eda-0e09-4fe8-bdd2-55910f2cbfa5',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '3bbd5304-0b5f-4472-bebb-461a00ea9953',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '98c6e111-0023-4833-abd3-f0933feb8cd6',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '23c3c976-20b0-4476-9a67-398929f29f04',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '0d826d5d-8997-4505-bb3f-398d616616b7',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '34450349-c1bf-414e-a3c8-ca5d6fece8b8',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '5a216ef3-dd53-4d2b-9038-e39a3bcef6e6',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '1290996c-c41a-4dad-bd35-43fcb0980e3f',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'c6bdca35-5877-4435-8e48-1d15d31cc32d',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '9550b0b0-a4a1-4508-9512-0a6d25a97bd7',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "While specific vaccines and antiviral agents are the most effective methods to prevent and treat viral infection, there are not yet effective treatments that target the 2019-nCoV. Development of these treatments may require months or years, meaning that a more immediate treatment or control mechanism should be found if possible. Herbs used in traditional Chinese medicine present a potentially valuable resource to this end. The effectiveness of herbal treatment to control contagious disease was demonstrated during the 2003 severe acute respiratory syndrome (SARS) outbreak . As such, the Chinese government is encouraging the use of herbal plants in fighting this new viral pneumonia. However, the application of herbal treatment is mainly guided by the type of herb (based on the catalogue of classic literature on herbs) and the patient's symptoms or signs. There is often not enough information to predetermine whether the herbs in question can directly target the viral cause, in other words, herbal usage is generally not guided by viral pathology. We think more detailed knowledge about the direct antiviral effects of different plants would be greatly helpful to the doctors selecting them. It is a challenge to screen out the herbs containing anticoronavirus (2019-nCoV) compounds from the large number of those possibly being used for patients infected with this pathogen, especially in very short time. Here, we propose two principles to guide such work: oral effectiveness and traditional usage compatibility. The first principle refers to the fact that most Chinese herbal plants are orally ingested after boiling with water, meaning that the anti-coronavirus (2019-nCoV) ingredients in selected plants should be absorbable via oral preparation. The second principle recognizes that candidate plants should be consistent with the type classifications for traditional herbal usage, since type-guided applications are integral to herbal use, as mentioned above. 
Following these two principles, we used a 6-step selection process (3 for each principle), including drug-likeness, evaluation of oral bioavailability, molecular docking, network pharmacology analysis and other methods to identify herbs that have both a high possibil-ity of containing effective anti-coronavirus (2019-nCoV) compounds and are classified as treating virus-caused respiratory infection. Since Chinese herbal treatments are always taken orally after boiling with water, an in silico integrative model of absorption, distribution, metabolism and excretion (ADME) was used to screen for natural compounds that may be bioactive via oral administration. The indices used for the screening include evaluation of oral bioavailability, Caco-2 permeability, drug-like value, and drug half-life. The threshold values indicating effectiveness for these four indices were > 30%, > À0.4, > 0.18 and > 3 h, respectively, as recommended by Hu et al . The values of these four indices can be obtained from the TCMSP database. Herbs were selected through three steps. (1) Primary selection: molecules chosen from the above steps were used as input for the TCMSP, ETCM and SymMap to search for plants containing that input and the plants were filtered by the numbers of antiviral compounds they contain. Those containing 2 or more antiviral compounds were selected for the next step. (2) Classic usage catalogue cross-reference: only herbs traditionally used to treat viral respiratory infection were retained for further study. The TCMSP provided the main components of each herb and the protein targets for each component. We identified the reported chemical constituents for each plant in the final analysis and used the ADME indices listed above to find the orally absorbable and drug-like compounds for the plant. The protein targets of these compounds were downloaded from the TCMSP database. 
All protein targets for each individual plant were used as input for the String online server (https://string-db.org/) to perform proteinprotein interaction analysis and pathway enrichment. Kyoto Encyclopedia of Genes and Genomes (KEGG) pathways enriched (with P < 0.01) by the input were downloaded. All data were processed using the statistical language R (3.6.2), unless otherwise specified. We received 261 hits from conducting our search in the PubMed database. After careful evaluation of the abstracts from these citations we downloaded and carefully analyzed the full text of 23 highly relevant papers. The natural compounds reported to have biologically confirmed anti-coronavirus activity were identified and then compared to the ingredients listed in three Chinese herbal databases. The result was 115 overlapping ingredients, which we used for further testing ). Step 2 Step 3 Step 2 Step 3 Step 1 Each of the potentially effective herbal remedies contains many ingredients in addition to the antiviral ones found here. Thus, the general effects of each plant should be examined by combining the effects of all of the orally absorbable and biologically active ingredients in it. To evaluate the possible general in vivo effects of each of our identified herbs, we used the ADME indices listed above to examine each of the orally absorbable and drug-like ingredients recorded in the TCMSP database for each plant. We then extracted the target proteins for each ingredient which had passed the screening process. All proteins belonging to a single plant were combined as input on the online protein-protein interaction analysis server, String, to find the pathway enrichment. For the 26 herbs, about 1/3 of the top 30 KEGG-enriched pathways (mean = 11) were related to regulating viral infection, immune/inflammatory reactions and hypoxia response, indicating that they are potentially effective treatments for viral respiratory infection ( ). 
Note that some of the herbal plants selected here had been reported to be effective for SARS-CoV infection in ). Two principles guided our screening work. The first is that the anti-coronavirus (2019-nCoV) components contained in the source plants should be absorbable via oral prescription. This principle requires that the herbs selected should contain biologically proven anti-coronavirus (2019-nCoV) ingredients, and that these natural compounds should pass the drug-likeness and oral bioavailability evaluations. Therefore, we conducted a three-step screening process. First, we extracted natural compounds verified in PubMed as being effective in treating SARS or MERS coronavirus and then cross-checked these compounds in the Chinese herbal databases. There were 115 overlapping compounds. This method was an expeditious way to identify natural components both preexisting in Chinese herbal treatment and having a high possibility of anti-coronavirus (2019-nCoV) activity. This is important, as the anti-coronavirus effects of the selected compounds have been biologically confirmed, and the genetic similarities between coronavirus (2019-nCoV) and SARS or MERS coronavirus are high . The anti-coronavirus effects of the natural compounds screened by the above method have been mainly confirmed in vitro by direct loading onto cultured cells, thus it does not guarantee their effectiveness in vivo, especially with oral preparation-the principal way in which Chinese herbals are administered. Therefore, to meet the first principle, we ran ADME filters on the natural compounds selected by 4 indices, as used by Hu et al. . Among the 115 compounds highlighted by our first step, only 13 passed this screening, showing the necessity of such a test. Our second principle for screening should also be emphasized and elaborated upon. It states that the selected herbal plants must conform to traditional usages. There are many kinds of Chinese herbs that have been used for thousands of years. 
Based on this rich history and experience, Chinese herbal medicines are divided into different types, each type dedicated to certain kinds of diseases. Ignoring these grouping guidelines can lead to serious side effects. Therefore, as a further condition for the medicine screened here, we verified that they have been routinely used to treat viral pneumonia. To meet this principle, we conducted another threestep screening process for the herbal plants. Of course, it should be pointed out that Chinese herbs that have not been identified through this screening process may still have beneficial effects. Further, considering that the biologically validated natural compounds reported in the literature cannot cover all antiviral natural compounds, and the natural compounds included in the Chinese medicine database are not complete, the process that we have followed may have excluded herbs that would be well suited to this treatment. Nevertheless, the purpose of this screening was to provide a rational approach for selecting Chinese herbal medicines with a high potential efficacy in treating 2019-nCoV and related viruses. The specific dosage and usage of each herb should be determined based on patients' manifestations. Finally, the key step in this screening was molecular docking. The 3D structures of the proteins used here are based on reported gene sequences. If the virus mutates during transmission, a new screening is recommended. In conclusion, this work has identified several Chinese medicinal plants classified as antiviral/pneumonia-effective that might directly inhibit the novel coronavirus, 2019-nCoV. Additionally, we propose screening principles and methods which may provide guidance in screening antiviral drugs from other natural drug databases. DZ conceived the study, participated in its design, coordination, and all the work processes. KW participated in herbal selection. XZ participated in data collection and network pharmacology analysis. 
SD helped to collect data. BP helped to draft the manuscript. ",
     'qas': [{'id': 'e8cefba4-4d01-4857-a22e-313e924e8e3a',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '247da974-4268-4c3d-b3a7-55665dcd997c',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '538db616-17aa-4e08-811c-82987a0fcd04',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '3db6a41b-addb-49e4-a934-b02e76c09a2e',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': 'eacfa27b-c2e8-4d1a-8290-050b2d4793ce',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '4a077570-65b9-43ad-9018-be2037cac84d',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '7064bc51-d442-4421-a714-481034b65149',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': 'da7636f4-a03a-4f09-aa62-274bbf519cff',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '8d1f185d-4733-4865-b979-19401fa1e832',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'af04c3be-3719-4d03-9e82-28a5a2234a50',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'f15e74ad-ce6a-45d2-ab84-ae60e22bad18',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "This study was conducted in accordance with the Declaration of Helsinki and was approved by the National Health Commission of China and Ethics Commission of the Jin Yin-tan Hospital of Wuhan (No. KY-2020-01.01). The requirement for written informed consent was waived given the context of emerging infectious diseases. Bronchoalveolar lavage fluid (BAL) samples were collected from five patients hospitalized with pneumonia in Jin Yintan Hospital of Wuhan, Wuhan, Hubei province, China from December 18 to 29, 2019. Information was gathered, including clinical data, demographic characteristics, underlying medical conditions, clinical signs and symptoms, chest radiographic findings, clinical laboratory testing results, traveling history, recent animal exposure, and outcomes. The data collected for the cases were deemed by the National Health Commission of the People's Republic of China as the contents of a public health outbreak investigation. Spot slides were prepared by applying 20 mL of the virusinfected or non-infected cell suspension onto 12-well Teflon-coated slides. The cells were fixed with 4% paraformaldehyde in 1Â phosphate-buffered saline (PBS) for 30 min, washed three times with PBS, blocked, and stained with serum from a convalescent patient or serum from a healthy person for 30 min at 37°C at a dilution of 1:200. Goat anti-human immunoglobulin G conjugated with fluorescein isothiocyanate was used as the secondary antibody (Jackson Immuno Research Laboratories, Inc., West Grove, PA, USA). Nuclei and the cytoplasm were counterstained with 4 0 ,6 0 -diamidino-2phenylindole and Evans blue (Sigma-Aldrich, St. Louis, MO, USA). Fluorescent images were obtained and analyzed using laser-scanning confocal microscopy (Airyscan LSM880, Zeiss, Berlin, Germany). Patient 1 was a 65-year-old man who reported a high fever and cough, with little sputum production, at the onset of illness. 
He had a continuous fever and developed severe shortness of breath 16 days later. He was a vendor at the Huanan Seafood Market, Wuhan, Hubei Province, China. Patient 2, a 49-year-old woman, presented with high fever and dry cough. Five days later, she developed dyspnea and was admitted to the hospital. She was also a worker in the Huanan Seafood Market. Patient 3 was a 52-year-old woman who did not report any market exposure. She was admitted to hospital because of fever, cough, and groundglass opacity in the chest computed tomography scan. Patient 4 was a 41-year-old man who also presented with high fever and dry cough at the onset of the illness. He developed acute respiratory distress syndrome 7 days later. This patient had no known history of exposure to the Huanan Seafood Market. Patient 5, a 61-year-old man, was admitted to a local hospital with a 7-day history of fever, cough, and dyspnea. He also worked in the market. With regards to medical history, Patient 4 had hypertension, and Patient 5 had chronic liver disease and abdominal myxoma, whereas none of the other patients had a record of underlying diseases. The demographic and clinical characteristics of the five patients are summarized in . A substantial proportion of all sequencing reads mapped to the newly reported CoV genome (BWA mem, version: 0.7.12), ranging from 71,883 (0.27% among all reads) in Patient 4 to 37,247,818 (85%) in Patient 5. In addition, very few reads mapped to known bacterial pathogens, including Streptococcus, Acinetobacter baumannii, and Pseudomonas [ -E]. The clinical features and laboratory test results of the five patients are summarized in . Fever, cough, and dyspnea were the most common symptoms. The white blood cell counts varied among these patients, but the lymphocyte counts were generally low. The alanine aminotransferase and serum creatine levels were normal or only slightly increased. 
Bilateral ground-glass opacities and consolidation were observed on chest radiography from two representative patients, Patient 2 based on aortic arch scan [ ] and pulmonary vein scan [ ] on day 10 after symptoms onset and Patient 5 taken on day 12 [ ] and 13 [ ] after symptoms onset. Several complications were observed in these patients. Four of the five patients (except for Patient 3) developed acute respiratory distress syndrome requiring oxygen therapy, and two patients were given extracorporeal membrane oxygenation. Two patients (Patients 1 and 5) experienced secondary infections, and Patient 5 later developed septic shock as well as acute kidney injury, and ultimately died of multi-organ failure. Patient 3 was discharged on January 8, 2020 (day 17 after symptoms onset). The other three patients were still hospitalized at the time of manuscript preparation. The treatments for these patients were shown in . Owing to the lack of epidemic information at present, the transmission modes of the novel CoV remain obscure. It is notable that three of the five patients had a history of recent exposure to a seafood market in Wuhan. However, the origin of infection is unknown at the time of manuscript preparation. It is assumed that the zoonotic CoV jumped to humans through an intermediate host; for example, camel is suspected as the intermediate host of MERS-CoV, whereas the palm civet may have contributed to the interspecies transmission of SARS-CoV to humans. Bat CoVs may evolve to adapt to using humans as a host during their circulation in a mammalian host, thereby enabling them to effectively infect humans. However, two of our patients did not have a history of exposure to the seafood market. Therefore, further investigation will be needed to determine the potential of multiple infection sources responsible for this uncommon outbreak. One of the most striking and concerning features of this virus is its ability to cause severe respiratory syndrome. 
The disease progressed rapidly with a major presentation of lower respiratory pathology. Notably, no obvious upper respiratory tract symptoms such as a sore throat and rhinorrhea were present in these patients. Therefore, further exploration is needed on the distribution of the viral receptor in the organs to potentially account for pathogenesis development. In addition, the possibility of unrecognized mild infections or subclinical infections should be clarified, as identification of such infections is critical to control spread of the disease. Development of serological assays would be largely beneficial to detect such types of infection at the population level. In conclusion, we identified a novel bat-borne CoV associated with a severe and fatal respiratory disease in humans. The emergence of this virus poses a potential threat to public health. Therefore, clarification of the source and transmission mode of these infections is urgently needed to prevent a potential epidemic.",
     'qas': [{'id': 'ebd2cda5-05b9-457a-ab1b-2494fe3d7c15',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'a7eff086-7f9a-424c-baa7-773a847468f9',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '51885c5d-acbc-4a45-ae78-6c562534f6ec',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '42553dd3-57b9-4a47-8274-dd6821a3c132',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '1d855e66-6913-4647-9449-5063bb6cc407',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '56b6593f-5700-49be-8f3a-c11e67f0395a',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '1e29276f-17f6-4dce-8bfe-2b1b4ccef6f0',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '9435d2c3-fed6-400f-8fa8-df275d93271c',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': 'dab13c0c-6d28-4430-97f6-7c17d17f8258',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '3ef47ec4-9286-41cd-9785-b1af2ba41f6c',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'ddf08448-9ef1-4179-bafd-3b34553caf79',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'Building on our previous experience collating news reports to monitor transmission of Ebola virus, here we present an effort to compile individual patient information and subnational epidemic curves on COVID-19 from a variety of online resources. Data were made publicly available in real time and were used by the infectious disease modelling community to generate and compare epidemiological estimates relevant to interventions. We describe the data generation process and provide an early analysis of age patterns of COVID-19, case counts across China and inter nationally, and delays between symptom onset, admissions to hospital, and reporting, for cases reported until Jan 31, 2020. In this population-level observational study, we used crowdsourced reports from DXY.cn, a social network for Chinese physicians, health-care professionals, pharmacies, and health-care facilities established in 2000. This online platform is providing real-time coverage of the COVID-19 outbreak in China, obtained by collating and curating reports from news media, government television, and national and provincial health agencies. The information reported includes time-stamped cumulative counts of COVID-19 infections, outbreak maps, and realtime streaming of health authority announcements in Chinese (directly or through state media). Every report is linked to an online source, which can be accessed for more detailed information on individual cases. These are publicly available, de-identified patient data reported directly by public health authorities or by state media. No patient consent was needed and no ethics approval was required. We closely monitored updates on DXY.cn between Jan 20, 2020, and Jan 31, 2020, to extract key information on individual patients in near real-time, and reports of daily case counts. 
For individual-level patient data, we used descriptions from the original source in Chinese to retrieve age, sex, province of identification, travel history, reporting date, dates of symptom onset and seeking care at a hospital or clinic, and discharge status, when available. Individual-level patient data were formatted into a line-list database for further quantitative analysis. Individual-level patient data were entered from DXY.cn by a native Chinese speaker (KS), who also generated an English summary for each patient. Entries were checked by a second person (JC). Since DXY.cn primarily provides  Evidence before this study An outbreak of coronavirus disease 2019 (COVID-19) was recognised in early January, 2020, in Wuhan City, Hubei province, China. The new virus is thought to have originated from an animal-to-human spillover event linked to seafood and live-animal markets. The infection has spread locally in Wuhan and elsewhere in China, despite strict intervention measures implemented in the region where the infection originated on Jan 23, 2020. More than 500 patients infected with COVID-19 outside of mainland China have been reported between Jan 1 and Feb 14, 2020. Although laboratory testing for COVID-19 quickly ramped up in China and elsewhere, information on individual patients remains scarce and official datasets have not been made publicly available. Patient-level information is important to estimate key time-to-delay events (such as the incubation period and interval between symptom onset and visit to a hospital), analyse the age profile of infected patients, reconstruct epidemic curves by onset dates, and infer transmission parameters. We searched PubMed for publications between Jan 1, 1990, and Feb 6, 2020, using combinations of the following terms: ("coronavirus" OR "2019-nCoV") AND ("line list" OR "case description" OR "patient data") AND ("digital surveillance" OR "social media" OR "crowd-sourced data"). 
The search retrieved one relevant study on Middle East respiratory syndrome coronavirus that mentioned FluTrackers in their discussion, a website that aggregates epidemiological information on emerging pathogens. However, FluTrackers does not report individual-level data on COVID-19. To our knowledge, this is the first study that uses crowdsourced data from social media sources to monitor the COVID-19 outbreak. We searched DXY.cn, a Chinese health-care-oriented social network that broadcasts information from local and national health authorities, to reconstruct patient-level information on COVID-19 in China. We also queried international media sources and national health agency websites to collate data on international exportations of COVID-19. We describe the demographic characteristics, delays between symptom onset, seeking care at a hospital or clinic, and reporting for 507 patients infected with COVID-19 reported until Jan 31, 2020. The overall cumulative progression of the outbreak is consistent between our line list and an official report published by the Chinese national health authorities on Jan 28, 2020. The estimated incubation period in our data aligns with that of previous work. Our dataset was made available in the public domain on Jan 21, 2020. Crowdsourced line-list data can be reconstructed from social media data, especially when a central resource is available to curate relevant information. Public access to line lists is important so that several teams with different expertise can provide their own insights and interpretations of the data, especially in the early phase of an outbreak when little information is available. Publicly available line lists can also increase transparency. The main issue with the quality of patient-level data obtained during health emergencies is the potential lack of information from locations overwhelmed by the outbreak (in this case, Hubei province and other provinces with weaker health infrastructures). 
Future studies based on larger samples of patients with COVID-19 could explore in more detail the transmission dynamics of the outbreak in different locations, the effectiveness of interventions, and the demographic factors driving transmission. For an example of an online source see https://ncov.dxy.cn/ ncovh5/view/pneumonia information on patients reported in China, we also compiled additional information on internationally exported cases of COVID-19. We obtained data for 21 countries outside of mainland China (Australia, Cambodia, Canada, France, Germany, Hong Kong, India, Italy, Japan, Malaysia, Nepal, Russia, Singapore, South Korea, Sri Lanka, Taiwan, Thailand, United Arab Emirates, the UK, the USA, and Vietnam). We gathered and cross-checked data for infected patients outside of China using several sources, including global news media (Kyodo News, Straits Times, and CNN), official press releases from each country\'s Ministry of Health, and disease control agencies. In addition to detailed information on individual patients, we reconstructed the daily progression of reported patients in each province of China from Jan 13, until Jan 31, 2020. We used the daily outbreak situation reports com municated by provincial health authorities, covered by state television and media, and posted on DXY.cn. All patients in our databases had a laboratory confirmed SARS coronavirus 2 (SARS-CoV-2) infection. Our COVID-19 database was made publicly available as a Google Sheet, disseminated via Twitter on Jan 21, 2020, and posted on the website of Northeastern University, (Boston, MA, USA) on Jan 24, 2020, where it is updated in real time. Data used in this analysis, frozen at Jan 31, 2020, are available online as a spreadsheet. We assessed the age distribution of all patients with COVID-19 by discharge status. We adjusted the age profile of Chinese patients by the population of China. 
We used 2016 population estimates from the Institute for Health Metrics and Evaluation 9 to calculate the relative risk (RR) of infection with COVID-19 by age group. To calculate the RR, we followed the method used by Lemaitre and colleagues 10 to explore the age profile of influenza, where RR for age group i is defined as $RR_i = (C_i / Σ_j C_j) / (N_i / Σ_j N_j)$, where $C_i$ is the number of cases in age group i and $N_i$ is the population size of age group i. To estimate trends in the strength of case detection and interventions, we analysed delays between symptom onset and visit to a health-care provider, at a hospital or clinic, and from seeking care at a hospital or clinic to reporting, by time period and location. We considered the period before and after Jan 18, 2020, when media attention and awareness of the outbreak became more pronounced. We used non-parametric tests to assess differences in delays between seeking care at a hospital or clinic and reporting between locations (Wilcoxon test to compare two locations and Kruskal-Wallis test to compare three or more locations). We estimated the duration of the incubation period on the basis of our line list data. We analysed a subset of patients returning from Wuhan who had spent less than a week in Wuhan, to ensure a narrowly defined exposure window. The incubation period was estimated as the midpoint between the time spent in Wuhan and the date of symptom onset. We did all analyses in R (version 3.5.3). We considered p values of less than 0·05 to be significant. The funder had no role in study design, data compilation, data analysis, data interpretation, or writing of the report. All authors had access to the data, and had final responsibility for the decision to submit for publication. 
Our line list comprised 507 patients reported from Jan 13, to Jan 31, 2020, including 364 (72%) from mainland China and 143 (28%) from outside of China The age distribution of COVID-19 cases was skewed towards older age groups with a median age of 45 years (IQR 33-56) for patients who were alive or who had an unknown outcome at the time of reporting (figure 1). The median age of patients who had died at the time of reporting was 70 years (IQR 65-81). Few patients (13 [3%]) were younger than 15 years. Adjustment for the age demographics of China confirmed a deficit of infections among children, with a RR below 0·5 in patients younger than 15 years (figure 1). The RR measure indicated a sharp increase in the likelihood of reported COVID-19 among people aged 30 years and older. A timeline of cases in our crowdsourced patient line list is shown by date of onset in , indicating an acceleration of reported cases by Jan 13, 2020. The outbreak progression based on the crowdsourced patient line list was consistent with the timeline published by China Center for Disease Control and Prevention (CDC) on Jan 28, 2020, 12 which is based on a more comprehensive database of more than 6000 patients with COVID-19. Since Jan 23, 2020, the cumulative number of cases has slowed down in the crowdsourced and China CDC curves (figure 2), which probably reflects the delay between disease onset and reporting. The median reporting delay was 5 days (IQR 3-8) in our data. Province-level epidemic curves are shown by reporting date in . As of Jan 31, 2020, 16 (52%) of 30 provinces in mainland China had reported more than 100 confirmed cases. The apparent rapid growth of newly reported cases between Jan 18, and Jan 31, 2020, in several provinces outside of Hubei province is consistent with sustained local transmission. Across the study period, the median delay between symptom onset and seeking care at a hospital or clinic was 2 days (IQR 0-5 days) in mainland China ( ). 
This delay decreased from 5 days before Jan 18, 2020, to 2 days thereafter (Wilcoxon test p=0·0009). Some provinces, such as Tianjin and Yunnan had shorter delays (data by province not shown), while the early cases from Hubei province were characterised by longer delays in seeking care (median 0 days [IQR 0-1]). The median delay between seeking care at a hospital or clinic and reporting was 2 days (IQR 2-5 days) in mainland China and decreased from 9 days before Jan 18, 2020, to 2 days thereafter (Wilcoxon test p<0·0001; ). Similarly to delays in seeking care at a hospital or clinic, reporting was quickest in Tianjin and Yunnan (median 1 day [IQR 0-1]) and slowest in Hubei province (median 12 days [IQR 7-16]). The median delay between symptom onset and seeking care at a hospital or clinic was 1 day (IQR 0-3) for international travellers, and shorter than for patients in Hubei province or the rest of mainland China (Kruskal-Wallis test p<0·0001; . Even in the period after Jan 18, 2020, when awareness of the outbreak increased, a shorter delay between symptom onset and seeking care at a hospital or clinic was seen for international patients than for those in mainland China (Wilcoxon test p<0·0001). For international cases, the delay between seeking care at a hospital or clinic and reporting was 2 days (IQR 1-4), also shorter than for mainland China (Wilcoxon test p<0·0001; . On the basis of 33 patients with a travel history to Wuhan, we estimated the median incubation period for COVID-19 to be 4·5 days (IQR 3·0-5·5; appendix p 2).  Information from patient line lists is crucial but difficult to obtain at the beginning of an outbreak. Here we have shown that careful compilation of crowdsourced reports curated by a long-standing Chinese medical social network provides a valuable picture of the outbreak of COVID-19 in real time. The outbreak timeline is consistent with aggregated case counts provided by health authorities. 
For comparison, China CDC published the first epidemic curve by symptom onset on Jan 28, 2020. Line lists provide unique information on the delays between symptom onset and detection by the health-care system, reporting delays, and travel histories. This information cannot be extracted from aggregated case counts published by official sources. Line list data can help assess the effectiveness of interventions and the potential for widespread transmission beyond the initial foci of infection. In particular, shorter delays between symptom onset and admission to hospital or seeking care in a hospital or clinic accelerate detection and isolation of cases, effectively shortening the infectious period. A useful feature of our crowdsourced database was the availability of travel histories for patients returning from Wuhan, which, along with dates of symptom onset, allowed for estimation of the incubation period here and in related work. A narrow window of exposure could be defined for a subset of patients who had a short stay in Wuhan, at a time when the epidemic was still localised to Wuhan. Several teams have used our dataset and datasets from others to estimate a mean incubation period for COVID-19 to be 5-6 days (95% CI 2-11). Our own estimate (median 4·5 days [IQR 3·0-5·5]) is consistent with previous work that used other modelling approaches. The incubation period is a useful parameter to guide isolation and contact tracing; based on existing data, the disease status of a contact should be known with near certainty after a period of observation of 14 days. Availability of a public dataset enables independent estimation of important epidemiological parameters by several teams, allowing for confirmation and cross-checking at a time when information can be conflicting and noisy. An interesting finding in our data relates to the age distribution of patients. We found a heavy skew of infection towards older age groups, with substantially fewer children infected. 
This pattern could indicate age-related differences in susceptibility to infection, severe outcomes, or behaviour. However, a substantial portion of the patients in our database are travellers, a population that is usually predominantly adults (although does not exclude children). Furthermore, because patient data in our dataset were captured by the health system, they are biased towards the more severe spectrum of the disease, especially for patients from mainland China. Clinical reports have shown that severity of COVID-19 is associated with the presence of chronic conditions, which are more frequent in older age groups. Nevertheless, we would also expect children younger than 5 years to be at risk of severe outcomes and to be reported to the healthcare system, as is seen for other respiratory infections. Biological differences could have a role in shaping these age profiles. A detailed analysis of one of the early COVID-19 clusters by Chan and colleagues 19 revealed symptomatic infections in five adult members of the same household, while a child in the same household aged 10 years was infected but remained asymptomatic, potentially indicating biological differences in the risk of clinical disease driven by age. Previous immunity from infection with a related coronavirus has been speculated to potentially protect children from SARS, and so might also have a role in COVID-19. In any case, if the age distribution of cases reported here was to be confirmed and the epidemic were to progress globally, we would expect an increase in respiratory mortality concentrated among people aged 30 years and older. This mortality pattern would be substantially different from the profile of the 2009 influenza pandemic, for which excess mortality was concentrated in those younger than 65 years. In our dataset, we saw a rapid increase in the number of people infected with COVID-19 in several provinces of China, consistent with local transmission outside of Hubei province. 
As of Jan 31, 2020, province-level epidemic curves are only available by date of reporting, rather than date of symptom onset, which usually inflates recent case counts if detection has increased. [Figure axis dates: Dec 9, 2019; Dec 16, 2019; Dec 23, 2019; Dec 30, 2019; Jan 6, 2020; Jan 13, 2020; Jan 20, 2020; Jan 27, 2020] Furthermore, province-level data include both returning travellers from Hubei province (ie, importations) and locally acquired cases, which also usually inflate the apparent risk of local transmission. Notably, other lines of evidence suggest that local transmission is now well established outside of Hubei province, because travel increased just before the Chinese New Year on Jan 25, 2020, and before implementation of the travel ban in Wuhan. Accordingly, our own data include evidence of transmission clusters in non-travellers, with, for instance, a second-generation transmission event reported in Shaanxi on Jan 21, 2020. Our study had several limitations, one of which was the data we used. Although all provinces in mainland China provide aggregated information on infections and deaths, individual-level patient descriptions are only available for a subset of provinces. Geographical coverage is heterogeneous in our line list, and we have a notable deficit of cases from Hubei province, the foci of the COVID-19 outbreak. We expect that little patient-level information is shared on social media by province-level and city-level health authorities in Wuhan and Hubei province because health systems are overwhelmed. For similar reasons, provinces with a large total case count at the end of January, 2020, or with a weaker health infrastructure, were under-represented in our line list, with the exception of Beijing. Other limitations in our data include severity (only patients who had severe enough symptoms to seek care were captured) and changes in case definition. 
A series of epidemiological criteria were required for COVID-19 testing, including travel history to Wuhan within the past 2 weeks; residence in Wuhan within the past 2 weeks; contact with individuals from Wuhan (with fever and respiratory symptoms) within the past 2 weeks; and being part of an established disease cluster. Some of these criteria (eg, relation to Wuhan) were relaxed over time (appendix). As a result, we have an overrepresentation of travel-related cases in our database. The reproduction number is an important quantity for outbreak control. We refrained from estimating this parameter because reporting changes could bias estimates relying on epidemic growth rates. Furthermore, our dataset captured cases all over China and does not reflect transmission patterns in any particular location. A mean reproduction number of 2·5-2·7 has previously been estimated on the basis of the volume of importations of international cases in the pre-intervention period in Wuhan. We recognise that, although our data source is useful and timely, it should not replace official statistics. Manual compilation of detailed line lists from media sources is highly time consuming and is not sustainable when case counts reach several thousands. Here we provide detailed data on 507 patients when the official case count was over 9000 by Jan 31, 2020, representing a sample of approximately 5% of reported cases and a much smaller proportion of the full spectrum of COVID-19 cases, which include mild infections. A crowd sourced system would not be expected to catch all cases, especially if many cases are too mild to be captured by the health-care system, digital surveillance, or social media. Notably, DXY.cn does not generate data outside of traditional surveillance systems but rather provides a channel of rapid communication between the public and health authorities. 
In turn, our approach has helped extract and repackage information from health authorities into an analytical format, which was not available elsewhere. At the time of writing, efforts are underway to coordinate compilation of COVID-19 data from online sources across several academic teams. Ultimately, we expect that a line list of patients will be shared by government sources with the global community; however, data cleaning and access issues might take a prohibitively long time to resolve. For the west African Ebola outbreak, a similarly coordinated effort to publish a line list took 2 years. Given the progression of the COVID-19 outbreak, such a long delay would be counterproductive. Overall, the novelty of our approach was to rely on a unique source for social media and news reports in China, which aggregated and curated relevant information. This approach facilitated entry of robust and standard data on clinical and demographic information. Reassuringly, DXY.cn maintains a special section dedicated to debunking fake news, myths, and rumours about the COVID-19 outbreak. Looking to the future, collection of patient data in the context of emergencies could include information on whether patients are identified through contact tracing or because they seek care on their own. Furthermore, data interpretability could be improved by gathering more quantitative information on how case definitions are used in practice. In conclusion, crowdsourced epidemiological data can be useful to monitor emerging outbreaks, such as COVID-19 and, as previously, Ebola virus. These efforts can help generate and disseminate detailed information in the early stages of an outbreak when little other data are available, enabling independent estimation of key parameters that affect interventions. Based on our small sample of patients with COVID-19, we note an intriguing age distribution, reminiscent of that of SARS, which warrants further epidemiological and serological studies. 
We also report early signs that the response is strengthening in China on the basis of a decrease in case detection time, and rapid management of travel-related infections that are identified internationally. This is an early report of a rapidly evolving situation and the parameters discussed here could change quickly. In the coming weeks, we will continue to monitor the epidemiology of this outbreak using data from news reports and official sources. KS and CV contributed to the study design. KS and JC contributed to the data compilation. KS, JC, and CV contributed to data analysis. KS and JC contributed to the design and drawing of figures. KS, JC, and CV contributed to the writing of the manuscript. We declare no competing interests. All data used in this report have been made publicly available on the Laboratory for the Modeling of Biological + Socio-technical systems website of Northeastern University. The available data include daily case counts of COVID-19 by reporting date and Chinese province, and a de-identified line list of patients with COVID-19. The line list includes geographical location (country and province), reporting date, dates of symptom onset and seeking care at a hospital or clinic, relation to Wuhan, discharge status when known, an English summary of the case description from media sources, and a link to the original source of data. Seeking care at hospital or clinic to report Symptom onset to seeking care at hospital or clinic',
     'qas': [{'id': '06321a9e-641f-495b-8a7f-41c4f761b175',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'c9a8f750-1384-40b7-ab14-96a228fb23ad',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocycline that may exert effects on viral replication.'},
      {'id': '15deb1cb-6e45-426d-8e6d-25311e2f0be7',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'f030547a-1f94-42a2-9096-283611b8e8df',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': 'c248bfc4-c55d-43a6-a087-91bbbe235b2e',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '62176363-e0c7-4a0e-b37a-f3bfbeb7c140',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '9e1bf3a6-eb28-435a-84f6-cda23cf63666',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '7039ec8e-61ba-4a6d-af98-29a5d36eff08',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '875769e4-23d8-434a-b997-69c88829f546',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '55079edd-d2c2-4798-8342-ee4c26964733',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'e4a00754-e2d3-4370-954e-bf27aae887ab',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'The novel Coronavirus outbreak, (previously known as the 2019-nCoV and later renamed COVID-19 during the writing of this manuscript) is leading to the closure of entire cities in China, and causing stringent measures to be taken in others. While in distant different continents, far from China where the virus was first reported, places are being placed on high alert. In Wuhan, where the virus broke, schools, roads and markets have been shut down. The same is true in Hong Kong, Beijing and Hubei Province amongst surrounding areas, as precautionary measures are being emphasized to ensure that the spread of the virus is minimized, and complete and accurate information on the virus is being obtained. However, the rate of spread of the virus and the uncertainties surrounding the entire situation has led the World Health Organization (WHO) on 30 January 2020 to declare the Coronavirus outbreak a \'Global Public Health Emergency\'. WHO determined, however, not to declare the outbreak a \'Public Health Emergency of International Concern\' (PHEIC) which is a higher level of declaration. A PHEIC is defined as "an extraordinary event which is determined to constitute a public health risk to other States through the international spread of disease and to potentially require a coordinated international response" whose scope may include: serious, sudden, unusual or unexpected; carries implications for public health beyond the affected State\'s national border; and may require immediate international action. With the world having experienced some notable influenza pandemics in the past, a Global Initiative on Sharing All Influenza Data (GISAID) platform was established and was instrumental in the rapid sharing of information by the Chinese scientists regarding the emergence of the COVID-19 virus. 
Through this platform, scientists from other regions were observed to gain access to information and are, subsequently, able to act in a much faster capacity; like in the case of scientists from the Virus Identification Laboratory based at Doherty Institute, Australia, who managed to grow a similar virus in the laboratory after accessing the data shared by the Chinese scientists . Beyond the aspect of pandemic preparedness and response, the case of COVID-19 virus and its spread provide a fascinating case study for the thematics of urban health. Here, as technological tools and laboratories around the world share data and collectively work to devise tools and cures, similar efforts should be considered between smart city professionals on how collaborative strategies could allow for the maximization of public safety on such and similar scenarios. This is valid as smart cities host a rich array of technological products that can assist in early detection of outbreaks; either through thermal cameras or Internet of Things (IoT) sensors, and early discussions could render efforts towards better management of similar situations in case of future potential outbreaks, and to improve the health fabric of cities generally. While thermal cameras are not sufficient on their own for the detection of pandemics -like the case of the COVID-19, the integration of such products with artificial intelligence (AI) can provide added benefits. The fact that initial screenings of temperature is being pursued for the case of the COVID-19 at airports and in areas of mass convergence is a testament to its potential in an automated fashion. Kamel Boulos et al. supports that data from various technological products can help enrich health databases, provide more accurate, efficient, comprehensive and real-time information on outbreaks and their dispersal, thus aiding in the provision of better urban fabric risk management decisions. 
The above improvements in the healthcare sector can only be achieved if different smart city products are fashioned to support standardized protocols that would allow for seamless communication between themselves. Weber and Podnar Žarko suggest that IoT devices in use should support open protocols, and at the same time, the device provider should ensure that those fashioned uphold data integrity and safety during communication and transmission. Unfortunately, this has not been the case and, as Vermesan and Friess explain, most smart city products use proprietary solutions that are only understood by the service providers. This situation often creates unnecessary fragmentation of information rendering only a partial integrated view on the dynamics of the urban realm. With restricted knowledge on emergent trends, urban managers cannot effectively take decisions to contain outbreaks and adequately act without compromising the social and economic integrity of their city. This paper, inspired by the case of the COVID-19 virus, explores how urban resilience can be further achieved, and outlines the importance of seeking standardization of communication across and between smart cities. With the advent of the digital age and the plethora of Internet of Things (IoT) devices it brings, there has been a substantial rise in the amount of data gathered by these devices in different sectors like transport, environment, entertainment, sport and health sectors, amongst others . To put this into perspective, it is believed that by the end of 2020, over 2314 exabytes (1 exabyte = 1 billion gigabytes) of data will be generated globally from the health sector. Stanford Medicine acknowledges that this increase, especially in the medical field, is witnessing a proportional increase due to the increase in sources of data that are not limited to hospital records. 
Rather, the increase is being underpinned by drawing upon a myriad and increasing number of IoT smart devices, that are projected to exponentially increase the global healthcare market to a value of more than USD $543.3 billion by 2025 . However, while the potential for the data market is understood, such issues like privacy of information, data protection and sharing, and obligatory requirements of healthcare management and monitoring, among others, are critical. Moreover, in the present case of the Coronavirus outbreak, this ought to be handled with care to avoid jeopardizing efforts already in place to combat the pandemic. On the foremost, since these cut across different countries, which are part of the global community and have their unique laws and regulations concerning issues mentioned above, it is paramount to observe them as per the dictate of their source country\'s laws and regulations; hence, underlining the importance of working towards not only the promoting of data through its usage but also the need for standardized and universally agreed protocols. While the significance of such data in advancing efficiency, productivity and processes in different sectors is being lauded, there are criticisms arising as to the nature of data collection, storage, management and accessibility by only a small group of users. The latter particularly includes select ICT corporations that are also located in specific geographies . These criticisms are justified, as in recent years, big data is seen as the new \'gold rush\' of the 21st century and limiting its access means higher economic returns and increased influence and control at various scales to those who control data. These associated benefits with big data are clearly influencing geopolitical standings, in both corporate and conventional governance realms, and there is increased competition between powerful economies to ensure that they have the maximum control of big data. 
As case in point is the amount of \'push and pull\' that has arisen from Huawei\'s 5G internet planned rollout . Though the latter service offers unprecedented opportunities to increase internet speeds, and thereby influence the handling of big data, countries like the U.S. and some European countries that are key proponents and players in global political, economic and health landscapes, are against this rollout, arguing that it is a deceptive way of gathering private data under the guise of espionage. On this, it has been noted that the issue of data control and handling by a few corporations accords with their principles of nationalism, and that these work for their own wellbeing as well as to benefit the territories they are registered in. Therefore, geopolitical issues are expected on the technological front as most large data-rich corporations are located in powerful countries that have influence both economically, health-wise and politically . Such are deemed prized tokens on the international landscape, and it is expected that these economies will continue to work towards their predominant control as much as possible. On the health sector, the same approach is being upheld where critical information and data are not freely shared between economies as that would be seen to be benefiting other in-competition economies, whereas different economies would cherish the maximization of benefits from such data collections. In addition to the obvious deep-rooted social issues related to nationalism, other challenges include the increasing movement of people globally that is being enhanced by reduced costs and higher speed. In particular, these challenges are more pronounced when it comes to public health. This is because most of the health-related data collected not only can compromise local nations, but also captures those of travelers. 
In such cases, in a bid to improve the health status of a nation, it becomes paramount to factor in data from other regions necessitating unhindered sharing of this data. Such data-sharing truth is emphasized in situations like the recent case of Coronavirus outbreak threatening the global health environment, facilitated by air transportation. The virus was first reported in Wuhan, China, and in a matter of three weeks (by 17th January 2020) over 300 cases were confirmed in that region, and 10 days later (26th January 2020), a total of 2014 cases of Coronavirus have been reported, with 684 of those being confirmed, and with 29 reported outside China. The fatalities from the virus stands at 56 as of 26th January 2020 . The virus had then been confirmed in various countries including Taiwan, South Korea, Japan, Thailand, France, the United States, Singapore and Vietnam . In the above case, though major cities are known to prepare themselves for potential outbreaks, their health policies and protocols are observed to diverge from one another. Thus, without a global collaborative approach, progress towards working for a cure and universally acceptable policy approach can take longer. Such fears, of a lack of international collaboration, were highlighted by the World Health Organization (WHO) during an emergency meeting in Geneva on 22nd January 2020 to determine whether the virus outbreak had reached a level warranting international emergency concern. However, WHO was satisfied that China was being proactive in this case, unlike in 2002, when China withheld information on the outbreak for far too long, causing delays in addressing the epidemic . As in this instance, it is the opinion in this paper that if there was seamless collaboration and seamless sharing of data between different cities, it would not warrant such a high-level meeting to result in action, and instead, a decision could have been made much earlier. 
On this, the saddest part is that some global cities are less prepared to handle the challenges posed by this type of outbreak for lack of information on issues like symptoms of the virus, the protective measures to be taken, and the treatment procedures that an infected person should be processed through, amongst other issues. With the Coronavirus, it took only 17 days (31st December 2019 to 17th January 2020) to be identified. The sharing of data has also been quicker, as immediately after the virus\' genetic sequence was discovered, Chinese scientists were able to share the information with the WHO, thus helping in its identification and enabling the actioning of precautionary measures in other countries. Latest technological tools have also allowed for the receipt of information in real time, in contrast to traditional epidemiological approaches that would have required months to identify the outbreak type. Similarly, though substantial data and information on the disease has been shared, Wetsman acknowledges that there is a lack of some vital information, like the ease of spread of the virus from person-to-person, and this is a key to containing the disease as interactions between people from different parts of the globe are still active. This hindrance can be made further possible as many cities advance in their smart and safe city model implementation towards constructing sufficient soft and hard urban infrastructures equipped with, for example, thermal imagery sensors to allow for early detections. However, while that is the case, data access to many is a challenge because the information is often seen as being sensitive for national security reasons, whilst at the same time, acknowledging that a virus outbreak is an equal threat to both national security and the economy. The outbreak of any disease has significant impacts on local economies across the globe. 
For instance, when SARS (Severe Acute Respiratory Syndrome) (SARS-CoV) broke in China in 2002, it was estimated, that the Asian region incurred tremendous negative impacts socially, health-wise and economically, potentially amounting to Asian regional economy losses of between USD $12-18 billion from tourism, travel and retail sales industries alone . The Zika virus outbreak, spread by daytime-active Aedes mosquitoes, is estimated to have cost equator-belt local economies in affected areas between USD $7 and USD $18 billion . The Ebola virus (or Ebola hemorrhagic fever (EHF)) caused an estimated loss of USD $2.2 billion in GDP in three West African economies (Guinea, Liberia and Sierra Leone) in 2015 alone . In regard to the current epidemic of Coronavirus, though it is too early to quantify or project its impacts on the global economy, there are fears that it may take the precedent of other outbreaks where billions of dollars will be lost. The foundations for this escalating loss can be witnessed in the rapid growth of travel bans being enacted by some countries and their international airports, especially specifically restricting people from visiting the affected regions in China and their growth into general non-Chinese travel movements. On this, noting that the outbreak came almost on the eve of the Lunar New Year celebrations, and that it had been estimated that over 400 million people were expected to travel in different parts of the world and China to observe this festivity, the majority have had to reconsider their options as to flights, hotels and entertainment events due to service provider cancellations . Those who had already booked their flights are expected to receive their refunds following the directive by the Civil Aviation Administration of China, however, this move has already affected the share value of Chinese airline companies . 
The above impacts demonstrate that the issues of virus outbreaks transcend urban safety and impacts upon all other facets of our urban fabric. Therefore, it becomes paramount to ensure that the measures taken to contain a virus transcend nationalist agendas where data and information sharing is normally restricted, to a more global agenda where humanity and global order are encouraged. With such an approach, it would be easier to share urban health data across geographies to better monitor emerging health threats in order to provide more economic stability, thereby ensuring no disruptions on such sectors like tourism and travel industries, amongst others. This is possible by ensuring collaborative, proactive measures to control outbreak spread and thus, human movements. This would remove fears on travelers, and would have positive impacts upon the tourism industry, that has been seen to bear the economic brunt whenever such outbreaks occur. This can be achieved by ensuring that protocols on data sharing are calibrated to remove all hurdles pertaining to sharing of information. On this, Lawpoolsri et al. posits that such issues, like transparency, timelessness of sharing and access and quality of data, should be upheld so that continuous monitoring and assessment can be pursued. Virus outbreaks in recent years have shown that, in the urban realm, data, including health data, can be sourced from diverse places. Presently, in the case of Coronavirus (COVID-19) outbreak, data is being collected from airports through screening and monitoring, through the use of smart sensors installed in airport infrastructures and from personnel working in those air/seaports. For instance, it has been reported that in the U.S.A., screening is being carried out at 20 different airports to ensure that possible affected people are intercepted for quarantine at the point of entry. 
Beside airports, as reported by Buckley and May , data is also being collected at bus terminals, market places (in Wuhan), subways, and also in health facilities where patients are taken for further medical attention. Such is prevalent especially in China, and other Asian regions where cases of the virus have been recorded and confirmed. In addition to these methods, other smart city data sources include the application of terminal tracking systems that are mostly emphasized in Safe City concepts, where, at the point of entry or departure, relevant data is collected and analyzed. Li et al. highlights that sensors installed in such locations have the potential to receive and distribute data in real-time to digital infrastructures within the network, and their interconnectedness in the network renders them extremely efficient in providing real-time updates on different issues. Urban areas are also known to be amassed with numerous Urban Health sensors, some of which are wearable. Though these are not specifically fashioned to track the present case of virus outbreak, they are able to track other related parameters like heartbeat, blood pressure, body temperature and others variables, that when analyzed can offer valuable insights. Loncar-Turukalo et al. hail these devices for their role in transforming the health care sector especially by allowing for Connected Health (CH) care, where data collected from them can be analyzed and provide insightful information on the health scenario in any given area. Vashist et al. further highlight how emerging features such as spatiotemporal mapping, remote monitoring and management, and enhanced cloud computing capabilities can emanate from such endeavours, leading to better urban management potential. 
While it is true that the basic source of medical data is generally sourced from general practitioners or medical laboratories-a fact that has also been affirmed in the case of the current epidemic-this paper explores how data sourced from an urban perspective can contribute to the medical narrative. The conviction to dwell on the urban realm in this manuscript is based on the fact that the current epidemic (COVID-19) is transmitted majorly through human-to-human contact, and in most cases, especially where the spread is reported in a different country, the first point of contact is an urban area, where large groups of people convene, like airports or subway stations. In most cases, such facilities, which are mostly based in urban areas, are observed to have installed surveillance technologies to ensure that anyone showing any symptoms of the disease are identified and quarantined. However, even in such cases, as underlined in the present manuscript, the need for anonymizing medical data is emphasized to ensure that the use of current technologies does not breach data privacy and security requirements, across different geographies. In this case, novel technologies like Blockchain technologies and quantum cryptography can aid in the discussion and be made to integrate with data collecting technologies. This would render an increased wealth of data from both the medical field and smart city operators, while ensuring privacy and security; hence, aiding in providing relevant information for better informed decisions. However, despite the indisputable roles that installed devices play in providing relevant health information, their data communication aspect needs to be reviewed. First, communications are seen to be geography-restricted (restricted to a given location), such that they seldom expand or communicate with their like, installed beyond their restricted areas. 
Secondly, these devices are usually sourced and installed by separate corporations that maintain unique and specific standards for data processing and sharing, and accordingly, tying cities to the sole usage of their product(s). Such strategies are adopted as private corporations try to maximize their economic gains, since the digital solution market is a lucrative one and is expected to continue growing and expanding . For its current application, the standardization of protocols as elaborated in this manuscript need to be pursued to ensure that there is seamless sharing of information and data. By doing this, it is expected that issues like burdens of collecting data, accuracy and other complexity that are experienced (when systems are fragmented) are reduced or eliminated altogether. The standardization can be achieved by, for example, ensuring that all the devices and systems are linked into a single network, like was done in the U.S., where all the surveillance of healthcare were combined into the National Healthcare Safety Network (NHSH) . The fact that cities are increasingly tuning on the concept of Smart Cities and boasting an increased adoption rate of technological and connected products, existing surveillance networks can be re-calibrated to make use of those new sets of databases. Appropriate protocols however have to be drafted to ensure effective actions while ensuring privacy and security of data and people. With scenarios like the present Coronavirus (COVID-19) outbreak, that not only impacts upon the economic status of cities, but also affects their social standing, it becomes imperative to emphasize the adoption of universal standards for data sharing. Such a move could have far reaching impact across cities and territories especially in positively combating outbreaks and disasters in a quicker, safer and standardized way, such that when the cure is discovered, the results can be replicated in various parts of the globe. 
With a collaborated data sharing protocol, it would be possible to have a larger dataset resulting in increased processing capabilities especially with technologies that are powered by artificial intelligence (AI) tools. Through this way, as noted by Jiang et al. and Allam , it would be possible to facilitate early detection, achieve better diagnosis and provide better urban management decisions for increased efficiency for virus containment. An example of how beneficial collaboration and sharing of data can be occurred during the 2014 Ebola outbreak in West Africa where scientists, health workers and clinicians, amongst other stakeholders from around the world, openly worked together and were able to contain the spread of this pandemic . On this front, Boué et al. highlight that levels of trust and transparency need to be reviewed and enhanced to facilitate unfettered data generation and sharing. Such could lead to an even earlier detection scenario of future virus outbreaks, and in the better curative management of the same, without minimal compromise on urban functions and on an urban economy. Furthermore, in cases of emergencies like the current outbreak of COVID-19 and any other, the need for observance of regulatory practices and international healthcare guidelines are paramount. This would ensure that both healthcare professionals and the general populace are informed, protected and remain within the prescribed rules and regulations. As noted by the WHO , the healthcare guidelines and regulatory practices are advanced to also ensure that the health risk in question is reduced together with its consequences. In the current era of technological advancement, such regulations and guidelines are paramount as they have potential to lead to positive or negative outcomes. 
The position of this paper is to advance that it now possible to integrate technologies like the use of smart devices through IoT networks and wearable devices, data from mobile apps and others to help users to share information with accredited and certified health professionals, and in this case, improve the outcomes for better cross disciplinary and more resilient protocols and policies.',
     'qas': [{'id': 'c9260a42-7b98-459f-82cb-e96a05dfcf01',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '11f48ca4-8c53-496c-8db0-ba40ca9af5a5',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '671f1edf-5c21-45ab-8b6f-575bd8a91c77',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'eb15d1c4-959c-4c03-890d-1b62e55474f3',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '327bfcc4-a0a4-4795-b1db-f0d43986aa87',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'ea955b0f-3850-474c-90a7-a802d764c890',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'f672bdac-5de4-4f2b-821e-240da877ac97',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '4e1070be-a1a2-48c7-bfe9-cd52e4664ae0',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '0014ea7b-9402-41e8-8e8a-5855d1623896',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '8dab13ed-8a1f-4b76-8efe-09b444168157',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '4e080210-49bf-4311-9061-444376de0b9e',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'Organization have issued interim guidelines in order to protect the population, and to attempt to prevent the further spread of COVID-19 from infected individuals . In order to reduce the time to identification of a person under investigation (PUI) for the COVID-19 infection, and the rapid isolation of this individual, we propose to collect the basic travel history along with the more common manifestations using a phone-based online survey. Such collected data can be used to assist in the preliminary screening and early identification of possible COVID-19 infected individuals. Thousands of data points are able to be collected and processed through an artificial intelligence (AI) framework which can ultimately evaluate individuals that may be infected and stratify them into no-risk, minimal-risk, moderate-risk, and high-risk of being infected with the virus. The identification of the high-risk cases can then be quarantined earlier, thus decreasing the chance of spread. is inserted here. See Appendix I for the details on the steps involved in data collection on all the respondents independent of whether or not they think they are infected. The AI algorithm described in Appendix II is to identify possible case identifications and send alerts to the nearest health clinic as well as to the respondent for an immediate health visit, we call this as an "alert for health check recommendation for COVID-2019. In case the respondent is unable to commute to the health center, the health department can then send an alert to a mobile health unit so they can then do doorto-door assessments and even testing for the virus. This generates alert for mobile health check recommendation for 2019-nCoV (MHCRC). If a respondent does not have an immediate risk of having symptoms or signs related to the viral infection, then the AI-based health alert will be sent to the respondent to notify them that there is no current risk of COVID-2019. 
summarizes the outcomes of data collection and identification of possible cases. The data recorded in step 5 of the algorithm using signs and symptoms will be collected prior to both the groups who have received alerts HCRC or MHCRC (for possible identification and assessment) and NCRC (for non-identified respondents). These are explained in steps (iii) and (iv) in the Appendix II. The extended analysis proposed will help to understand if there is any association with different sociodemographic variables and the manifestations such as fever and signs and lower respiratory infections, including cough and SOB in individuals defined as either with and without possible infection. Applications of AI and deep learning argued to be useful tools in assisting diagnosis and treatment decision making . There were studies which promoted disease detection through AI models . Use of mobile phones and web based portals have been tested successfully in health related data collection. However, one need to apply such techniques in a timely way for faster results. Apart from cost-effectiveness, the proposed modeling will be of great assistance in identifying and controlling when populations are closed due to virus spread. In addition to these, our proposed algorithm can be easily extended to identify individuals who might have any mild symptoms and signs. We have developed our data collection criteria based on CDC\'s Flowchart to Identify and Assess 2019 Novel Coronavirus and added additional variables for the extended utility of our efforts in identifying infected and controlling the spread (see ). Let be the outputs recorded during the data collection steps 1 (ii) If the set of identifiers, , for is equal to one of the elements of the set then send HCRC or MHCRC to that respondent, else proceed to the test criteria (iv). If is equal to one of the elements of the set , for then the respondent will be sent an NCRC alert. 
(iv) If is equal to one of the elements of the set , then the respondent will be sent an NCRC alert. Comparison of test criteria results of (iii) and (iv) with their corresponding geographic and socio-demographic details will yield further investigations of signs and symptoms Suppose we define two events and using the sets and as below:',
     'qas': [{'id': 'fda13c1f-6e98-4ed0-8a90-8821707ad50a',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '5d0b8380-9a8f-4285-b704-3ebc0f82aea2',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': 'b99a78db-3486-433d-a44c-0798d542cb25',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'c8a1abc1-7dd6-4398-ae87-985c771e754b',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '3b5d74e5-9666-4210-8c06-128771140c63',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '117f9d0e-0655-49f3-a406-bd427e724220',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '7e27be9b-391a-416c-8e30-25770a922377',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '12403066-1985-4bf3-9205-8b6571db3763',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '11dd46b2-b115-4126-89c6-1e58cc180f41',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '975b85c9-c07c-4d0e-9cb4-8a5649a379c4',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'ff167123-dae1-460b-a137-1ecdf391a563',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "Infectious diseases are responsible for 25% of the annual global deaths . Epidemics arising from these diseases are unpredictable, carry uncertain, varying risks, and narratives in different contexts . It is important that the scholarly literature represents the diverse and sometimes competing, narratives from all affected particularly the most vulnerable . Arguably, national and global responses to epidemics are inherently political. The experts selected for consultation, the evidence used to inform response pathways, and narratives of blame, vulnerability, and responsibility are politically driven, and require analysis. In this paper, epidemics refer to a spike, above normal, in the prevalence of a specific disease in a specific population (Centre for Disease Control and Prevention 2012). The literal definition of politics is the Bactivities that relate to influencing the actions and policies of a government or getting and keeping power in a government^ . However, in the Bpolitics of epidemics^literature, politics often refers to the extensive and diverse influence of local, national, and international governments and organizations, on the health outcomes of communities during disease outbreaks. For the purposes of this paper, politics will also refer to how scholars talked about epidemics, and whose narratives are reported in the reviewed literature. Several key themes emerge when examining scholarly narratives about the politics of epidemics namely the socioeconomic distribution of disease, decision-making in research and development, the credibility of evidence that informs response pathways, and attribution of responsibility for causing the outbreak and determining who is responsible for responding. While the literature presents obviously competing narratives that explain disease outbreaks, a close examination of these reveals a high prevalence of certain narratives, which reflect the influence of power and privilege. 
Hence, some of the literature has called for increased representation of the narratives from the most marginalized populations who bear the brunt of epidemics, in the scholarly literature . Furthermore, the decisions with regard to the type of research, the study population and the research beneficiaries often lack transparency, are also dominated by the most powerful. The case of the swine flu, dubbed the Bpandemic that never really was^ , demonstrates the potential consequences of private and uncommunicated decision-making processes in research and development. In the case of the swine flu, allegedly scientists conducting research and advising the World Health Organization on the virus were compensated by drug companies. This may have introduced bias . By increasing fear of a global pandemic, scientific research funded by pharmaceutical companies justified the stockpiling of antiviral supplies, thus increasing their vaccine sales . Fear mongering has been criticized in the literature, since it has the potential to distort the evidence, which should guide credible decision-making. It is therefore critical to assess the credibility and quality of the evidence that is being used to inform the response to epidemics around the world, particularly in light of the complicated and often problematic relationship between the academic and industry . In determining the credibility of evidence, consideration should be given to the legitimacy and authority of its producer(s). Given these sociopolitical issues that characterize epidemics, a comparative analysis of the narratives on the politics of epidemics is relevant. To date, most of the narratives in the scholarly literature on the politics of epidemics have considered one epidemic in one context . We hypothesize that the narratives may vary depending on the kind and context of the epidemic. 
The overall aim of this review paper is to discuss the scholarly narratives on the politics of disease epidemics by diseases and income level; Ebola in a low-income setting, Zika in a middle-income setting, and SARS in a high-income setting ). This analysis allowed for the consideration of the role that socioeconomic, geographical, and cultural context might play in the narratives on the attribution of blame and response to disease outbreaks. The paper is based on a review of the peer-reviewed published medical, social, and political literature, which was accessed using four electronic databases-PubMed, Sociological Abstracts, Scholars Portal, and Web of Science. The search was limited to full text articles published between 2002 and 2017. A similar approach was used in identifying the relevant articles. For each epidemic, the search terms included the name of the disease outbreak and the country of interest: BSARS AND Toronto,^BZika AND Brazil,^and BEbola AND Liberia.^It was vital that each search included the income setting to ensure that the findings were contextually relevant. These terms were then combined with the terms Bsociet*,^Bsociol*,^and Bpolitic*.^Truncation was used to ensure inclusion of all terms, including Bsociety,^Bsocietal,B sociological,^Bsociology,^Bpolitics,^Bpolitical,B politician,^and Bpoliticization.^The search terms must have Tables 2, 3, and 4 summarize the search results for the three outbreaks within the specified contexts. The titles and abstracts of the search results were reviewed for relevance to the politics of epidemics. All biomedical articles describing biomedical research were excluded. All articles that were relevant to the study topic were retrieved and reviewed. The initial review involved RA, grouping the search results according to the disease outbreak. For each outbreak, RA first scanned through a couple of papers, identifying the emerging themes. 
Once these were identified, subsequent reviews were structured along these themes, although an open stance was maintained throughout to enable the reviewer to identify any additional relevant themes. This review focused on the SARs epidemic in Canada, specifically Toronto, Ontario; to represent an epidemic in a high income country. summarizes the search results. The literature on SARS spoke to two of the four themes of the politics of epidemics, namely the credibility of evidence informing response pathways and the attribution of infectious disease responsibility. The use of quarantine as a control measure, although considered to be highly effective, is controversial. A telephonebased survey in the Greater Toronto Area aiming to ascertain public perceptions of the use of quarantine found that while quarantine was perceived to be a necessary and effective strategy, its ethical implementation should involve the collaboration of policy-makers, public health organizations, and the general population, and should be closely regulated to ensure appropriate use and protection of individual rights ). Despite these recommendations, Toronto quarantined significantly more people during the SARS outbreak compared to the other affected cities, including Hong Kong and Shanghai . Given the reported psychological distress reported by those quarantined, Toronto might have considered other strategies, such as the use of face masks to better Bdistribute the burden of containment measures . Critics note that the extensive quarantining in Toronto lacked proper policies and procedures to guide its implementation . Others highlight a lack of public record detailing any consultation between public health officials and the Ontario Human Rights Commission . 
There was little public scrutiny, which was suggested to be the result of effective conditioning of the public consciousness to believe that quarantining recommendations would be made fairly and legitimately by senior public health officials . Ultimately, it was not quarantining that was problematic, but the lack of apparent or sufficient evidence to guide its implementation. Beyond extensive quarantining, The World Health Organization issued travel-advisories as an additional control measure to contain further national and international spread of SARS from Toronto . This travel-advisory cost Toronto $1.1 billion and restricted the international right for freedom of movement . criticizes these travel-advisories for various reasons. For example, the advisories were not made by the WHO in consultation with Toronto authorities and led to an uneven global distribution of the burden of SARS . Furthermore, the travel advisories were based on old data resulting from delayed communication between the federal government of Canada and the WHO, as information had to first travel from municipal to provincial to federal health authorities . Once again, the evidence used to inform the response was outdated and therefore considered unjustified. As such, there were problems with both the lack of quality evidence and the travel advisory as a response. Quarantining and travel advisories reflect the profound ethical and political implications inherent in responding to infectious disease outbreaks. The evidence on the efficacy of the two primary responses-quarantining and travel advisorieswas inadequate to justify the extent of their implementation. In the case of SARS in Toronto, the literature reported on both forms of responsibility. 
While some of the literature attributed the responsibility to the Canadian health care system for being unprepared to manage SARS, the other literature tended to (or report on the) how the public attributed blame to the Asian-Canadian community for Bbringing^the outbreak. The latter narratives ultimately led to the racialization of the epidemic.  The narratives on attribution of responsibility for managing the SARs outbreak is most prevalent in the literature, with the limited capacity of the Canadian Health Care System to prepare for and respond to an emerging infectious disease as a main narrative. Many scholars pointed to flaws in the funding and organization of Canada's health care system-federally, provincially, and municipally-to explain the SARS outbreak in Toronto. Some argued that information was communicated inefficiently and was often incorrect . Personal protective equipment was in short supply , and the use of occupational health and safety in Toronto's hospital system was inefficient, leading to further spread ). Toronto's hospital system was also blamed for lacking admission policies and public laboratories : an inability to supply health services when demand suddenly increased and a lack of quality leadership ). Toronto was also ill-prepared to address the unique vulnerability of the homeless to SARS . Inadequate collaboration between the various levels of government in Canada was blamed for the apparent inefficiencies and inadequacies in the functioning of the health care system and the response to public health crises. This led to disorganized contact tracing, quarantining, and communication to the public (MacDougall 2007). Financial challenges within the Canadian Health Care System further enhanced Toronto's vulnerability to the SARS epidemic, including a lack of resourcing towards public health infrastructure and acute care . 
Similar to MacDougall (2007), point to a lack of cooperation and collaboration between the three levels of government to explain this public health crisis-ownership of responsibility and duty to respond was deflected between each level and remained unclear . According to the literature, SARS quickly became a profoundly racialized disease and inflamed racial tensions in the Greater Toronto Area ultimately leading to the social exclusion of a racial minority-the Asian-Canadian group . Such avoidance and stigmatization is reported to have played out in several spaces, such as on public transit and other public spaces, and families advising children to avoid Chinese peers in school . Some of the literature posits that this racialized stereotyping could have been prevented with denunciation from leaders in government and public health .On the other hand, according to , individualized health behaviors aimed at preventing SARS contraction-for instance, wearing a face mask-may have justified the avoidance of the stigmatized of the Asian-Canadians . Culture and ethnicity functioned not only as a risk factor for discrimination but also as a facilitator in the response to the outbreak. The Chinese-Canadian community in Toronto employed numerous strategies to combat SARS and ease social anxieties, including fundraising for research, the dissemination of health promotion materials, and launching a SARS support line, among other activities . The mobilization of spiritual leaders was also found to be an effective means of disseminating public health information ). While it is important to recognize the contributions of cultural and ethnic groups, we assert that cultural and ethnic minority groups are more often targets of blame, as was the case for SARS in Toronto. summarizes the total number of hits, and the number of papers that were retrieved and reviewed. 
The reviewed literature spoke to all of the themes identified in the politics of epidemics literature (socioeconomic distribution of disease, credibility of evidence, and the attribution of infectious disease responsibility), with the exception of decision-making in research and development. Similar to the above outbreaks, the people who were most affected by Zika were in some way socially marginalized, the poor, and more specifically, poor women. At the global level, it was the poorer countries and communities within those regions that were most impacted: those with precarious and/or inconsistent access to health care services, lacking the resources and infrastructure to prevent, diagnose, and treat the virus . No wonder the impact of the outbreak was more devastating in Brazil, which was already financially strained prior to the emergence of the Zika virus, with limited human resources: doctors, nurses, and other specialists , as compared to higher income countries who were more protected from the effects of the Zika virus given effective prevention programs, funding, and infrastructure . Easy and extensive access to mosquito repellants, air conditioning, effective waste management programs, and low rates of urban crowding protect more economically developed countries, such as the USA from Zika transmission . Consequently, Zika virus has aptly been labeled an Binfectious disease of poverty^ 3) . Some of the literature attributed the lack of public health infrastructure and resources to respond to the Zika outbreak in Brazil to these inequalities with regard to who is most affected. Contrasting Zika with HIV, since HIV/AIDS initially affected the prestigious population, such as celebrities, doctors, scientists , they were able to advocate and secure increased public funding of HIV interventions from the Ministry of Health and State Departments of Health. 
However, this seems to have happened at the expense of funding for vector control programs, such as those controlling mosquito vectors responsible for the transmission of dengue and Zika viruses, which mostly affected poorer communities . Among the poor populations, women who were either pregnant or considering pregnancy were also more vulnerable to the effects of Zika, as the virus is considered to be a teratogen . Government programs in Brazil that intend to provide free mosquito repellent to pregnant women do not consistently reach some of the poorest communities, such communities are remote and often lack accessible quality health care facilities due to distance and poor physical infrastructure . Access to quality reproductive health care is essential given the accumulating evidence linking the Zika virus to a rise in cases of infants born with microcephaly-an unusually small head for age and sex (World Health Organization 2017a). Conditions related to poverty, such as poor sanitation and increased exposure to larvicides and insecticides, which may cause mutation, have also been implicated . While the climate of South America is particularly suitable for replication of the Aedes mosquito-the vector responsible for transmission of the Zika virus ), climate change is thought to have resulted in extremely heavy rainfall and droughts which can support this proliferation of mosquitos, hence facilitating the spread of the Zika epidemic . Both the puddles created during heavy rainfall and the open barrel water storage during droughts, create ideal breeding places for mosquitos . Since the causes of climate change are complex, global, and political, the responses are political, often emphasizing the symptoms of climate change at a local level (in especially the poor countries), rather than addressing the causes at a global level. 
A combination of an ecosystem-focused perspective and a social-anthropological lens is vital Bbecause a pathogen requires a receptive population in order to cause disease . Ultimately, the need for a cross-disciplinary response pathway cannot be understated . Comprehending the diversity of causal explanations and associated responses allows for the politics of evidence to be more deeply appreciated. The production and evaluation of the evidence used to provide a causal explanation and promote a response is also political. In the case of the Zika epidemic, most of the literature on the attribution of infectious disease responsibility focused primarily on the cause, rather than the response, with most of the literature suggesting that globalization was responsible for the spread of Zika. Although typically the blame of the origins of infectious diseases are typically placed on a cultural minority group ; World Health Organization 2017c), with response action emphasizing individual behavior, prevention of mosquito bites and sexual transmission (World Health Organization 2017d; Brym and Lie 2014), however, arguably, in the case of the Zika epidemic, considerable responsibility was given to the effects of globalization. Globalization has been defined in numerous ways. For example, Bryn and Lie (2014) defined it as the Brapid increases in the volume of international trade, travel, and communication [which has] broken down the isolation and independence of most countries and people^(pg. 27) . This interpretation of globalization will be used of the purposes of this review. According to the literature, the interconnectedness between countries through travel facilitated the spread of Zika beyond the Zika Forest of Uganda-where it was initially discovered in the 1940s-to South East Asia in the 1960s, the Island of Yap in 2007, French Polynesia in 2013, and South America in 2015 . 
In light of the profound influence of globalization, international human travel, climate change, and urbanization on the spread of the Zika virus, some of the literature has called for research that assesses the feasibility of a more global response to preventing spread. summarizes the literature included in the review of Ebola in Liberia. Liberia was the focus for the purposes of this systematic review because of its high prevalence of Ebola in comparison to other affected areas. Similar to Zika, the reviewed literature spoke to all of the themes identified in the politics of epidemics literature (socioeconomic distribution of disease and the attribution of infectious disease responsibility), with the exception of decision-making in research and development. Ebola virus disease has been predominantly reported in lowincome countries, with the last outbreak in 2014 reported to have caused 11,323 deaths worldwide and 4809 deaths in Liberia alone . The most affected countries typically had poor health infrastructure . It is only during the most recent outbreak that the virus spread to high-income countries . Within these countries, and as was exemplified in Liberia, the outbreak mostly affected those populations that were very poor, remote, and lacked proper physical infrastructure, including roads, proper sanitation, and health facilities. The burden of the Ebola epidemic fell disproportionately on the most disadvantaged Liberians, fundamentally politicizing the epidemic. The socioeconomic distribution of Ebola was political, as defined for the purposes of this review, in that many scholars attribute its emergence to the socioeconomic conditions of Liberia. For example, poverty, and subsequent limited investment into the health system led to weak public health infrastructure, insufficient information technologies, a lack of trained personnel, inadequate case reporting, was often cited as the key vulnerability of Liberia . 
Additional challenges included mobility of particular populations, authority distrust , economic instability, and a lack of governance (McNamara 2016). These challenges intersected with other stressors, such as climate change and food insecurity to intensify the effects of the Ebola epidemic ). Many rural communities in Liberia were particularly vulnerable, extremely poor, and lacking secure access to food and health clinics . Similar to the other epidemics, people living in poverty were most vulnerable to the effects of the Ebola epidemic. Poverty facilitated the spread of the Ebola virus; thus, interventions aimed at addressing poverty would be most effective in containing further transmission . Socioeconomic inequality was additionally evident in the Binequitable management of the dead^(Pellecchia, Crestani, Decroo, Van en Bergh, Al-Kourdi, 2015, p. 1) . While cremation was a less acceptable cultural practice , it was mandated to limit further Ebola transmission . The new practice seemed to have impacted the economically disadvantaged who could not afford to pay for private burial services instead of cremation, aggravating socioeconomic divides . The disorganized and delayed response has been labeled a global health governance failure by Roemer-Mahler and Rushton (2016) who argue that Bthe outbreak was not only a global health problem but also a global political problem^(p. 374) . echoes a similar criticism in noting that development aid was used in contexts with insufficient infrastructures for the aid to be effective and focused too heavily on issues unrelated to Ebola . The international responses are argued to be far too short term, framing the Ebola epidemic as an African, and therefore racialized problem , leading to global neglect of the disease. 
This scholarly evaluation of global responses to the Ebola virus politicizes the epidemic, calling into question the evidence used to inform response pathways, which were largely insufficient and inappropriate for the Liberian context. More credible sources of evidence would have considered the urgency of Ebola and the unique sociopolitical context in which it was spreading. The Liberian culture was blamed not only for causing the epidemic but for interfering with control measures. Jones (2014) criticizes this Bculturalist epidemiology^(pg. 1) that overlooks the wider global forces that promote the spread of Ebola, instead exoticizing Liberian culture to attribute responsibility . For example, traditional burial practices and the consumption of bush meat were identified as key etiological factors to the Ebola epidemic . Some analysts even suggest that these cultural practices, in addition to local distrust of authorities may have obstructed interventions . Therefore, population behaviors, such as education and safe burials and cremations, were proposed as targets for intervention . However, other proposed causes of the Ebola epidemic included seasonal triggers, infection of nonhuman primates, landscape modification by humans, poverty, inadequate public health infrastructure, conflict, and population growth . Failure to focus on these and narrowly focusing on cultural practices politicizes the Ebola epidemic; yet, public health authorities, governments, and academics have largely attributed disease responsibility to local culture. There were similarities and differences in the narratives about the different epidemics. Broadly, none of the epidemics had narratives relating to all four themes of the politics of epidemics. However, both Zika and Ebola had narratives on three of the four themes. The SARs literature addressed only two of the four themes. Notably, there was lack of relevant literature on the research and development theme. 
The finding that the literature on how decisions about what research is funded and conducted during disease epidemics almost exclusively focused on the ethical implications (and did not question the potential power imbalances with regard to who identified the research issue/question and who led the research for example), was surprising, since this was a key theme in the broad politics of epidemics literature. This could, in part, be a reflection of the limitations of the search engines and strategy used in the study which excluded publications, which were deemed biomedical. Conversely, it may be a reflection of limited support for critical research and related publications. A case-specific review of the literature has demonstrated the influence of power and privilege on the experience of an epidemic. In the case of Zika in Brazil, the communities most vulnerable to the virus are those with insufficient resources and infrastructure . Consequently, Zika has been socially distributed to exacerbate conditions of poverty. However, the voices of these most affected people are drowned out by more powerful and prestigious groups. This is seen in the comparison that makes between the politics of HIV/AIDS and the politics of Zika . HIV/AIDS research in Brazil procured greater funding because those affected tended to be very notable Brazilians with more dominant social and politics voices . The same pattern was reported in relationship to the Ebola outbreak in Liberia, where weak public health infrastructure aggravated Liberian experiences of Ebola , while interventions targeted cultural practices, ultimately disempowering the economically disadvantaged . This review found that infectious disease outbreaks disproportionately affect the poor, specifically communities with poor physical infrastructure and limited access to quality public health services. 
The link between income and politics of epidemics has been discussed in the social science literature, where poverty is perceived to be the greatest risk factor. For example, argued that disease outbreaks, e.g., Ebola systematically affecting poor people and are tied to regional trade networks . Building on this literature, Marcella (2011) uses the term structural violence to highlight the institutional biases, inequalities, and economic policies that emanate from global centers of power and privilege, which tend to marginalize poor people during outbreaks . These linkages highlight social and economic inequalities (within communities, societies and countries), which are complex and often ignored by the medical (and political) communities . Indeed, some of the narratives criticized the (epidemiological) evidence, which tends to overlook the role of poverty in the facilitation of disease spread. For instance, focusing on pregnancy in the case of Zika in Brazil was criticized for overlooking the conditions of poverty that might also/instead be responsible for the spike in cases of microcephaly . Furthermore, responding to Ebola in Liberia with developmental aid that is not designed for contexts with insufficient infrastructure was criticized again for overlooking the role of poverty . This limited focus on the role of poverty in the peer-reviewed medical literature calls into question the politics of the research process itself. For example, what institutions are funding the research and what are the interests of the stakeholders in the research process? Who gets funding to conduct the research? What are the advantages of overlooking poverty for those producing the evidence? By disregarding the role of poverty and income inequality, epidemic responses will remain insufficient, and may, instead worsen the situation of poor populations . Perhaps poverty remains unaddressed in epidemic responses as its origins in a neoliberalist society feel too deep to uproot. 
Ironically, disease outbreaks facilitated the development of the Global Public Health Intelligence Network (GPHIN), an information sharing platform whose aim is to improve the credibility and authority of public health specialists to manage an outbreak. The platform is thought to have reduced the time between the outbreak and reporting and is thought to contribute to 40% of WHO's early warning and played a role in SARS notification, and subsequent outbreaks . The review clearly showed the consistency in the scholarly literature, that it is common to find that complex issues are oversimplified, whereby culture is used to justify the assigning of blame to minority groups . The review highlighted the role of politics and power in shaping different narratives, whereby powerful institutions assert particular narratives (often marginalizing the populations), which are Bpushedt o frame policies, publications, interventions, and funding agendas, while the narratives of the marginalized populations (those voiced by or representing marginalized people) are marginalized . This type of outbreak narrative was evident in the three epidemics discussed in this paper. In Toronto, SARS became a racialized disease, ultimately victimizing and excluding the Asian-Canadian community . The spread of the Zika virus in Brazil is largely attributed to the consequences of globalization, including the widening habitat of the Aedes mosquito vector and increased human and air travel . Cultural explanations are prominent in the outbreak narrative speaking to Ebola in Liberia, specifically the human consumption of bush meat and local/traditional burial practices that involve the touching and kissing of the deceased ; World Health Organization 2017c; . 
When responsibility for the origin of the epidemic is reduced to cultural and ethnic minority groups, for example, the Asian community during the SARS epidemic, South American women living in poverty during the Zika epidemic, and communities engaging in traditional Liberian cultural practices during the Ebola epidemic, this further marginalizes the already vulnerable populations. It is important that emphasis is placed on the extreme vulnerability of these groups to an infectious disease outbreak, rather than placing blame and ultimately exacerbating experiences of oppression. The findings in this paper should be interpreted with caution. First, this systematic review relied upon published peer-reviewed literature. This overlooked documents, such as WHO reports, government documents, and books, which might have contained relevant information. For example, we are aware of critical texts in the form of books, which may have enriched this manuscript-specifically, and provide insight into how other cities affected by SARs attributed responsibility, talked about attribution of risk and responsibility for the disease; point to Singapore's attribution of responsibility and credibility of evidence; , while also note the racialization of the SARS epidemic in Toronto, reflecting on the stigmatization of Toronto's Chinese and South Asian communities . However, work positions globalization as both responsible for causing and responding to infectious disease outbreaks , such narratives are thought to create space to better understand how such processes might be repurposed as public health solutions. Finally, ground the Ebola outbreak in Liberia in the context of colonial legacies, specifically emphasizing that global public health responses were political in that the establishment of the public health infrastructure tasked with responding to Ebola was influenced by social inequality, colonialism, and racism (). 
By adopting a social science perspective, unpack the diverse factorssocial, political, environmental, medical, and legal-that facilitated the escalation of the Ebola crisis (). However, since the scope of this paper was limited to peer-reviewed journal publications and one outbreak from each income context as an illustration, such information (from books and from other contexts) although relevant, was beyond the paper's scope. Another limitation is the time frame of the study. Scholarly literature is consistently evolving, and in the specific case of the Zika epidemic, which was considered a public health emergency of international concern at the time of data collection, new and relevant research was being produced after data collection ended. For feasibility purposes, data collection ended in June 2017. It is also important to note that research and research publications sometimes tends to be biased and may marginalize the narratives by or representing the most vulnerable poor populations and political topics. Furthermore, social science literature that did not fit the definition of politics as articulated for the purposes of this review was excluded. A future review might seek to unpack the themes that emerge from this additional literature. This systematic review of the politics of three different outbreaks in three different social economic contexts revealed that the politics of epidemics are-to an extent-universal. However, the manner by which the politics are played out vary by the income setting, the political themes that speak to general epidemics were found to be uniquely enacted during the SARS outbreak in Toronto, the Zika outbreak in Brazil, and the Ebola outbreak in Liberia. Perhaps the most universal finding of this systematic review is the role of social and economic inequality, including poverty during an epidemic. Regardless of the national income setting, minority and marginalized communities are the most devastated by an epidemic. 
If organizations and governments are to adequately respond to these individuals and communities, it is critical that narratives of those most vulnerable to an epidemic-specifically poor communities-are represented in the mainstream media as well as in the peer reviewed published literature-especially, the epidemiological and medical literature that tends to influence health programming and policy-making. Funding Information LK's research is funded by the Canadian Institutes for Health Research. AR is a PhD candidate. Ethical Considerations This is a review paper. No human subjects were involved in the study. The authors declare that they have no conflicts of interest.",
     'qas': [{'id': '11e1b119-9b84-436e-88f9-d23a04a70b6b',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'ae8faf10-c158-4ad0-816b-723f524cad93',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '454fdf36-5423-4358-b6f8-ecb24c480bb3',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'cb7989cc-a153-420b-81d4-ec9abf9aa556',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '928bb312-478a-48e4-9c3d-933ef8346111',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '24249e3a-5732-43d0-885e-8450fc299237',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '789e22cd-d8ef-4947-8bc8-f0b07d084fa9',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '3fae6b19-5e45-4d8d-ae25-f9edbdce50d0',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': 'fe734cca-746d-4f97-bdbb-cba5af209451',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '999178fd-ac89-4143-b8ee-7433732d96fa',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'a3385ddd-213a-4097-870b-a40779cb923b',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'It is amazing that, within a short time span of less than 17 years, two similar epidemic outbreaks occurred in China: SARS-1 in 2002 and SARS-2 in 2019. Although identification of viral origin(s) is very critical for understanding these epidemics, a study comparing a wide variety of natural and social factors potentially influencing the progression and the trajectory of these epidemics is also important. Through a comparative analysis of environmental factors and human activities in these two serious public health events, we wish to find some common ground for the occurrence of SARS-1 and SARS-2. The environmental situation of another coronavirus outbreak also seems to support the above-mentioned theory. MERS-CoV was first detected in a patient living in Jeddah, Saudi Arabia, in June of 2012 . The annual rainfall in Jeddah is low at 61mm, and there was no rain at all in June of that year in Jeddah . Therefore, relative to temperature, low humidity seems to be a more critical environmental factor influencing outbreak of human coronavirus disease. Thus, humans might become unfortunate hosts for SARS-CoVs as a result of some inappropriate interactions with wildlife and thus exposure to unfriendly viruses (  In agreement with these natural characteristics, bats have been found to inhabit locations near Yangtze River Bridge, which has rows of green lights that are tuned on for all of the night-time. Incidentally, Huanan Seafood Market is only 20 minutes away from this bridge. Bats gathered near the Yangtze River Bridge might have released the virus and even infected intermediate hosts for some time. The cold and dry winter helped viruses to survive in the environment and eventually found some ways to cross the species barrier, a phenomenon known as "viral chatter" . The increased vulnerability of human beings in winter time and the increased human exposure to wild animals during holidays made infection to SARS-COV-2 more likely. 
With so many bats concentrated into a local area, the spreading of viruses by bats might be much wider than just being restricted to one wildlife trading place such as the Huanan Seafood Market. The viruses might have lived in this big "incubation bed" for some time and achieved some mutations before jumping on to the final hosts-human beings. Although the origins and the occurrences of SARS-CoV-2 are both unclear, the control measures for the current epidemic should focus on immediate cut-off of transmission of the disease and through disinfection of infected locations. Quarantine of patients (both confirmed and suspected), isolation of susceptible population, and protection of high-risk professions are necessary measures for reducing exposure to the viruses and eliminating the risk of getting infected by the viruses. At the same time, infected locations must be adequately disinfected. Areas that will be open to the public should be carefully surveilled for the existence of SARS-CoV-2 and be cleaned of the virus if it is found. Modern communication methods should be effectively used for passing reliable information on the epidemic status, the treatment measures, and the self-protection skills, among others. As a matter of fact, if fine-tuned and highly-effective internet control for "public opinions" can be turned into beneficial use of monitoring the "epidemic situation", fighting against an even larger outbreak of any infection would be much easier and cost-effective. SARS-CoV-2 has entered human communities, and eliminating virus from human bodies does not means its eradication in nature. The risk of SARS-CoV-2 infection will remain for a long time. Thus, adequate cautions must be taken for safe-guarding against future outbreaks of SARS. The prevention can be achieved by implementing a multi-facet system that considers both natural and social aspects of the SARS epidemiology discussed earlier. 
For example, regular surveillance of viral status in nature should be carried out to monitor the variation/evolution and abundance/localization of the virus. This information may be served as an early warning and used for preparation of potential vaccines. The government should issue laws and policies to tighten protection of wildlife and prohibit consumption of wild animals. A grass-roots and transparent reporting system should be established and put into public use for reporting any case of confirmed or suspected human infection. The disease-reporting system should be organically synchronized with the meteorological system so that adverse environmental conditions conducive for viral infection on human beings can be forecasted and macro-scale preparations can be made in case an emergency occurs. Finally, but not lastly, in developing human society including building massive constructions for residence and transportation, potential ecological impact on wildlife and possible consequences of breaking natural balance of the ecosystems should be carefully evaluated. Author Contributions: All authors have made a contribution to this manuscript. Z.S. designed, drafted, and edited the initial manuscript. K.T. reviewed and edited the initial manuscript. S.S.K. edited the initial manuscript. G.H. conceptualized and designed the framework of the manuscript. S.V.L. wrote the revision of the manuscript and brought many of his independently originated ideas into the revised manuscript. All authors have read and agreed to the published version of the manuscript. Funding: This work is supported in part by the National Natural Science Foundation of China under grant no. 71964020.',
     'qas': [{'id': 'ff9166c0-cc99-4ec2-b546-3fc480bf64a8',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '0b4501b4-e4cc-4d86-ac97-b6c7c170b808',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '6fa04311-f7f4-4c06-a73f-6972e00a394d',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '15a80de0-854a-4cce-bf58-dda6d1d6de15',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': 'f789a7bd-006a-4533-97b2-531a5ea90dc5',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'f9b9c1d6-0647-445c-bcf9-01842ef7d664',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '4688502a-9880-428b-bf9b-2d39185e56f8',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '5e035f52-2863-4397-8103-8ab581e45a03',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': 'f3ecd172-eff0-45db-8775-55bad00e4403',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'd348b921-21d1-4d9f-a7e0-209916e623b5',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '30f9d646-9a98-443e-bf4b-614ee5cc8f37',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'The aim of the current study was to explore the effect of sustained transmission from the four Chinese cities of Wuhan, Beijing, Shanghai and Guangzhou on international disease importation risk to 168 countries and territories, with a specific focus on Africa where current levels of healthcare infrastructure could provide a significant challenge for managing this novel epidemic. The current situation is extremely dynamic and since then some countries have instigated flight restrictions and closed borders (e.g. Russia). These decisions were relevant for these locations but not based on probabilities. WHO has not recommended a cessation of transportation to free countries but suggested preventive measures. This would seem appropriate for Africa and South America with the caveat that only one case is needed to initiate a local epidemic without proper biosecurity and quarantine measures, whilst other regions will need to decide on a case-by-case basis through appropriate risk assessment.',
     'qas': [{'id': '3b69257e-7bfb-4d3d-b629-6a27f8fed304',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '6a57b320-3014-43bd-86e1-93f8706cf1e9',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '5ec0073e-4ccf-4985-8127-947287973c67',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'dea28082-08ea-4dfa-ae78-b20c08f23962',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '4fbbfd16-7f54-40a3-ad1a-f25373b3fa4b',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '03f5fdb1-a500-484d-b9b3-bd86229bd4eb',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'af3ad353-1337-4051-b039-dd257520fec9',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': 'b2e118a7-586a-4097-908f-a656390fa87d',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '97c6180e-0dff-4513-bb2e-71a1d14e1499',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '50be2be6-0d4f-43d8-a50f-5349ec04916e',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'f0bc0f8f-9739-4d60-8908-1db0bec5532c',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'In December 2019, China detected many cases of viral pneumonia-like disease similar to SARS that were confirmed to be caused by novel Betacoronavirus, provisionally called 2019 novel coronavirus (2019-nCoV). Since then, the novel coronavirus outbreak has raised attention throughout the world. Although the potential cause of the disease is still unknown, initial reports predicted that the virus is possibly of zoonotic origin. 2019-nCoV is the causative agent for severe respiratory infection in humans termed as novel coronavirus-infected pneumonia (NCIP) . nCoV is the third known coronavirus that causes fatal respiratory diseases in humans after highly pathogenic viruses SARS-CoV and MERS-CoV. Chinese researchers isolated the novel coronavirus from the infected patient in early 2020. As the virus is closely related to other bat coronaviruses, it is suspected that the bats are the primary reservoir for the virus. However, it is still unclear that, if the virus transmitted to humans directly from the bats or whether through an intermediate host. Detailed understanding of the enzootic patterns of the virus, its evolution, and surveillance are essential to control the disease and possibly to prevent the future epidemics of similar viruses. The transmission of 2019-nCoV is often spread from person to person through the respiratory droplets generated during coughs or sneezes from an infected person. Human-to-human transmission is reported in countries such as Germany, Japan, Vietnam, and the United States . The confirmed cases through inter-human transmission have increased the fear and panic accompanying the 2019-nCoV outbreak. It is still unknown whether the virus spreads only through human contact or if there is possible transmission through oral-fecal contact as well. The incubation time varies from 2-14 days after infection. 
The clinical presentation of this infection resembles SARS-CoV characterized with fever, dry cough, and shortness of breath in most of the cases, whereas non-respiratory symptoms such as headache, muscle ache, dyspnoea, rhinorrhoea, sneezing, sore throat, diarrhea, nausea, and vomiting are also reported in few patients. The affected persons also develop acute respiratory distress syndrome. Cases with critical illness showed respiratory failure, septic shock, and organs failure, which require intensive care support . At this time, the knowledge about this virus is limited. New cases and mortalities are increasing daily. As a newly emerging viral infection, there is no vaccine or anti-viral therapeutics to treat human coronavirus infection till now. As of now, preventing infection is the current priority for disease control. The current protocol for infected patients is to quarantine and provide supportive management and palliative care. The best way to avoid the virus infection is to keep oneself away from infected people and the utmost personal hygienic care is essential. Quarantine measures shall be taken to separate, restrict the movement of infected people, and also the normal population from the regions where there is an epidemic outbreak. The WHO recommended precautionary measures to the general public, such as frequently cleaning hands, wearing a face mask, avoiding close contact with the infected persons or farm animals, and avoiding consumption of raw or half-cooked meat/eggs and following good food safety practices . There is an urgent need to develop rapid diagnostic tools and vaccines or post-exposure prophylaxis to treat this infection. Reliable, timely laboratory diagnosis and an effective vaccine are crucial for effective disease management and public health intervention. An effective vaccine should be affordable, and also the production platform should produce suitable vaccine candidates rapidly at low cost, especially during a disease outbreak. 
The advantages and disadvantages of the current expression systems for recombinant protein production are given in . Currently, plant expression system offers many advantages over other conventional systems that have the potential to tackle the production of vaccine candidates rapidly at affordable cost facilitating the global vaccination programs, especially in resource-poor nations where the vaccines are needed most . The coronavirus outbreak has been declared a global health emergency and represents one of the greatest risks to global health, as the virus has a tendency to infect a large number of human populations, and the outbreak can cause severe medical complications with economic impact, particularly in middle-income countries where resources are limited for early diagnosis and preventive measures. Human mobility, air travel, and international trade can likely increase the number of cases in other regions as well. Continued surveillance along with the robust response of government agencies, medical practitioners, and researchers, is highly essential for the effective management of this emerging pathogen. Public health officials need to identify the source and virus reservoir, transmission cycle, pathogenesis, inter-human transmission, and clinical manifestations, which might be helpful to develop animal models, diagnostic reagents, anti-viral therapies, and vaccines against this pathogen. As the virus emerged suddenly and became a serious global concern, there is a need for rapid vaccine development. Although classical expression systems for biopharmaceutical proteins are still amenable, the development of transient expression in plants has deeply influenced the pharmaceutical sector to produce affordable vaccines and biologics rapidly at low cost. Hence, the plant expression platform shall be employed for biopharmaceutical production to accelerate the fight against this deadly infectious disease. 
The collaborative efforts of researchers are highly desirable to use a plant expression platform for producing an efficient cost-effective vaccine to control this epidemic. The continuous effort of research in this direction might be helpful in producing high-value biologics and pharmaceuticals on a large scale in a short time, especially during epidemics. ',
     'qas': [{'id': 'a5f84276-2703-4032-ac0c-59db22472f84',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '272d8c76-8d8c-47c1-8d32-c700f6a65b08',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '8f6dc417-8abc-4da3-a926-376c13bfa813',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'a3240332-4581-49d4-9d88-f600cf510b53',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '56a688fd-3104-4efb-a251-204c32d20d11',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'b3655fd7-0f58-4f81-8613-91cc0bcc0912',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '7c5cf2f3-0a6c-4747-809e-c8052f64318d',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '2afdb459-164e-49c5-9496-80a03d4ab975',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '2a0cb6fd-18ce-4004-b20e-c6dc7762b010',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '2ede3493-5567-4ca0-9a1a-9a717fe1d757',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '32014f64-5054-4abf-9183-4dd9f1c51946',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "Since December 2019, a new type of coronavirus called novel coronavirus (2019-nCoV, or COVID-19) was identified in Wuhan, China. The COVID-19 has then rapidly spread to all over China and the world. It can cause symptoms including fever, difficulty in breathing, cough, and invasive lesions on both lungs of the patients . It can spread to the lower respiratory tract and cause viral pneumonia. In severe cases, patients suffer from dyspnea and respiratory distress syndrome. The pandemic has a big number of infected patients that far exceeded the equivalents of Severe Acute Respiratory Syndromes (SARS) and Middle East respiratory syndrome (MERS), though with a lower fatality rate. According to the surveillance statistics reported by the Chinese government, by February 19, 2020, the number of confirmed infection cases increased to 44,412 for Wuhan and 74,280 for whole China, with 1497 and 2009 deaths respectively. Moreover, the pandemic has caused 919 confirmed infection cases and 3 deaths globally. Therefore, Wuhan city and Hubei Province are the targets for intensive interventions. Otherwise, the spread would have been much faster to all China and the world. Wuhan is a transportation hub of China, it is a highly dense city and has a large population of more than 14 million in 2019. The World Health Organization (WHO) had a meeting on January 30, 2020 and they declared the coronavirus outbreak from China a public health emergency of international concern. Further, there are lots of concerns and debates all over the world, indicating a need for more understanding of China's systems in responding to the outbreak. Therefore, based on our firsthand experience of working with few of the COVID-19 cases, the purpose of this article is to have a brief report of current development, challenges, and future directions of the coronavirus outbreak in Wuhan. 
It is recognized by the international community that China has made remarkable progress in responding effectively to the outbreak . What made China address the epidemic faster is its ability to finance and mobilize resources combined with its strong governance structure, efficient execution, and solidarity of the whole society. It just took 1 month for China to recognize the existence of a novel coronavirus after the first case was reported, followed with a series mandatory actions in both Wuhan and all over China. In contrast, it took more than 4 months for SARS. On December 31, 2019, delegates of the Chinese Center for Disease Control and Prevention (CDC) went to Wuhan for field investigations, and the sample of new virus was isolated and further identified as a pathogen of unexplained pneumonia on January 6, 2020. The genome-wide sequence of the virus was decoded in the next few days . After recognizing it as an emergency epidemic on January 22, 2020, strong measures have been adopted immediately by Wuhan local authorities to characterize and control the epidemic, including isolation of suspected cases for treatment, close monitoring of contacts, epidemiological and clinical data collection from patients, and development of diagnostic and treatment procedures. More and more hospitals have been designated by the government to treat infected patients. Thousands of people have been quarantined in the new built hospitals such as Huoshenshan Hospital, Leishenshan Hospital, and Fangcang Hospital to provide care for the confirmed infection patients in Wuhan. In the meantime, patients with different severity are being treated in different hospitals. Thousands of medical professionals nationwide came to Wuhan and other cities in Hubei Province for assistance. Many kinds of guidelines have been developed, and useful information about risk factors and preventive measures are recommended to the public by various means . 
It is found that the COVID-19 can be transmitted through droplets, contact, aerosol, etc. . A person will not be infected if he washes his hands before touching the conjunctiva. Accordingly, measures such as washing hands, wearing masks and goggles are very effective to prevent potential infections. Further, Wuhan has implemented closed management of communities. Inhabitants are not allowed to go out of their communities and they are very supportive to this regulation. COVID-19 detection kits have been developed and the test results can be generated within 6 h, which is helpful for early diagnosis, treatment and judgment of the treatment effect. Although the number of patients with COVID-19 infections is large in Wuhan, the fatality rate is much lower (3.37%, by February 19, 2020) compared with that of SARS (11%, 2003). By February 19, 2020, 4895 people have been recovered after treatments and most of them are mild cases. It is the first time for the COVID-19 to infect humans and can be transmitted from person to person . The incubation period can be 2 weeks and even longer. Besides, the virus can spread during the incubation period or recessive infection, which makes it difficult to identify those suspected cases without clinical symptoms for prompt control. By far, the numbers of new infections and deaths have already exceeded the equivalent numbers of cases with SARS. If the situation cannot be fully controlled in Wuhan and Hubei province, the situation may deteriorate in other places of China and the world. The government of all levels have been taking strong leadership to combat the outbreak and in recent days there is a decline trend of new cases. However, some challenges still remain and need to be addressed: (1) The large number of confirmed and suspected cases in Wuhan make people staying with them in high risk of getting infected because of the contagiousness of the new virus . This is specially the case for medical professionals. 
By February 11, 2020, 1716 medical personnel have been infected and six of them died from all over China. Moreover, it is very difficult to identify those people without obvious symptoms, making their families in high risk of getting infected. (2) Hospitals have drastically constrained other services to meet the hospitalization needs of the outbreak in Wuhan. At the early stage, with more people getting infected and less of them recovered, medical facilities, personnel and protective supplies were increasingly insufficient. In many cases, patients cannot be quarantined and treated in time, and many medical staff cannot get fully protected. More efficient logistic services are expected to deliver donated materials from both China and the international society to medical professionals and communities. However, these are all operating problems and the situation has much alleviated with the strong leadership of government of all levels. ",
     'qas': [{'id': '3e1225ea-8da4-4005-ba11-9670f4aef6d5',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'c7c88b4e-3fea-4438-a2a0-24534bacb4b9',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '39435f8b-8e84-4abc-8a75-eb0eb233d5d8',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'a99b9f8d-fc05-4d70-b9f2-d2539d4f5457',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '7871ece3-99a5-4102-91bd-d3087c30df9c',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'f00477be-b911-48be-8b1c-53f2160c1ed2',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '95749cb3-d2be-42aa-b222-9c56867e36a8',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '6d7b4df4-759c-470a-a38b-752ff7e84db4',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': 'be3932df-c28f-4e21-b79d-cd7750815d3f',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '766a2d1f-8ce6-4bb1-bc0e-567ea037ce0c',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'd60ebd1c-7bdf-434a-a74c-fc8605987da6',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "Long non-coding RNAs (lncRNAs), which are transcripts larger than 200 nt in length that lack protein-coding ability, have previously been described in mammalian cells . Most of them have a structure similar to mRNA; they have a 5 methylguanosine cap and are usually spliced and polyadenylated at their 3 termini. Notably, lncRNA expression shows significant cell and tissue specificity . Emerging evidence shows that non-coding RNAs have a regulatory role in multiple cellular processes, such as genomic imprinting, chromatin modification, and alternative splicing of RNA . Moreover, some diseases such as cancer and neurological disorders are also related to the dysregulated expression of lncRNA . Numerous studies have been conducted to ascertain their functional role during viral infection. For example, NRAV can promote influenza virus replication and virulence through negatively regulating the initial transcription of varieties of interferonstimulated genes (ISGs) . lncRNA-ACOD1, named by its neighboring coding gene aconitate decarboxylase 1, significantly reduces virus multiplication by directly interacting with the metabolic enzyme glutamic-oxaloacetic transaminase . Neat1, one of the lncRNAs induced by HIV-1 infection, is retained in the nucleus and serves as a scaffold for the nuclear paraspeckle substructure. Importantly, Neat1 deficiency enhances HIV-1 replication . Although large amounts of data have proved that several lncRNAs are involved in different kinds of virus infection, the mechanisms by which they act are still largely unknown. For RNA-seq, ST cells were infected with PDCoV at a multiplicity of infection (MOI) of 10; the medium for PDCoV infection was DMEM containing 0.2 ug/ml Trypsin that had been TPCKtreated (Millipore Sigma, St. Louis, MO, United States) for 11 h. Mock-infected cells were placed in the same volume of DMEM, with the same concentration of TPCK-treated Trypsin. 
Total RNA was isolated from each group using SuPerfecTRI TM Total RNA Isolation Reagent (Pufei, Shanghai, China) according to the manufacturer's instructions. The RNA quality was checked by 1% agarose gel electrophoresis. The purity and concentration of RNA were measured by NanoPhotometer R spectrophotometer (IMPLEN, München, Germany) and Qubit R RNA Assay Kit in Qubit R 2.0 Fluorometer (Life Technologies, Camarillo, CA, United States). RNA integrity was assessed using the RNA Nano6000 Assay Kit of the Bioanalyzer 2100 system (Agilent Technologies, Santa Clara, CA, United States). For quantitative RT-PCR (RT-qPCR), ST and IPEC-J2 cells were infected or mock-infected with PDCoV at an MOI of 10 and harvested at the indicated time. All experiments were conducted in triplicate. Gene Ontology (GO) enrichment analysis of differentially expressed genes or lncRNA target genes was conducted with respect to biological process, molecular function, and cellular component with the GOseq R package, in which gene length bias was corrected. Kyoto Encyclopedia of Genes and Genomes (KEGG) was used to perform pathway enrichment analysis 1 . KOBAS software was used to test the level of statistical significance of enrichment of differentially expressed genes and/or lncRNA target genes in KEGG pathways . Sequence (5 -3 ) Amplicon For each lncRNA, the Pearson correlation coefficient of its expression value with that of each protein-coding gene was calculated. Under the conditions of an absolute value of the Pearson correlation coefficient >0.998 and p < 0.00001, the interaction network of the differentially expressed lncRNAs and protein-coding gene co-expression pairs was then constructed using Cytoscape (v3.5.1) . Correlation analysis of DE lncRNA and protein-coding genes identified a number of DE lncRNA-DE protein-coding gene pairs. The main enriched KEGG pathways of these protein-coding genes were in metabolism and oxidative phosphorylation. 
In a recent report, 5-day-old neonatal pigs were infected with PDCoV, and transcriptome profile and KEGG pathway enrichment analysis were performed at different stages of infection . In our study, we found that the lncRNA targeted genes enriched in those pathways that were perturbed during the late stage of infection. In addition, the expression level of transglutaminase 3 (TGM3) and apolipoprotein A-2 (APOA2) in a study were significantly changed. Similarly, we also found that TGM1 was up-regulated, and APOA1, APOA4, and APOA5 were down-regulated during PDCoV infection (data not shown). Moreover, our data show that many cytokines and chemokines, which elicit an inflammatory response, were differentially expressed in the infected cells compared to mock cells. The inflammation causes injury to the intestinal tissues, resulting in diarrhea or even death. Raised CCL and CXCL10 levels were associated with the severity of virus infection . Here, we identified a number of lncRNAs that may regulate the expression of these inflammatory molecules. In the present study, the expression profiles of lncRNAs were determined in PDCoV-infected ST cells. In total, 1,190 novel lncRNAs were identified. A total of 830 lncRNAs were differentially expressed between PDCoV-infected or mockedinfected ST cells. KEGG pathway analysis of DE lncRNA coexpressed genes revealed that they might be primarily involved in regulating metabolism and TNF signaling pathways. Our study systematically characterizes lncRNA expression during PDCoV infection and provides a useful resource for identifying and functionally characterizing the cognate gene products of those lncRNAs. This study will also be useful for assigning lncRNAs as potential biomarkers of PDCoV infection and designing better preventive and therapeutic measures against the virus infection, which would be economically beneficial for the pig farming community. 
The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation, to any qualified researcher. JLL, JG, and JZ conceived and designed the experiments. FW, LD, and JL performed the experiments. JLL, YY, YJ, and TY analyzed the data. JLL drafted the manuscript. All authors read and approved the final manuscript. ",
     'qas': [{'id': 'f22ce1d3-c894-4696-965a-eb573d0a899a',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '76ba5def-6299-430c-b049-fd2b17721bde',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': 'bb01c680-87d6-4c58-982f-0a071070cf0c',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '50057c55-25d4-4d3c-986d-bdee4331bdbe',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '3e458587-1c40-4b53-8820-3c464202daa7',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'b37c735b-a493-476a-b7de-a3057e6f5375',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'ef1236b2-d8d1-4563-bab9-48acfed57b42',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '97afbc1b-af66-4b8e-bf77-7d823ddfdbbc',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '120702f8-38ee-48aa-8982-a05f398efe0e',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '6bf75c7b-1854-4ee4-bfac-30bcd95384e0',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '4b506bf1-4fc5-421c-9c2c-e540288a4681',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "On Nov. 27th, 2002, a respiratory illness erupted in Guangdong Province, China . In Feb, 2003, the Chinese Ministry of Health announced that this acute respiratory syndrome had thus far resulted in 305 cases and five deaths . The following month, there were clusters of atypical pneumonia reported in other parts of mainland China, Hong Kong , Canada , and Singapore . In Jul, 2003, SARS-CoV spread across 26 countries in six continents, and caused a cumulative 8,096 cases and 774 deaths (9.6%) . In particular, a higher mortality (21%) was found in hospital personnel . On Dec. 29th, 2019, the health departments of Hubei Province received a report that four employees of the South China Seafood Wholesale Market were diagnosed with unknown-caused pneumonia in a local hospital, which was the first report of SARS-CoV-2 . On Dec. 31st, 2019, the National Health Commission of People Republic of China and Chinese Center for Disease Control and Prevention (China CDC) participated in the investigation and case-searching work . On the same day, the government of Wuhan released information about the disease outbreaks to society . Nowadays, the number of patients infected with SARS-CoV-2 continues to climb worldwide. By the date of this paper's submission, a cumulative 67,081 cases and 1,526 deaths (2.1%) were reported worldwide. In Wuhan, China, the number is 37,914. The main timeline of SARS and COVID-19 epidemic development were shown in ,b, respectively. Glucocorticoid and interferon Lopinavir/ritonavir (in testing) On Nov. 27th, 2002, a respiratory illness erupted in Guangdong Province, China . In Feb, 2003, the Chinese Ministry of Health announced that this acute respiratory syndrome had thus far resulted in 305 cases and five deaths . The following month, there were clusters of atypical pneumonia reported in other parts of mainland China, Hong Kong , Canada , and Singapore . 
In Jul, 2003, SARS-CoV spread across 26 countries in six continents, and caused a cumulative 8,096 cases and 774 deaths (9.6%) . In particular, a higher mortality (21%) was found in hospital personnel . On Dec. 29th, 2019, the health departments of Hubei Province received a report that four employees of the South China Seafood Wholesale Market were diagnosed with unknown-caused pneumonia in a local hospital, which was the first report of SARS-CoV-2 . On Dec. 31st, 2019, the National Health Commission of People Republic of China and Chinese Center for Disease Control and Prevention (China CDC) participated in the investigation and case-searching work . On the same day, the government of Wuhan released information about the disease outbreaks to society . Nowadays, the number of patients infected with SARS-CoV-2 continues to climb worldwide. By the date of this paper's submission, a cumulative 67,081 cases and 1,526 deaths (2.1%) were  The initial symptoms of SARS patients were fever (100%), cough (61.8%), myalgia (48.7%), dyspnea (40.8%), and diarrhea (31.6%) , and the prognosis of patients was associated with host characteristics (including age, gender, etc.) . During hospitalization, respiratory distress occurred in 90.8% of SARS patients . The duration from disease onset to severe respiratory distress was an average of 9.8 ± 3.0 days . During the disease course, some patients developed leukopenia, lymphopenia, and thrombocytopenia with an upregulation of aspartate transaminase (AST), alanine aminotransferase (ALT), lactic dehydrogenase (LDH), and C-reactive protein (CRP) . In comparison, COVID-19 showed similar trends with SARS patients . Fever, fatigue, and dry cough are the main manifestations of the patients, while nasal congestion, runny nose, and other symptoms of the upper respiratory tract are rare. Beijing Centers for Diseases Control and Prevention indicated that the typical case of COVID-19 has a progressive aggravation process. 
COVID-19 can be classified into light, normal, severe, and critical types based on the severity of the disease : (1) Mild cases-the clinical symptoms were mild, and no pneumonia was found on the chest computed tomography (CT); (2) normal cases-fever, respiratory symptoms, and patients found to have imaging manifestations of pneumonia; (3) severe cases-one of the following three conditions: Respiratory distress, respiratory rate ≥ 30 times/min (in resting state, refers to oxygen saturation ≤ 93%), partial arterial oxygen pressure (PaO2)/oxygen absorption concentration (FiO2) ≤ 300 mmHg (1 mmHg = 0.133 kPa); (4) critical cases-one of the following three conditions: Respiratory failure and the need for mechanical ventilation, shock, or the associated failure of other organs requiring the intensive care unit . The current clinical data shows that the majority of the deaths occurred in the older patients. However, severe cases have been documented in young adults who have unique factors, particularly those with chronic diseases, such as diabetes or hepatitis B. Those with a long-term use of hormones or immunosuppressants, and decreased immune function, are likely to get severely infected. According to the demographic information of SARS patients, infection occurred in all age groups (the average age was dyspnea (40.8%), and diarrhea (31.6%) , and the prognosis of patients was associated with host characteristics (including age, gender, etc.) . During hospitalization, respiratory distress occurred in 90.8% of SARS patients . The duration from disease onset to severe respiratory distress was an average of 9.8 ± 3.0 days . During the disease course, some patients developed leukopenia, lymphopenia, and thrombocytopenia with an upregulation of aspartate transaminase (AST), alanine aminotransferase (ALT), lactic dehydrogenase (LDH), and C-reactive protein (CRP) . In comparison, COVID-19 showed similar trends with SARS patients . 
Fever, fatigue, and dry cough are the main manifestations of the patients, while nasal congestion, runny nose, and other symptoms of the upper respiratory tract are rare. Beijing Centers for Diseases Control and Prevention indicated that the typical case of COVID-19 has a progressive aggravation process. COVID-19 can be classified into light, normal, severe, and critical types based on the severity of the disease : (1) Mild cases-the clinical symptoms were mild, and no pneumonia was found on the chest computed tomography (CT); (2) normal cases-fever, respiratory symptoms, and patients found to have imaging manifestations of pneumonia; (3) severe cases-one of the following three conditions: Respiratory distress, respiratory rate ≥ 30 times / min (in resting state, refers to oxygen saturation ≤ 93%), partial arterial oxygen pressure (PaO2)/oxygen absorption concentration (FiO2) ≤ 300 mmHg (1 mmHg = 0.133 kPa); (4) critical cases-one of the following three conditions: Respiratory failure and the need for mechanical ventilation, shock, or the associated failure of other organs requiring the intensive care unit . The current clinical data shows that the majority of the deaths occurred in the older patients. However, severe cases have been documented in young adults who have unique factors, particularly those with chronic diseases, such as diabetes or hepatitis B. Those with a long-term use of hormones or immunosuppressants, and decreased immune function, are likely to get severely infected. According to the demographic information of SARS patients, infection occurred in all age groups (the average age was ≦45) . There was a proportional difference between male and female (female predominance) , with a male-to-female ratio of 1:1.25 . In addition, hospital staff had a higher risk due to the proximal interactions with large numbers from the infected population. For example, hospital staff accounted for 22% of all cases in Hong Kong and 22.8% in Guangdong . 
The mortality caused by SARS increased with age (> 64 years) , and the overall mortality rate during the outbreak of SARS was estimated at 9.6% . Li et al. reported that people who have not been exposed to SARS-CoV-2 are all susceptible to COVID-19 . Among the 8,866 patients who have been confirmed with COVID-19, nearly half of the patients have been aged 50 years or older (47.7%) . The male-to-female ratio is about 2.7:1 and the average incubation period is 5.2 days . However, severe COVID-19 cases and deaths have mostly been in the middle-aged adults and the elderly with long smoking histories or other 45) . There was a proportional difference between male and female (female predominance) , with a male-to-female ratio of 1:1.25 . In addition, hospital staff had a higher risk due to the proximal interactions with large numbers from the infected population. For example, hospital staff accounted for 22% of all cases in Hong Kong and 22.8% in Guangdong . The mortality caused by SARS increased with age (> 64 years) , and the overall mortality rate during the outbreak of SARS was estimated at 9.6% . Li et al. reported that people who have not been exposed to SARS-CoV-2 are all susceptible to COVID-19 . Among the 8,866 patients who have been confirmed with COVID-19, nearly half of the patients have been aged 50 years or older (47.7%) . The male-to-female ratio is about 2.7:1 and the average incubation period is 5.2 days . However, severe COVID-19 cases and deaths have mostly been in the middle-aged adults and the elderly with long smoking histories or other basic diseases, such as heart disease and hypertension . At the time that this paper was been submitted, COVID-19 patients mortality rate was 2.1% . According to the WHO data on Jul. 31th, 2003 , a total of 8,096 clinically diagnosed cases of SARS were reported worldwide, with 774 deaths and 26 countries and regions affected ). Most cases were in Asia, Europe, and America. 
The main countries in Asia were China (including mainland, Macao, Hong Kong, and Taiwan), Singapore, and so on. The total number of cases in mainland China was 5,327, with 349 deaths . The cases were mainly concentrated in Beijing, Guangdong, and Shanxi ( ) . In total, 2,102 patients were from Hong Kong, Macao, and Taiwan, with 336 deaths . Viruses 2020, 12, x FOR PEER REVIEW 5 of 18 basic diseases, such as heart disease and hypertension . At the time that this paper was been submitted, COVID-19 patients mortality rate was 2.1% . The new emerging SARS-CoV-2 shares about 80% of the gene sequence of SARS-CoV, released by the Military Medical Research Institute of Nanjing Military Region in 2003 . Recently, Shi et al. reported that the sequence similarity of coronavirus between SARS-CoV-2 and the coronavirus isolated from Rhinolophus affinis is 96.2%, and suggested that bats may be the source of the virus . So far, the intermediate hosts of SARS-CoV-2 are elusive and have been reported to be snakes, minks, or variable others . Recently, a research group of South China Agricultural University reported that pangolins may be one of the intermediate hosts for SARS-CoV-2, by analyzing more than 1,000 metagenomic samples, because they found that 70% of pangolins are positive for the coronavirus. Moreover, the virus isolate from pangolin shared 99% sequence similarity with the current infected human strain SARS-CoV-2 . Taking this recent research into consideration, we agreed that pangolin is more likely to be one of intermediate hosts of SARS-CoV-2. According to the latest data on Feb. 14th, 2020 , there have been a total of 67,081 clinically diagnosed cases of COVID-19 in worldwide, with 1,526 deaths. A total of 25 countries and regions have infected people. Due to the Spring Festival transportation peak, the disease has been spread more rapidly across China ). 
As the origin area of COVID-19, Hubei province has been the most severely infected area, with 54,406 cumulative diagnosis cases. Wuhan city has 37,914 cases. Guangdong, Henan, and Zhejiang province have 1,294 cases, 1,212 cases, and 1,162 cases, respectively ). At present, the COVID-19 outbreak has been spread to all parts of China and around the world, including the United States, Thailand, and Japan. It has been noticed that most of these patients have ever been to Wuhan or contacted with people who had been in Wuhan. The distribution of COVID-2019 patients in China (including Hong Kong, Macao and Taiwan) and Hubei Province is shown in . SARS were reported worldwide, with 774 deaths and 26 countries and regions affected . Most cases were in Asia, Europe, and America. The main countries in Asia were China (including mainland, Macao, Hong Kong, and Taiwan), Singapore, and so on. The total number of cases in mainland China was 5,327, with 349 deaths . The cases were mainly concentrated in Beijing, Guangdong, and Shanxi ) . In total, 2,102 patients were from Hong Kong, Macao, and Taiwan, with 336 deaths . According to the latest data on Feb. 14th, 2020 , there have been a total of 67,081 clinically diagnosed cases of COVID-19 in worldwide, with 1,526 deaths. A total of 25 countries and regions have infected people. Due to the Spring Festival transportation peak, the disease has been spread more rapidly across China ). As the origin area of COVID-19, Hubei province has been the most severely infected area, with 54,406 cumulative diagnosis cases. Wuhan city has 37,914 cases. Guangdong, Henan, and Zhejiang province have 1,294 cases, 1,212 cases, and 1,162 cases, respectively ). At present, the COVID-19 outbreak has been spread to all parts of China and around the world, including the United States, Thailand, and Japan. It has been noticed that most of these patients have ever been to Wuhan or contacted with people who had been in Wuhan. 
The distribution of COVID-2019 patients in China (including Hong Kong, Macao and Taiwan) and Hubei Province is shown in .  As the number of COVID-19 patients in China has been growing rapidly, preventing the spread of SARS-CoV-2 is the most important and urgent task . It was shown that human-to-human transmission of SARS-CoV-2 has spread via droplets or close contacts , but aerosol and fecal-oral transmission still need further study . To reduce virus transmission, early detection and isolation are essential. In addition, close monitoring in crowded places is also important . The possible pathogens of SARS and COVID-19 are both derived from wild animals . Therefore, hunting, selling, and eating wild animals not only seriously damage the ecosystem, but also lead to the spread of epidemic diseases . Thus, banning all wildlife trade is an effective measure to prevent viral prevalence. Wearing level-D protective clothing can protect medical staff from infection of respiratory viruses . A vaccine against SARS-CoV has not been described in any published articles . However, on Jan. 26th, 2020, the China CDC started to develop a new vaccine for SARS-CoV-2. The virus has been successfully isolated and seed strains have been screened . The early symptoms of SARS and COVID-19 are very similar to winter influenza, and the most important way to distinguish flu and pneumonia is to take throat swabs for viral testing . Current diagnostic tests for coronavirus include RT-PCR, real-time reverse transcription PCR (rRT-PCR), reverse transcription loop-mediated isothermal amplification, as well as real-time RT-LAMP . National Medical Products Administration has approved seven new nucleic acid test reagents for coronavirus, which were developed based on fluorescence PCR by Feb. 1st, 2020 . Suspected infections can be detected accurately and quickly for timely isolation and treatment to avoid infecting others by using these test reagents. 
Both SARS-CoV and SARS-CoV-2 are CoVs; hence, the treatment strategies of SARS could be relevant for COVID-19 . In 2003, SARS was mainly treated by isolation of the patients, hormones treatment, antiviral and symptomatic treatments, and many drugs such as glucocorticoid and interferon . Now, isolation, antiviral, and symptomatic treatments are still mainly adopted for COVID-19 treatment. As effective drugs for SARS, hormones and interferons can also be used to treat COVID-19 . Lopinavir is one kind of protease inhibitor used to treat HIV infection, with ritonavir as a booster. Lopinavir and/or ritonavir has anti coronavirus activity in vitro. Hong Kong scholars found that, compared with ribavirin alone, patients treated with lopinavir/ritonavir and ribavirin had lower risk of acute respiratory distress syndrome (ARDS) or death caused by SARS-CoV . Lopinavir/ritonavir has also been clinically tested in treatment of COVID-19, and showed wonderfully effective treatment for some patients, but the general clinical effect has not been determined . More effective treatments are still under continuing exploration: On Jan. 25th, 2020, a joint research team from the Shanghai Institute of Materia Medica, Chinese Academy of Sciences, and Shanghai Tech University screened and identified 30 potential drugs that are reported to be effective against SARS-CoV-2 . A high-resolution crystal structure of SARS-CoV-2 coronavirus 3CL hydrolase (Mpro) was announced after the outbreak of COVID-19 in the world , and human coronaviruses (HCoVs) have been treated as severe pathogens in respiratory tract infections. Nelfinavir was predicted to be a potential inhibitor of SARS-CoV-2 main protease . The first patient in the US had been trial-treated with intravenous remdesivir (a novel nucleotide analogue prodrug in development) due to a severe infection . No adverse reactions were observed during the administration, and the patient's condition was effectively improved . 
Clinical trials of remdesivir for treatment of COVID-19 just started on Feb. 5th and 12th, 2020 in Wuhan and Beijing, respectively, and the experimental results remain unclear . Many studies have been performed to study the pathogenesis of SARS-CoV . The spike (S) protein and N protein confer stability to the viral particle . The N protein is a structural protein involved in virion assembly, and plays a pivotal role in virus transcription and assembly efficiency . S protein can bind to the cellular receptors of sensitive cells and mediate infection of their target cells, after which it begins to replicate in the cytoplasm . SARS-CoV mainly targets the lungs, immune organs, and small systemic blood vessels and causes systemic vasculitis and decrease of immune function . More seriously, the infection leads to extensive pulmonary consolidation, diffuse alveolar damage, and the formation of a transparent membrane, finally deteriorating to respiratory distress . As a large number of people have left Wuhan, the control of the epidemic situation is extremely urgent, and the treatments of COVID-19 are imminent. On Feb. 14th, 2020, there were more than 54,000 confirmed patients in Hubei province, China . Due to the lack of effective antiviral drugs, the prognosis of patients solely depends on their age and physical condition . Although it was reported that the clinically recovered patients exceed the number of dead, the majority of the patients are still not cured in hospital. In addition, the potential adaptive mutation of SARS-CoV-2 makes it difficult for vaccine development. Therefore, it is urgent for us to develop more sensitive inspection methods and effective drugs. In addition to the well-known SARS-CoV, MERS-CoV, as one Merbecovirus subgenus of β-CoVs, is also extremely invasive. MERS-CoV is the pathogen of the Middle East Respiratory Syndrome, which can infect both humans and animals, and can be transmitted through camels . 
It mainly occurs in Saudi Arabia and has a high mortality rate . Studies had demonstrated that the clinical course of SARS and MERS was highly similar, and SARS and MERS may have similar pathogenesis . The genome sequence of SARS-CoV-2 also shows some similarities to that of MERS-CoV. It will be very interesting to study the relationship among SARS-CoV, MERS-CoV, and SARS-CoV-2 that may be exploited for future developing broad-spectrum antiviral therapies. Although more and more studies for SARS-CoV-2 have sprung up since the outbreak of this epidemic COVID-19, based on our comparison, we propose some key questions to be clarified in future studies ). In-depth understanding the underlying pathogenic mechanisms of SARS-CoV-2 will reveal more targets for better therapy of COVID-19. . Proposed questions to study SARS-CoV-2 for future studies. Can the susceptibility of asymptomatic carriers be judged by detecting the serum reactivity level of N protein? Apart from droplet transmission and contact transmission, are there other methods to transmit SARS-CoV-2? What is the percentage of COVID-19 patients have been infected with SARS and produced antibodies? Does traditional Chinese medicine have any effect on the treatment of COVID-19 caused by SARS-CoV-2? Do any environmental factors, such as regional conditions or climate, affect SARS-CoV-2 transmission? Author Contributions: Conceptualization, L.X., Y.W. and X.G.; methodology, J.X. and W.Z.; software, S.Z. and W.Z.; validation, J.X., S.Z. and T.T.; formal analysis, J.X. and S.Z.; writing-original draft preparation, J.X. and S.Z.; writing-review and editing, L.X., Y.W., W.Z. and X.G.; visualization, J.X., S.Z., and A.E.A.; funding acquisition, L.X., Y.W. and X.G. All authors have read and agreed to the published version of the manuscript. ",
     'qas': [{'id': 'a3db6ed9-11ef-47bb-8e84-63dffaebe9dd',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'f327c68c-a67d-4dcd-977f-f3d2871ab090',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '91c9996a-a37c-4616-9c7e-e2df2eaa14a8',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': 'fb4e9607-d635-429b-9432-5815defa3e7a',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '902e65c3-8ffb-4098-a7b0-528a563acfd6',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '5a2df3ec-8570-4b51-830c-c19e0d17bdf6',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': 'c20d971d-7430-422c-a6e9-0d9fa1a1397f',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': 'bd0734b1-485a-4917-978f-602214a954f3',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '83bc91e0-1141-43fe-b114-321ab067d61a',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': '6e2ecb98-8bd7-4e2c-bc3f-c86225c0e719',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'ac08f3fe-4f51-4636-8266-f9889e464128',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': 'China officially declared the epidemic as an outbreak on January 20 when obvious human-to-human transmissions were ascertained with reagent probes and primers distributed to local agencies on that day. Immediately following the declaration, massive actions were taken the next day to curb the epidemic at Wuhan, and soon spread to the whole country from central to local government, including all sectors from business to factories and to schools. On February 23, 2020, Wuhan City and other cities along with the main traffic lines around Wuhan were locked down. Rigorous efforts were devoted to 1) identify the infected and bring them to treatment in hospitals for infectious diseases, 2) locate and quarantine all those who had contact with the infected, 3) sterilize environmental pathogens, 4) promote mask use, and 5) release to the public of number of infected, suspected, under treatment and deaths on a daily basis. The sudden escalation of the control and the spread of the number of infected and deaths, however, ignited strong emotional responses of fear and panic among people in Wuhan. The negative emotional responses soon spread from Wuhan to other parts of China, and further to the world via almost all communication channels, particularly social media. The highly emotional responses of the public were fueled by (1) sudden increases in the number of detected new cases after the massive intervention measures to identify the infected; (2) massive growing needs for masks; (3) a large number of suspected patients waiting to confirm their diagnose; (4) a large number of diagnosed COVID-19 patients for treatment; and (5) a growing number of deaths, despite national efforts to improve therapy, including the decision to build two large hospitals within a period of days. 
The emotional responses, mostly stimulated by the daily release of data have created a big barrier for effective control of the epidemic as has been observed in other epidemics of similar nature . It is a paradox that during the early period of an epidemic, little is known or available about the new infections; while the need for such information is at the highest level. This is particularly true for the COVID-19. The occurrence of this epidemic may follow a nonlinear, chaotic and catastrophic process, rather similar to the epidemic of SARS that occurred in Hong Kong in 2003 , the Ebola epidemic in West Africa during 2013-16 , the pandemic of 2009 H1N1 epidemic started and the recent measles outbreaks in the United States (US) . Similar to an eruption of a volcano or occurrence of an earthquake, no matter how closely it is monitored, how much research we have done, how much we know about it, no one knows for sure if and when the virus infection will become an outbreak. Therefore, there is no so-called rational responses, no standardoperating-procedure (SOP) to follow, no measures to take without negative consequences . However, defining the COVID-19 as nonlinear and chaotic does not mean that we cannot do anything after we knew it was an outbreak, but simply waiting. On the contrary, defining it as nonlinear and chaotic will better inform us to make right decisions and to take appropriate actions. (1) During the early stage of an infection, which we cannot tell whether it will be growing into an outbreak, we must closely monitor it using limited data and to find the early signs of change and to predict if and when it will become an outbreak; (2) After it is declared as an outbreak, it is better to take actions as soon as possible since infectious diseases can be controlled even without knowledge of the biology ; and evaluate if the control measures work. 
The ultimate goal of this study is to attempt to provide some solutions to this paradox by providing early messages to inform control measures, to be optimistic and not panic, to ask right questions, and to take right actions. In theory, the true number of persons with COVID-19 infection can never be known no matter how we try to detect it. In practice, of all the infected cases in a day, there are some who have passed the latent period when the virus reaches a detectable level. These patients can then be detected if: a) detection services are available to them, b) all the potentially infected are accessible to the services and are tested, and c) the testing method is sensitive, valid and reliable. When reading the daily data, we must be aware that the detected and diagnosed cases in any day can be great, equal, or below the number of detectable. For example, a detectable person in day one can be postponed to next day when testing services become available. This will result in reduction in a detection rate < 100% in the day before the testing day and a detection rate > 100% in the testing day. Results of F(x) provide information most useful for resource allocation to support the prevention and treatment; however F(x) is very insensitive to changes in the epidemic. To better monitor the epidemic, the first derivative of F(x) can be used: We used these estimated P i in this study in several ways. The modeling analysis was completed using spreadsheet. As a reference to assess the level of severity of the COVID-19 epidemic, the natural mortality rate of Wuhan population was obtained from the 2018 Statistical Report of Wuhan National Economy and Social Development. The dynamic changes based on the observed F(x) in were presented in using the first derivatives F ′ (x) (top panel of the figure) and the second derivative F ′ ′ (x) (bottom panel of the figure), respectively. 
Before the declaration of outbreak, information provided by the two dynamic measured was similar: not much variations were revealed relative to the changes after the outbreak. These findings suggest the nonlinear and chaotic character of the COVID-19 outbreak. In this study, we used a novel approach to distill information from the cumulative number of diagnosed cases of COVID-19 infection. Among various types of surveillance data, this data often reported the earliest and on a continuous basis with high completeness and are most widely available. In addition, patients with a diagnosed infection are those with high likelihoods to spread the virus to others. Findings from this study provided useful information in a real time manner to monitor, evaluate and forecast the COVID-19 epidemic in China. The methods used in this study although somewhat mathematical, are easy to follow while information extracted from the commonly used data with the methods are highly useful and more sensitive than the daily new and cumulative cases. Although an analytical demonstration of the COVID-19 outbreak as nonlinear, chaotic and catastrophic requires more time to wait till the epidemic ends, evidence in the first 2 months suggests that the COVID-19 outbreak in China is nonlinear and chaotic. The epidemic emerged suddenly after a long latent period without dramatic changes as revealed from the cumulative cases, and their first and second derivatives. The high responsiveness of the epidemic to interventions adds additional evidence supporting the chaotic and catastrophic nature, and demonstrating the selection of a good timing to start intervention. Many of these characters are similar to those observed in the 2003 SARS epidemic started in Hong Kong , the 2013-16 Ebola spread in the West Africa , the 2009 pandemic of H1N1 started in the US , and the measles outbreaks over 80 cities in the US recently . Even the seasonal common flu has been proved to have a nonlinear component . 
The significance of nonlinear and chaotic nature of COVID-19 means that no methods are available to predict exactly at what point in time the epidemic will emerge as an outbreak, just like volcanoes and earthquakes. Therefore, practically there is no so-called a best time or missed the best time to take actions. There will also no so-called rational analysis and rational responses. There is no silver bullet to use, no standard-operating-procedure (SOP) to follow, and no measures without negative consequences to control the epidemic . For example, it took more than 6 months for both the US and the WHO to determine the 2009 H1N1 pandemic as an outbreak . Therefore, knowing the nonlinear and chaotic nature of an epidemic outbreak, like COVID-19, for all stockholders will be essential to the mobilization of resources, working together, taking all actions possible to control the epidemic, and minimizing the negative consequences. Specifically, what we can do to deal with an outbreak like COVID-19 would be to (1) collect information as early as possible, (2) monitor the epidemic as close as possible just like we do for an earthquake and make preparations for a hurricane and (3) communicate with the society and use confirmed data appropriately reframed not causing or exacerbating fear and panic in the public, stress and distress among medical and public health professionals, as well as administrators to make right decisions and take the right strategies at the right time in the right places for the right people. Knowing the nonlinear and chaotic nature is also essential for taking actions to control the outbreak of an epidemic like the COVID-19 infection. 
As soon as an outbreak is confirmed, the follow measures should be in position immediately 1) closely and carefully monitor the epidemic; 2) take evidence-based interventions to control the epidemic, 3) actively assess responses of the epidemic to the interventions; 4) allow errors in the intervention, particularly during the early period of the epidemic, 5) always prepare for alternatives. Another confusion is, when an epidemic starts, everyone asks what it is? How does it happen? How should I do to avoid infection? Is there any effective treatment? Answering these questions takes time, but there is no need to wait till all these questions are resolved before taking actions. We can take actions to prevent COVID-19 immediately while waiting for answers to these questions. This is because we have the evidence-based strategy for control and prevention of any infectious disease without complete understanding of an infection. That is so-called Tri-Component Strategy: locating and controlling the sources of infection, identifying and blocking the transmission paths, and protecting those who are susceptible . This was just what China has done, is doing, and will continue to do this time. Typical examples of control and prevention measures include locking down of cities, communities, and villages with potential of large scale transmission, massive environment sterilization, promotion of mask use, efforts to locate, isolate and treat the infected. More importantly, most of these actions are initiated, mobilized, coordinated and supported by the government from central to local, and enhanced by volunteers and international support. There are a number of advantages of methods we developed and used in this study. 
First, framing the diagnosed cases as the cumulative, the first and the second derivative constructs a system to gauge the epidemic, with the cumulative cases showing the overall level of the epidemic, the first derivative to reflect the change of the epidemic, and the second derivative to monitor the speed of change. By inclusion of the mortality rate as a reference, results from our approach will be (1) comprehensive to inform the public to be prepared, not scared and not to blame others; (2) useful for administrators to make decisions; (3) valuable for medical and health professionals to take actions. Second, we conceptually separated (1) the true number of infections, which will never be practically detected, from (2) the infections that are practically detectable if services are available and accessible and detection technologies are sensitive and reliable, and (3) the actually detected cases of infections. This classification greatly improved our understanding of the observed data as well as findings from the two derivatives, and aided us in assessing the responsiveness to the massive interventions, and predicting of the epidemic over time. The clarification also enhanced our analytical approach by adding an exponential model to evaluate the detection rate and to bring more data assessing the responsiveness of the epidemic to the massive interventions. We highly recommend the inclusion of the methods as a part of routine surveillance in disease control and prevention institutions. There are limitations. First, this study covered only the first 2 months of the epidemic. We will continue to evaluate the utility of this method as we follow the development of the epidemic. Second, the methods used in this study was based on a close population. This hypothesis may not be true because of a large number of people with potential history of exposure in China traveled to other countries. 
Up to February 8, 2020, the total cases diagnosed were 37,552 worldwide (Worldometer on Coronavirus) with 37,198 in China, which accounted for 99.1% of the total number of the world. Therefore, the impact of close-population assumption would be rather limited. Third, there was a lack of individual patientlevel data for detailed analysis. Fourth, our model can be further improved with other data, such as cases by severity, number of the suspected, number of those who received treatments and treatment results. We will follow the epidemic closely and prepare for further research on the topic when more data become available. Despite the limitations, this study provided new data to encourage those who are infected to better fight against the infections; to inform and encourage the general public, the medical and health professionals and the government to continue their current measures and to think of more measures that are innovative and effective to end the COVID-19 epidemic. One of the greatest motivations for this study is to attempt to provide right information at the population level in a real manner to complement the data from micro-organism centered and laboratory-based biological, molecular, pharmacological and clinical information in both the academic and the mass media that often scare rather than encourage people, even health professionals. Of the diagnosed COVID-19 cases, less than 20% are severe. Findings from our study indicated that there is no need to be panic from a public health population perspective. Although the total cases COVID-19 reached to big numbers, but the 2-month incidence rate was about a half of the natural death rate for Wuhan residents.',
     'qas': [{'id': '637431d2-a3a9-4b54-8304-bef33b5767dd',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': 'a06906b3-0874-46d7-97b8-4b11a387b222',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '94eaeec5-275f-431d-9c57-1474c79cf1ad',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '22970b94-2a50-4e11-a3bd-ce0248f35e23',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '50e7e9bf-f699-4863-a781-7bf8968e2b5f',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': 'e4b9877f-29a5-44c4-9774-f667922e98a2',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '34203641-7978-418c-be3f-62961ae62e1f',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': 'e339c085-749c-4dab-8252-0282654edb2d',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '06536d73-8488-4bd3-86c4-7a8f454bce6d',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'b25bc7d0-3c74-49b9-8fcc-0de30c9b4b10',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': 'c260311f-00f3-422f-aa9f-03ca9e5a9641',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]},
    {'context': "Upon a viral outbreak, it is important to rapidly establish whether the outbreak is caused by a new or a previously known virus (Box 1), as this helps decide which approaches and actions are most appropriate to detect the causative agent, control its transmission and limit potential consequences of the epidemic. The assessment of virus novelty also has implications for virus naming and, on a different timescale, helps to define research priorities in virology and public health. For many human virus infections such as influenza virus 1 or norovirus 2 infections, well-established and internationally approved methods, standards and procedures are in place to identify and name the causative agents of these infections and report this information promptly to public health authorities and the general public. In outbreaks involving newly emerged viruses, the situation may be different, and appropriate procedures to deal with these viruses need to be established or refined with high priority. Virus nomenclature is a formal system of names used to label viruses and taxa. The fact that there are names for nearly all viruses within a species is due to the historical perception of viruses as causative agents of specific diseases in specific hosts, and to the way we usually catalogue and classify newly discovered viruses, which increasingly includes viruses that have not been linked to any known disease in their respective hosts (Box 1). The WHO, an agency of the United Nations, coordinates international public health activities aimed at combating, containing and mitigating the consequences of communicable diseases-including major virus epidemics-and is responsible for naming disease(s) caused by newly emerging human viruses. In doing so, the WHO often takes the traditional approach of linking names of specific diseases to viruses (Box 1) and assessing virus novelty by an apparent failure to detect the causative agent using established diagnostic assays. 
Researchers studying coronaviruses-a family of enveloped positive-strand RNA viruses infecting vertebrates 8 -have been confronted several times with the need to define whether a newly emerged virus causing a severe or even life-threatening disease in humans belongs to an existing or a new (yet-to-be-established) species. This happened with SARS 9-12 and with Middle East respiratory syndrome (MERS) 13,14 a few years later. Each time, the virus was placed in the taxonomy using information derived from a sequencebased family classification . However, the host of a given virus may be uncertain, and virus pathogenicity remains unknown for a major (and fast-growing) proportion of viruses, including many coronaviruses discovered in metagenomics studies using next-generation sequencing technology of environmental samples . These studies have identified huge numbers of viruses that circulate in nature and have never been characterized at the phenotypic level. Thus, the genome sequence is the only characteristic that is known for the vast majority of viruses, and needs to be used in defining specific viruses. In this framework, a virus is defined by a genome sequence that is capable of autonomous replication inside cells and dissemination between cells or organisms under appropriate conditions. It may or may not be harmful to its natural host. Experimental studies may be performed for a fraction of known viruses, while computational comparative genomics is used to classify (and deduce characteristics of) all viruses. Accordingly, virus naming is not necessarily connected to disease but rather informed by other characteristics. 
In view of the above advancements and when confronted with the question of whether the virus name for the newly identified human virus should be linked to the (incompletely defined) disease that this virus causes, or rather be established independently from the virus phenotype, the CSG decided to follow a phylogeny-based line of reasoning to name this virus whose ontogeny can be traced in the figure in Box 1. Year 2012 First name Name origin the place of new viruses through their relation to known viruses in established taxa, including placements relating to the species Severe acute respiratorysyndrome-related coronavirus. In the classification of nidoviruses, species are considered biological entities demarcated by a genetics-based method 21 , while generally virus species are perceived as man-made constructs . To appreciate the difference between a nidoviral species and the viruses grouped therein, it may be instructive to look at their relationship in the context of the full taxonomy structure of several coronaviruses. Although these viruses were isolated at different times and locations from different human and animal hosts (with and without causing clinical disease), they all belong to the species Severe acute respiratorysyndrome-related coronavirus, and their relationship parallels that between human individuals and the species Homo sapiens ). This evaluation is usually conducted in silico using phylogenetic analysis, which may be complicated by uneven rates of evolution that vary across different virus lineages and genomic sites due to mutation, including the exchange of genome regions between closely related viruses (homologous recombination). 
However, given that the current sampling of viruses is small and highly biased toward viruses of significant medical and economic interest, group composition varies tremendously among different viruses, making decisions on virus novelty group-specific and dependent on the choice of the criteria selected for this assessment. Initially, the classification of coronaviruses was largely based on serological (cross-) reactivities to the viral spike protein, but is now based on comparative sequence analyses of replicative proteins. The choice of proteins and the methods used to analyse them have gradually evolved since the start of this century . The CSG currently analyses 3CLpro, NiRAN, RdRp, ZBD and HEL1 (ref. 52 ) , two domains less than previously used in the analyses conducted between 2009 and 2015 (refs. . According to our current knowledge, these five essential domains are the only ones conserved in all viruses of the order Nidovirales . They are thus used for the classification by all ICTV nidovirus study groups (coordinated by the NSG). The available yet limited epidemiological and clinical data for SARS-CoV-2 suggest that the disease spectrum and transmission efficiency of this virus 31-35 differ from those reported for SARS-CoV 9 . To accommodate the wide spectrum of clinical presentations and outcomes of infections caused by SARS-CoV-2 (ranging from asymptomatic to severe or even fatal in some cases) , the WHO recently introduced a rather unspecific name (coronavirus disease 19, also known as COVID-19 (ref. )) to denote this disease. Also, the diagnostic methods used to confirm SARS-CoV-2 infections are not identical to those of SARS-CoV. This is reflected by the specific recommendations for public health practitioners, healthcare workers and laboratory diagnostic staff for SARS-CoV-2 (for example, the WHO guidelines for SARS-CoV-2 (ref. ). 
By uncoupling the naming conventions used for coronaviruses and the diseases that some of them cause in humans and animals, we wish to support the WHO in its efforts to establish disease names in the most appropriate way (for further information, see the WHO's guidelines for disease naming ). The further advancement of naming conventions is also important because the ongoing discovery of new human and animal viruses by next-generation sequencing technologies can be expected to produce an increasing number of viruses that do not (easily) fit the virus-disease model that was widely used in the pregenomic era (Box 1). Having now established different names for the causative virus (SARS-CoV-2) and the disease , the CSG hopes that this will raise awareness in both the general public and public health authorities regarding the difference between these two entities. The CSG promotes this clear distinction because it will help improve the outbreak management and also reduces the risk of confusing virus and disease, as has been the case over many years with SARS-CoV (the virus) and SARS (the disease). Intra-SARS-CoV distances example, SARS-CoV-2/human/Wuhan/X1/2019. This complete designation along with additional and important characteristics, such as pathogenic potential in humans or other hosts, should be included in the submission of each isolate genome sequence to public databases such as GenBank. In publications, this name could be further extended with a sequence database ID-for example, SARS-CoV-2/human/Wuhan/X1/2019_XYZ12345 (fictional example)-when first mentioned in the text. We believe that this format will provide critical metadata on the major characteristics of each particular virus isolate (genome sequence) required for subsequent epidemiological and other studies, as well as for control measures. 
Historically, public health and fundamental research have been focused on the detection, containment, treatment and analysis of viruses that are pathogenic to humans following their discovery (a reactive approach). Exploring and defining their biological characteristics in the context of the entire natural diversity as a species has never been a priority. The emergence of SARS-CoV-2 as a human pathogen in December 2019 may thus be perceived as completely independent from the SARS-CoV outbreak in 2002-2003. Although SARS-CoV-2 is indeed not a descendent of SARS-CoV , and the introduction of each of these viruses into humans was likely facilitated by independent unknown external factors, the two viruses are genetically so close to each other , panel c of the figure in Box 4) that their evolutionary histories and characteristics are mutually informative. The currently known viruses of the species Severe acute respiratory syndrome-related coronavirus may be as (poorly) representative for this particular species as the few individuals that we selected to represent H. sapiens in . It is thus reasonable to assume that this biased knowledge of the natural diversity of the species Severe acute respiratory syndrome-related coronavirus limits our current understanding of fundamental aspects of the biology of this species and, as a consequence, our abilities to control zoonotic spillovers to humans. Future studies aimed at understanding the ecology of these viruses and advancing the accuracy and resolution of evolutionary analyses 41 would benefit greatly from adjusting our research and sampling strategies. This needs to include an expansion of our current research focus on human pathogens and their adaptation to specific hosts to other viruses in this species. To illustrate the great potential of species-wide studies, it may again be instructive to draw a parallel to H. sapiens, and specifically to the impressive advancements in personalized medicine in recent years. 
Results of extensive genetic analyses of large numbers of individuals representing diverse populations from all continents have been translated into clinical applications and greatly contribute to optimizing patient-specific diagnostics and therapy. They were instrumental in identifying reliable predictive markers for specific diseases as well as genomic sites that are under selection. It thus seems reasonable to expect that genome-based analyses with a comparable species coverage will be similarly insightful for coronaviruses. Also, additional diagnostic tools that target the entire species should be developed to complement existing tools optimized to detect individual pathogenic variants (a proactive approach). Technical solutions to this problem are already available; for example, in the context of multiplex PCR-based assays . The costs for developing and applying (combined or separate) species-and virus-specific diagnostic tests in specific clinical and/or epidemiological settings may help to better appreciate the biological diversity and zoonotic potential of specific virus species and their members. Also, the further reduction of time required to identify the causative agents of novel virus infections will contribute to limiting the enormous social and economic consequences of large outbreaks. To advance such studies, innovative fundraising approaches may be required. Although this Consensus Statement focuses on a single virus species, the issues raised apply to other species in the family and possibly beyond. A first step towards appreciation of this species and others would be for researchers, journals, databases and other relevant bodies to adopt proper referencing to the full taxonomy of coronaviruses under study, including explicit mentioning of the relevant virus species and the specific virus(es) within the species using the ICTV naming rules explained above. 
This naming convention is, regretfully, rarely observed in common practice, with mixing of virus and species names being frequently found in the literature (including by the authors of this Consensus Statement on several past occasions). The adoption of accurate virus-naming practices should be facilitated by the major revision of the virus species nomenclature that is currently being discussed by the ICTV and is being planned for implementation in the near future . With this change in place, the CSG is resolved to address the existing significant overlap between virus and species names that complicates the appreciation and use of the species concept in its application to coronaviruses.",
     'qas': [{'id': '42a06f7e-f3c2-4830-8546-27638e0e871e',
       'is_impossible': '',
       'question': 'Effectiveness of drugs being developed and tried to treat COVID-19 patients.'},
      {'id': '14682c12-2c8f-43b8-8f13-f42d7fc9bde2',
       'is_impossible': '',
       'question': 'Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocyclinethat that may exert effects on viral replication.'},
      {'id': '4547fae8-5e3f-4f40-8ab4-90dc669a4762',
       'is_impossible': '',
       'question': 'Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients.'},
      {'id': '95e8573c-eeef-4663-bfdb-66d241f14e30',
       'is_impossible': '',
       'question': 'Exploration of use of best animal models and their predictive value for a human vaccine.'},
      {'id': '4f7836ed-58b3-4bea-bb1e-ef086317ef00',
       'is_impossible': '',
       'question': 'Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents.'},
      {'id': '08e80031-2123-4784-9fc0-785f1152c417',
       'is_impossible': '',
       'question': 'Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need.'},
      {'id': '9cde28ca-d4d6-4408-ae4f-6cd99a22d358',
       'is_impossible': '',
       'question': 'Efforts targeted at a universal coronavirus vaccine.'},
      {'id': '96475e7b-d029-41dc-81cf-1d06cfa2ad4b',
       'is_impossible': '',
       'question': 'Efforts to develop animal models and standardize challenge studies'},
      {'id': '8c064f0e-efd7-4a29-a457-d737b1abc0aa',
       'is_impossible': '',
       'question': 'Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers'},
      {'id': 'cd0bc676-dec2-4df9-902e-cdb61585fec7',
       'is_impossible': '',
       'question': 'Approaches to evaluate risk for enhanced disease after vaccination'},
      {'id': '83e1a169-65b7-486c-807e-4bfa312d32ec',
       'is_impossible': '',
       'question': 'Assays to evaluate vaccine immune response and process development for vaccines, alongside suitable animal models [in conjunction with therapeutics]'}]}],
   'title': 'Abstract'}],
 'version': 'v2.0'} 

# Predictions from BioBERT-QA for Task 3

In [None]:
# Gather the id and text of every question attached to the paragraphs of the
# first document in the SQuAD-style Task 3 structure.
task3_paragraphs = squad_task3_CZI['data'][0]['paragraphs']
ids = [qa['id'] for paragraph in task3_paragraphs for qa in paragraph['qas']]
questions = [qa['question'] for paragraph in task3_paragraphs for qa in paragraph['qas']]

# Lookup table from question id to question text.
id2q = dict(zip(ids, questions))

# Load the stored predictions; they are looked up in id2q by key below.
with open('../input/topiqal/predictions.json', 'r') as fp:
    preds = json.load(fp)

# Show each predicted answer next to the question it belongs to.
for qa_id in preds:
    print("Question: {}".format(id2q[qa_id]))
    print("Answer: {}".format(preds[qa_id]))
    print('\n')

> Question: Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers

> Answer: specific recommendations for public health practitioners, healthcare workers and laboratory diagnostic staff for SARS-CoV-2
> 
> 
> Question: Approaches to evaluate risk for enhanced disease after vaccination

> Answer: phylogenetic analysis