In [1]:
#############################################################################
## Demo the USVA Hospitals COVID-19 BioSurveillance App
##
## Name: Chris Meaney
## Date: June 2021
#############################################################################

In [2]:
## Package dependencies

## Standard library (environment lookup, printing)
import os
from pprint import pprint

## Data handling
import pandas as pd

## For sessionInfo
from sinfo import sinfo

## For NLP and clinical NLP (covid19 biosurveillance) 
import spacy
import medspacy
import cov_bsv

from medspacy.visualization import visualize_dep

In [3]:
## To suppress warnings
import warnings
warnings.filterwarnings('ignore')

  and should_run_async(code)


In [4]:
## Pandas display options (control how DataFrames are printed to the console):
## show up to 100 rows, and up to 500 characters per column
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = 500

In [5]:
## Import test strings
## The CSV location can be overridden by setting the COVID_TEST_STRINGS_CSV
## environment variable before starting the kernel; when it is not set, the
## original author's local path is used, so existing behaviour is unchanged.
fpath = os.environ.get(
    'COVID_TEST_STRINGS_CSV',
    'C:\\Users\\ChristopherMeaney\\Desktop\\Phenotype_COVID\\template_covid_test_strings_wch_cmedits.csv',
)
X = pd.read_csv(filepath_or_buffer=fpath)
## Show (rows, columns) of the loaded frame as the cell output
X.shape

(82, 1)

In [6]:
## Preview the first five rows of the loaded test strings
X.head(5)

Unnamed: 0,cleaned_value
0,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Seegene Allplex 2019-nCoV Assay.,NOTE: The Seegene Allplex 2019-nCoV Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory."
1,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the BGI Real-Time Fluorescent,RT-PCR 2019-nCoV Assay.,NOTE: The BGI Real-Time Fluorescent RT-PCR 2019-nCoV Assay,has been approved by Health Canada for Emergency Use Access,(EUA) and has been verified by the University Health,Network/Sinai Health Microbiology Laboratory."
2,No swab
3,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Luminex ARIES 2019-nCoV Assay.,NOTE: The Luminex ARIES 2019-nCoV Assay has not been,approved by Health Canada but has been verified by the,University Health Network/Sinai Health Microbiology,Laboratory."
4,negative


In [7]:
## Pull the text column out of the DataFrame as a plain Python list
texts = X['cleaned_value'].to_list()
## Number of texts in the corpus
len(texts)

82

In [8]:
##############################################
## Apply cov_bsv and medspacy to texts
##############################################

In [9]:
## Load the cov_bsv NLP pipeline; it is bound to `nlp` and reused by the cells below
nlp = cov_bsv.load()

In [10]:
## Display the pipeline object returned by cov_bsv.load()
nlp

<spacy.lang.en.English at 0x27f06b979c8>

In [11]:
## List the names of the components in the loaded pipeline
nlp.pipe_names

['tagger',
 'parser',
 'concept_tagger',
 'target_matcher',
 'sectionizer',
 'context',
 'postprocessor',
 'document_classifier']

In [12]:
## Run the NLP pipeline over every text in the corpus;
## list() collects all processed documents that nlp.pipe produces
docs = list(nlp.pipe(texts))

In [13]:
################################################
## Document-level classification - the main output of cov_bsv
## (each document is labelled as COVID-19 pos/neg/unk)
################################################
docs_classify = [processed._.cov_classification for processed in docs]
## Store the label next to its source text
X['classify'] = docs_classify

In [14]:
## Display the full DataFrame: every document alongside its classification
X

Unnamed: 0,cleaned_value,classify
0,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Seegene Allplex 2019-nCoV Assay.,NOTE: The Seegene Allplex 2019-nCoV Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",UNK
1,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the BGI Real-Time Fluorescent,RT-PCR 2019-nCoV Assay.,NOTE: The BGI Real-Time Fluorescent RT-PCR 2019-nCoV Assay,has been approved by Health Canada for Emergency Use Access,(EUA) and has been verified by the University Health,Network/Sinai Health Microbiology Laboratory.",UNK
2,No swab,NEG
3,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Luminex ARIES 2019-nCoV Assay.,NOTE: The Luminex ARIES 2019-nCoV Assay has not been,approved by Health Canada but has been verified by the,University Health Network/Sinai Health Microbiology,Laboratory.",UNK
4,negative,NEG
5,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,By the Altona RealStar SARS-CoV-2 RT-PCR Kit 1.0.,NOTE: The Altona RealStar SARS-CoV-2 RT-PCR Kit 1.0,has been verified at the University Health Network,Sinai Health Microbiology Laboratory.,It has not been cleared or approved by Health Canada.",UNK
6,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the cobas SARS-COV-2 Assay.,NOTE: The cobas SARS-COV-2 Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",UNK
7,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the BGI 2019-nCoV Assay.,NOTE: The BGI 2019-nCoV Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",UNK
8,"COVID-19 virus,DETECTED by real-time PCR.,* * * * * * * * * * * * * * * * * *,Testing performed using the Seegene Allplex 2019-nCoV Assay.,NOTE: The Seegene Allplex 2019-nCoV Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",POS
9,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the BGI Real-Time Fluorescent RT-PCR,2019-nCoV Assay.,NOTE: The BGI Real-Time Fluorescent RT-PCR 2019-nCoV Assay,has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",UNK


In [15]:
## Tally how many documents received each classification label
X['classify'].value_counts()

POS    38
UNK    29
NEG    15
Name: classify, dtype: int64

In [16]:
#############################################
## Collect the entities the pipeline tagged in each doc
## (one inner list per document, in corpus order)
#############################################
covid_ents = [list(doc.ents) for doc in docs]
len(covid_ents)

82

In [17]:
## Pretty-print the entities found in each document (one inner list per document)
pprint(covid_ents)

[[COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [],
 [COVID-19, SARS-CoV-2, SARS-CoV-2],
 [COVID-19, SARS-COV-2, SARS-COV-2],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, PCR,2019-nCoV, 2019-nCoV],
 [COVID-19, COVID-19, COVID-19, COVID-19, 2019-nCoV, 2019-nCoV],
 [],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, COVID-19, COVID-19, COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, Allplex,2019-nCoV, 2019-nCoV],
 [COVID-19, SARS-COV-2, SARS-COV-2],
 [],
 [COVID-19, COVID-19, COVID-19, COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, SARS-CoV-2, SARS-CoV-2],
 [COVID-19, SARS-CoV-2, SARS-CoV-2],
 [COVID-19, SARS-CoV-2, SARS-CoV-2],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19, SARS-CoV-2, SARS-CoV-2],
 [COVID-19, 2019-nCoV, 2019-nCoV],
 [COVID-19],
 [COVID-19,

In [18]:
## Attach the per-document entity lists to the DataFrame as a new column,
## then preview the first five rows
X['covid_ents'] = covid_ents
X.head(n=5)

Unnamed: 0,cleaned_value,classify,covid_ents
0,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Seegene Allplex 2019-nCoV Assay.,NOTE: The Seegene Allplex 2019-nCoV Assay has been approved,by Health Canada for Emergency Use Access (EUA) and has,been verified by the University Health Network/Sinai,Health Microbiology Laboratory.",UNK,"[(COVID-19), (2019-nCoV), (2019-nCoV)]"
1,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the BGI Real-Time Fluorescent,RT-PCR 2019-nCoV Assay.,NOTE: The BGI Real-Time Fluorescent RT-PCR 2019-nCoV Assay,has been approved by Health Canada for Emergency Use Access,(EUA) and has been verified by the University Health,Network/Sinai Health Microbiology Laboratory.",UNK,"[(COVID-19), (2019-nCoV), (2019-nCoV)]"
2,No swab,NEG,[]
3,"COVID-19 virus NOT detected by real-time PCR.,* * * * * * * * * * * * * * * * * * *,Testing performed using the Luminex ARIES 2019-nCoV Assay.,NOTE: The Luminex ARIES 2019-nCoV Assay has not been,approved by Health Canada but has been verified by the,University Health Network/Sinai Health Microbiology,Laboratory.",UNK,"[(COVID-19), (2019-nCoV), (2019-nCoV)]"
4,negative,NEG,[]


In [19]:
####################################################
## Visualize why NLP pipeline is making pos/neg/unk COVID-19 classifications
####################################################
## For every document: render the cov_bsv document view, then the medspacy
## visualize_dep view, printing a divider line after each one
divider = "__"*20
for i, doc in enumerate(docs):
    cov_bsv.visualize_doc(doc, document_id=i)
    print(divider)
    visualize_dep(doc)
    print(divider)

________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


________________________________________


In [20]:
#####################
## Session Info
#####################

## Report package, Jupyter, Python and OS versions for reproducibility
## (`sinfo` is imported in the package-dependencies cell at the top of the notebook)
sinfo()

-----
cov_bsv     NA
medspacy    0.1.0.0
pandas      1.2.4
sinfo       0.3.1
spacy       2.3.2
-----
IPython             7.24.1
jupyter_client      6.1.12
jupyter_core        4.7.1
jupyterlab          3.0.16
notebook            6.4.0
-----
Python 3.7.10 | packaged by conda-forge | (default, Feb 19 2021, 15:37:01) [MSC v.1916 64 bit (AMD64)]
Windows-10-10.0.19041-SP0
8 logical CPU cores, Intel64 Family 6 Model 126 Stepping 5, GenuineIntel
-----
Session information updated at 2021-06-24 12:30
