In [1]:
import spacy
import json
from spacy.tokens import Doc
import entity_retokenizer
import citation_component
import description_extractor

with open('disorder_patterns.json', 'r') as file:
    disorder_patterns = json.load(file)

nlp = spacy.load("en_core_web_sm")

with open("../data/panic_attacks.txt", "r") as f:
    text = f.read()

# Add
# the
# EntityRuler
# to
# the
# pipeline
ruler = nlp.add_pipe('entity_ruler', after="ner")
ruler.add_patterns(disorder_patterns)

nlp.add_pipe("citation_component", after="ner")

nlp.add_pipe("entity_retokenizer_component", name='merge_phrases', after='citation_component')

Doc.set_extension("disorder_descriptions", default=[], force=True)
Doc.set_extension("disorder_citations", default=[], force=True)
Doc.set_extension("disorder_diagnoses", default=[], force=True)
nlp.add_pipe("disorder_extractor", after="merge_phrases")

doc = nlp(text)

# for op, diagnosis in doc._.disorder_diagnoses:
#     print(f"Diagnosis: {diagnosis}\n")

for disorder, citation in doc._.disorder_citations:
    print(f"Disorder: {disorder}\nCitation: {citation}\n")

(DSM-5, (American Psychiatric Association, 2013), at least 1 month, (American Psychiatric Association, 2013))
DSM-5
(American Psychiatric Association, 2013)
at least 1 month
(American Psychiatric Association, 2013)


### Pipeline

In [2]:
print(nlp.pipe_names)

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner', 'citation_component', 'merge_phrases', 'disorder_extractor', 'entity_ruler']


### Tokenizer

In [3]:
for token in doc:
    print(token.text)

"
Panic
attacks
"
are
discrete
episodes
of
intense
fear
or
discomfort
,
accompanied
by
physical
and
cognitive
symptoms
,
as
listed
in
the
DSM-5
panic
attack
checklist
(American Psychiatric Association, 2013)
.
Panic
attacks
are
discrete
by
virtue
of
their
sudden
or
abrupt
onset
and
brief
duration
,
as
opposed
to
gradually
building
anxious
arousal
.
Panic
attacks
in
panic
disorder
often
have
an
unexpected
quality
,
meaning
that
from
the
patient
's
perspective
,
they
appear
to
happen
without
an
obvious
trigger
or
at
unexpected
times
.
Indeed
,
the
diagnosis
of
panic
disorder
is
defined
by
recurrent
"
unexpected
"
panic
attacks
,
followed
by
at least 1 month
of
persistent
concern
about
their
recurrence
and
their
consequences
,
or
by
a
significant
change
in
behavior
consequent
to
the
attacks
(American Psychiatric Association, 2013)
.


### Tagger

In [4]:
for token in doc:
    print(token.text, token.tag_)

" ``
Panic NN
attacks NNS
" ''
are VBP
discrete JJ
episodes NNS
of IN
intense JJ
fear NN
or CC
discomfort JJ
, ,
accompanied VBN
by IN
physical JJ
and CC
cognitive JJ
symptoms NNS
, ,
as IN
listed VBN
in IN
the DT
DSM-5 NNP
panic NN
attack NN
checklist NN
(American Psychiatric Association, 2013) NNP
. .
Panic NN
attacks NNS
are VBP
discrete JJ
by IN
virtue NN
of IN
their PRP$
sudden JJ
or CC
abrupt JJ
onset NN
and CC
brief JJ
duration NN
, ,
as IN
opposed VBN
to IN
gradually RB
building VBG
anxious JJ
arousal NN
. .
Panic NN
attacks NNS
in IN
panic NN
disorder NN
often RB
have VBP
an DT
unexpected JJ
quality NN
, ,
meaning VBG
that IN
from IN
the DT
patient NN
's POS
perspective NN
, ,
they PRP
appear VBP
to TO
happen VB
without IN
an DT
obvious JJ
trigger NN
or CC
at IN
unexpected JJ
times NNS
. .
Indeed RB
, ,
the DT
diagnosis NN
of IN
panic NN
disorder NN
is VBZ
defined VBN
by IN
recurrent JJ
" ``
unexpected JJ
" ''
panic NN
attacks NNS
, ,
followed VBN
by IN
at least 1 month NN
of 

### POS Tagger

In [5]:
# from spacy import displacy
# 
# for sent in doc.sents:
#     displacy.render(sent, style="dep")
# 
for token in doc:
    print(token.text, token.dep_, token.head)


" punct are
Panic compound attacks
attacks nsubj are
" punct attacks
are ROOT are
discrete amod episodes
episodes attr are
of prep episodes
intense amod fear
fear pobj of
or cc fear
discomfort conj fear
, punct are
accompanied advcl are
by agent accompanied
physical amod symptoms
and cc physical
cognitive conj physical
symptoms pobj by
, punct accompanied
as mark listed
listed advcl accompanied
in prep listed
the det checklist
DSM-5 compound checklist
panic compound attack
attack compound checklist
checklist pobj in
(American Psychiatric Association, 2013) appos checklist
. punct are
Panic compound attacks
attacks nsubj are
are ROOT are
discrete acomp are
by prep discrete
virtue pobj by
of prep virtue
their poss onset
sudden amod onset
or cc sudden
abrupt conj sudden
onset pobj of
and cc onset
brief amod duration
duration attr are
, punct are
as mark opposed
opposed advcl are
to aux building
gradually advmod building
building xcomp opposed
anxious amod arousal
arousal dobj building
. p

### Lemmatizer

In [6]:

for token in doc:
    print(token.text, token.lemma_)

" "
Panic panic
attacks attack
" "
are be
discrete discrete
episodes episode
of of
intense intense
fear fear
or or
discomfort discomfort
, ,
accompanied accompany
by by
physical physical
and and
cognitive cognitive
symptoms symptom
, ,
as as
listed list
in in
the the
DSM-5 DSM-5
panic panic
attack attack
checklist checklist
(American Psychiatric Association, 2013) (American Psychiatric Association, 2013)
. .
Panic panic
attacks attack
are be
discrete discrete
by by
virtue virtue
of of
their their
sudden sudden
or or
abrupt abrupt
onset onset
and and
brief brief
duration duration
, ,
as as
opposed oppose
to to
gradually gradually
building build
anxious anxious
arousal arousal
. .
Panic panic
attacks attack
in in
panic panic
disorder disorder
often often
have have
an an
unexpected unexpected
quality quality
, ,
meaning mean
that that
from from
the the
patient patient
's 's
perspective perspective
, ,
they they
appear appear
to to
happen happen
without without
an an
obvious obvious
trigge

### Named Entity Recognition

In [7]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Panic attacks DISORDER
DSM-5 NORP
panic attack DISORDER
(American Psychiatric Association, 2013) CITATION
Panic attacks DISORDER
Panic attacks DISORDER
panic disorder DISORDER
panic disorder DISORDER
panic attacks DISORDER
at least 1 month DATE
(American Psychiatric Association, 2013) CITATION


In [8]:
from spacy import displacy

displacy.render(doc, style="ent")

In [9]:
from pathlib import Path
from spacy import displacy

output_path = Path(f"./displacy_new/ner.svg")
svg = displacy.render(doc, style="ent", jupyter=False)
with output_path.open("w", encoding="utf-8") as fh:
    fh.write(svg)