In [1]:
import spacy
from spacy.tokens import Doc
import entity_retokenizer
import citation_component
import description_extractor
import json

with open('disorder_patterns.json', 'r') as file:
    disorder_patterns = json.load(file)

nlp = spacy.load("en_core_web_sm")

if "citation_component" not in nlp.pipe_names:
    nlp.add_pipe("citation_component", after="ner")

nlp.add_pipe("entity_retokenizer_component", name='merge_phrases', after='citation_component')

# Add the EntityRuler to the pipeline
ruler = nlp.add_pipe('entity_ruler', after="ner")
ruler.add_patterns(disorder_patterns)

Doc.set_extension("disorder_descriptions", default=[], force=True)
Doc.set_extension("disorder_citations", default=[], force=True)
Doc.set_extension("disorder_diagnoses", default=[], force=True)
nlp.add_pipe("disorder_extractor", after="merge_phrases")

with open("../data/panic_attacks.txt", "r") as f:
    text = f.read()

# print(text)

doc = nlp(text)

# Print detected entities
# for ent in doc.ents:
#     print(ent.text, ent.label_)

for disorder, description in doc._.disorder_descriptions:
    print(f"Disorder: {disorder}\nDescription: {description}\n")

# for disorder, citation in doc._.disorder_descriptions:
#     print(f"Disorder: {disorder}\nCitation: {citation}\n")

# print(nlp.pipe_names)

# print(doc.token)

(Panic attacks, DSM-5, panic attack, (American Psychiatric Association, 2013), Panic attacks, Panic attacks, panic disorder, panic disorder, panic attacks, at least 1 month, (American Psychiatric Association, 2013))
Panic attacks
DSM-5
panic attack
(American Psychiatric Association, 2013)
Panic attacks
Panic attacks
panic disorder
panic disorder
panic attacks
at least 1 month
(American Psychiatric Association, 2013)
Panic attacks
(American Psychiatric Association, 2013)
panic attack
(American Psychiatric Association, 2013)
Panic attacks
Panic attacks
panic disorder
panic disorder
(American Psychiatric Association, 2013)
panic attacks
(American Psychiatric Association, 2013)
Disorder: Panic attacks
Description: are discrete episodes of intense fear or discomfort

Disorder: Panic attacks
Description: are discrete by virtue of their sudden or abrupt onset and brief duration

Disorder: Panic attacks
Description: have an unexpected quality



In [2]:
doc = nlp(text)

for token in doc:
    print(token.text, token.pos_, token.dep_)

(Panic attacks, DSM-5, panic attack, (American Psychiatric Association, 2013), Panic attacks, Panic attacks, panic disorder, panic disorder, panic attacks, at least 1 month, (American Psychiatric Association, 2013))
Panic attacks
DSM-5
panic attack
(American Psychiatric Association, 2013)
Panic attacks
Panic attacks
panic disorder
panic disorder
panic attacks
at least 1 month
(American Psychiatric Association, 2013)
Panic attacks
(American Psychiatric Association, 2013)
panic attack
(American Psychiatric Association, 2013)
Panic attacks
Panic attacks
panic disorder
panic disorder
(American Psychiatric Association, 2013)
panic attacks
(American Psychiatric Association, 2013)
" PUNCT punct
Panic attacks NOUN nsubj
" PUNCT punct
are AUX ROOT
discrete ADJ amod
episodes NOUN attr
of ADP prep
intense ADJ amod
fear NOUN pobj
or CCONJ cc
discomfort ADJ conj
, PUNCT punct
accompanied VERB advcl
by ADP agent
physical ADJ amod
and CCONJ cc
cognitive ADJ conj
symptoms NOUN pobj
, PUNCT punct
as SC

In [3]:
from spacy import displacy

for sent in doc.sents:
    displacy.render(sent, style="dep")

In [4]:
from pathlib import Path

for i, sent in enumerate(doc.sents):
    output_path = Path(f"./displacy/{i}.svg")
    print(output_path, sent)
    svg = displacy.render(sent, style="dep", jupyter=False)
    with output_path.open("w", encoding="utf-8") as fh:
        fh.write(svg)

displacy\0.svg "Panic attacks" are discrete episodes of intense fear or discomfort, accompanied by physical and cognitive symptoms, as listed in the DSM-5 panic attack checklist (American Psychiatric Association, 2013).
displacy\1.svg Panic attacks are discrete by virtue of their sudden or abrupt onset and brief duration, as opposed to gradually building anxious arousal.
displacy\2.svg Panic attacks in panic disorder often have an unexpected quality, meaning that from the patient's perspective, they appear to happen without an obvious trigger or at unexpected times.
displacy\3.svg Indeed, the diagnosis of panic disorder is defined by recurrent "unexpected" panic attacks, followed by at least 1 month of persistent concern about their recurrence and their consequences, or by a significant change in behavior consequent to the attacks (American Psychiatric Association, 2013).


In [5]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Panic attacks DISORDER
DSM-5 NORP
panic attack DISORDER
(American Psychiatric Association, 2013) CITATION
Panic attacks DISORDER
Panic attacks DISORDER
panic disorder DISORDER
panic disorder DISORDER
panic attacks DISORDER
at least 1 month DATE
(American Psychiatric Association, 2013) CITATION


In [6]:
displacy.render(doc, style="ent")

In [7]:
import pandas as pd

# Get named entity list per sentence

sent_entity_df = []

for sent in doc.sents:
    entity_list = [ent.text for ent in sent.ents]
    sent_entity_df.append({"sentence": sent, "entities": entity_list})

sent_entity_df = pd.DataFrame(sent_entity_df)
sent_entity_df

Unnamed: 0,sentence,entities
0,"("", Panic attacks, "", are, discrete, episodes,...","[Panic attacks, DSM-5, panic attack, (American..."
1,"(Panic attacks, are, discrete, by, virtue, of,...",[Panic attacks]
2,"(Panic attacks, in, panic disorder, often, hav...","[Panic attacks, panic disorder]"
3,"(Indeed, ,, the, diagnosis, of, panic disorder...","[panic disorder, panic attacks, at least 1 mon..."


In [8]:
sentences = list(doc.sents)

for sentence in sentences:
    print(sentence.root)

root_token = sentences[0].root
for child in root_token.children:
    print(child)




    
    

are
are
have
defined
"
Panic attacks
episodes
,
accompanied
.
