In [1]:
import spacy
from spacy.tokens import Doc
import entity_retokenizer
import citation_component
import description_extractor
import json

# Load the patterns that are handed to the EntityRuler further down.
# JSON is UTF-8 by specification, so decode it explicitly rather than with the
# platform's default codec (which is not UTF-8 on Windows).
with open('disorder_patterns.json', 'r', encoding='utf-8') as file:
    disorder_patterns = json.load(file)

nlp = spacy.load("en_core_web_sm")

# The custom factories used below ("citation_component",
# "entity_retokenizer_component", "disorder_extractor") are presumably
# registered as a side effect of the project-module imports at the top of this
# cell -- TODO confirm.
#
# `after=` inserts a pipe directly behind the named one, so the tail of the
# pipeline ends up as:
#   ner -> entity_ruler -> citation_component -> merge_phrases -> disorder_extractor
if "citation_component" not in nlp.pipe_names:
    nlp.add_pipe("citation_component", after="ner")

nlp.add_pipe("entity_retokenizer_component", name='merge_phrases', after='citation_component')

# Add the EntityRuler to the pipeline
ruler = nlp.add_pipe('entity_ruler', after="ner")
ruler.add_patterns(disorder_patterns)

# force=True lets this cell be re-run without an "extension already exists" error.
# NOTE(review): the list passed as `default` is a single object; if
# "disorder_extractor" appends to it in place instead of assigning a new list,
# results would accumulate across documents -- verify in description_extractor.
Doc.set_extension("disorder_descriptions", default=[], force=True)
nlp.add_pipe("disorder_extractor", after="merge_phrases")

# Read the source text with an explicit encoding, matching the UTF-8 used when
# the SVG files are written later in the notebook.
with open("../data/panic_attacks_long.txt", "r", encoding="utf-8") as f:
    text = f.read()

print(text)

doc = nlp(text)

# Print detected entities
for ent in doc.ents:
    print(ent.text, ent.label_)

# Each item of the extension is unpacked as a (disorder, description) pair.
for disorder, description in doc._.disorder_descriptions:
    print(f"Disorder: {disorder}\nDescription: {description}\n")

"Panic attacks" are discrete episodes of intense fear or discomfort, accompanied by physical and cognitive symptoms, as listed in the DSM-5 panic attack checklist (American Psychiatric Association, 2013). Panic attacks are discrete by virtue of their sudden or abrupt onset and brief duration, as opposed to gradually building anxious arousal. Panic attacks in panic disorder often have an unexpected quality, meaning that from the patient's perspective, they appear to happen without an obvious trigger or at unexpected times. Indeed, the diagnosis of panic disorder is defined by recurrent "unexpected" panic attacks, followed by at least 1 month of persistent concern about their recurrence and their consequences, or by a significant change in behavior consequent to the attacks (American Psychiatric Association, 2013). As with all basic emotions (Izard, 1992), panic attacks are associated with strong action tendencies: Most often, these are urges to escape, and less often, urges to fight. Th

In [2]:
# Run the pipeline over the raw text again, then list every token together
# with its coarse part-of-speech tag and its dependency relation.
doc = nlp(text)

for tok in doc:
    print(tok.text, tok.pos_, tok.dep_)

(Panic attacks, DSM-5, panic attack, (American Psychiatric Association, 2013), Panic attacks, Panic attacks, panic disorder, panic disorder, panic attacks, at least 1 month, (American Psychiatric Association, 2013), (Izard, 1992), panic attacks, panic attacks, Wilkinson et al., 1998, Margraf, Taylor, Ehlers, Roth, Agras, 1987, 40%, panic attacks, panic disorder, Barsky, Cleary, Sarnie, 1994, Barlow, Brown, & Craske, 1994, Craske & Tsao, 1999, Barlow, 1994, panic attacks, 1987, Lopatka, 1988, Kircanski, Craske, Epstein, 2009, panic disorder, panic attacks, panic attacks, Craske & Barlow, 1989, Uhde, 1994, panic attacks, between 1 and 3 hours, Craske & Barlow, 1989, panic disorder, 44-71%, 30-45%, Craske & Barlow, 1989, Krystal, Woods, Hill, 1991, Mellman & Uhde, 1989, Roy-Byrne, Mellman, & Uhde, 1988, Uhde, 1994, (Uhde, 1994), Nonclinical, panic attacks, 3-5%, panic disorder, Norton, Cox, & Malan, 1992, panic attacks, Barlow, 1985, Craske, 2010, panic disorder, panic attacks, DSM-5, (Am

In [3]:
from spacy import displacy

# Draw one dependency-parse diagram per sentence of the document.
for sentence_span in doc.sents:
    displacy.render(sentence_span, style="dep")

In [4]:
from pathlib import Path

# Export each sentence's dependency parse as its own SVG file, named after the
# sentence's position in the document (0.svg, 1.svg, ...).
svg_dir = Path("./displacy")
# Create the target folder up front: opening a file inside a directory that
# does not exist raises FileNotFoundError, which would break a fresh run.
svg_dir.mkdir(parents=True, exist_ok=True)

for i, sent in enumerate(doc.sents):
    output_path = svg_dir / f"{i}.svg"
    print(output_path, sent)
    # jupyter=False turns off inline display so the markup comes back as a
    # string that can be written to disk.
    svg = displacy.render(sent, style="dep", jupyter=False)
    with output_path.open("w", encoding="utf-8") as fh:
        fh.write(svg)

displacy\0.svg "Panic attacks" are discrete episodes of intense fear or discomfort, accompanied by physical and cognitive symptoms, as listed in the DSM-5 panic attack checklist (American Psychiatric Association, 2013).
displacy\1.svg Panic attacks are discrete by virtue of their sudden or abrupt onset and brief duration, as opposed to gradually building anxious arousal.
displacy\2.svg Panic attacks in panic disorder often have an unexpected quality, meaning that from the patient's perspective, they appear to happen without an obvious trigger or at unexpected times.
displacy\3.svg Indeed, the diagnosis of panic disorder is defined by recurrent "unexpected" panic attacks, followed by at least 1 month of persistent concern about their recurrence and their consequences, or by a significant change in behavior consequent to the attacks (American Psychiatric Association, 2013).
displacy\4.svg As with all basic emotions (Izard, 1992), panic attacks are associated with strong action tendencies

In [5]:
# List each entity span of the document next to its label.
for entity in doc.ents:
    print(entity.text, entity.label_)

Panic attacks DISORDER
DSM-5 NORP
panic attack DISORDER
(American Psychiatric Association, 2013) CITATION
Panic attacks DISORDER
Panic attacks DISORDER
panic disorder DISORDER
panic disorder DISORDER
panic attacks DISORDER
at least 1 month DATE
(American Psychiatric Association, 2013) CITATION
(Izard, 1992) CITATION
panic attacks DISORDER
panic attacks DISORDER
Wilkinson et al. PERSON
1998 DATE
Margraf ORG
Taylor PERSON
Ehlers PERSON
Roth PERSON
Agras PERSON
1987 DATE
40% PERCENT
panic attacks DISORDER
panic disorder DISORDER
Barsky, Cleary ORG
Sarnie ORG
1994 DATE
Barlow, Brown, & Craske ORG
1994 DATE
Craske & Tsao ORG
1999 DATE
Barlow PERSON
1994 DATE
panic attacks DISORDER
1987 DATE
Lopatka PERSON
1988 DATE
Kircanski PERSON
Craske ORG
Epstein GPE
2009 DATE
panic disorder DISORDER
panic attacks DISORDER
panic attacks DISORDER
Craske & Barlow ORG
1989 DATE
Uhde ORG
1994 DATE
panic attacks DISORDER
between 1 and 3 hours DATE
Craske & Barlow ORG
1989 DATE
panic disorder DISORDER
44-71% 

In [6]:
# Show the whole document through displaCy's entity visualizer (style="ent").
displacy.render(doc, style="ent")

In [7]:
import pandas as pd

# Build a table with one row per sentence: the sentence span itself and the
# texts of the entities found inside it.
sentence_rows = [
    {"sentence": span, "entities": [entity.text for entity in span.ents]}
    for span in doc.sents
]

sent_entity_df = pd.DataFrame(sentence_rows)
sent_entity_df

Unnamed: 0,sentence,entities
0,"("", Panic attacks, "", are, discrete, episodes,...","[Panic attacks, DSM-5, panic attack, (American..."
1,"(Panic attacks, are, discrete, by, virtue, of,...",[Panic attacks]
2,"(Panic attacks, in, panic disorder, often, hav...","[Panic attacks, panic disorder]"
3,"(Indeed, ,, the, diagnosis, of, panic disorder...","[panic disorder, panic attacks, at least 1 mon..."
4,"(As, with, all, basic, emotions, (Izard, 1992)...","[(Izard, 1992), panic attacks]"
5,"(These, fight, and, flight, tendencies, usuall...",[]
6,"(Furthermore, ,, perceptions, of, imminent, th...",[]
7,"(However, ,, the, features, of, urgency, to, e...",[]
8,"(For, example, ,, despite, evidence, for, elev...","[panic attacks, Wilkinson et al., 1998, Margra..."
9,"(Moreover, ,, in, general, ,, patients, with, ...","[panic disorder, Barsky, Cleary, Sarnie, 1994]"


In [8]:
# Materialise the sentence spans so they can be indexed below.
sentences = list(doc.sents)

# Print the root token of every sentence.
for span in sentences:
    print(span.root)

# Print the direct children of the first sentence's root token.
root_token = sentences[0].root
for dependent in root_token.children:
    print(dependent)




    
    

are
are
have
defined
are
involve
accompany
are
found
are
lead
believe
report
termed
weakened
subset
refers
refer
is
occur
suggest
become
result
occur
occur
emphasized
is
is
exemplifies
Patient
lay
think
look
think
work
have
get
worry
unpleasant
am
illustrates
generalized
is
am
dread
be
Therapist
worries
Patient
be
be
is
Are
Patient
is
described
"
Panic attacks
episodes
,
accompanied
.
