# Medical RDF Jupyter Notebook

Imports

In [37]:
import spacy
import srsly
import pandas as pd
import networkx as nx

Set up NLP Pipeline

In [38]:
# Load the small English spaCy pipeline used by the cells below.
nlp = spacy.load("en_core_web_sm")

# Read the source text. The encoding is pinned so the result does not depend on
# the platform's locale default (e.g. cp1252 on Windows).
# NOTE(review): assumes data/panic_attacks.txt is stored as UTF-8 - confirm.
with open("data/panic_attacks.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Materialise the patterns into a list so `patterns` can still be inspected or
# reused after add_patterns() has consumed it.
patterns = list(srsly.read_jsonl("data/patterns.jsonl"))
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns(patterns)

# Run the full pipeline (including the entity ruler) over the text.
doc = nlp(text)

# Show the sentence segmentation, one sentence per line.
for sent in doc.sents:
    print(sent)

"Panic attacks" are discrete episodes of intense fear or discomfort, accompanied by physical and cognitive symptoms, as listed in the DSM-5 panic attack checklist (American Psychiatric Association, 2013).
Panic attacks are discrete by virtue of their sudden or abrupt onset and brief duration, as opposed to gradually building anxious arousal.
Panic attacks in panic disorder often have an unexpected quality, meaning that from the patient's perspective, they appear to happen without an obvious trigger or at unexpected times.
Indeed, the diagnosis of panic disorder is defined by recurrent "unexpected" panic attacks, followed by at least 1 month of persistent concern about their recurrence and their consequences, or by a significant change in behavior consequent to the attacks (American Psychiatric Association, 2013).


In [41]:
# Dependency labels that mark a token as a grammatical subject.
# "csubjpass" replaces the original "subjpass", which is not among the
# dependency labels documented for the English spaCy models.
SUBJECT_DEPS = {"nsubj", "nsubjpass", "csubj", "csubjpass"}

# Dependency labels that mark a token as an object.
# NOTE(review): "oa", "oc", "og" and "op" look like labels from a non-English
# scheme; they are kept unchanged so the selection stays a superset of before.
OBJECT_DEPS = {"dobj", "iobj", "obj", "oa", "oc", "og", "op", "pobj"}

# One record per sentence, collected in a plain list so the DataFrame is built
# exactly once and `sent_entity_df` only ever names a DataFrame.
sentence_records = []

for sent in doc.sents:
    entity_list = [ent.text for ent in sent.ents]
    subject = [word.text for word in sent if word.dep_ in SUBJECT_DEPS]
    # The ROOT token of the dependency parse is used as the predicate.
    predicate = [word.text for word in sent if word.dep_ == "ROOT"]
    obj = [word.text for word in sent if word.dep_ in OBJECT_DEPS]
    sentence_records.append(
        {
            # The sentence object itself (not its string) is stored, as before.
            "sentence": sent,
            "entities": entity_list,
            "subject": subject,
            "predicate": predicate,
            "object": obj,
        }
    )

sent_entity_df = pd.DataFrame(sentence_records)

sent_entity_df

Unnamed: 0,sentence,entities,subject,predicate,object
0,"("", Panic, attacks, "", are, discrete, episodes...","[Panic attacks, DSM-5, American Psychiatric As...",[attacks],[are],"[fear, symptoms, checklist]"
1,"(Panic, attacks, are, discrete, by, virtue, of...",[Panic attacks],[attacks],[are],"[virtue, onset, arousal]"
2,"(Panic, attacks, in, panic, disorder, often, h...",[Panic attacks],"[attacks, they]",[have],"[disorder, quality, perspective, trigger, times]"
3,"(Indeed, ,, the, diagnosis, of, panic, disorde...","[at least 1 month, American Psychiatric Associ...",[diagnosis],[defined],"[disorder, attacks, month, concern, recurrence..."


In [40]:
# Dump every token together with its dependency label, sentence by sentence,
# as "<text> <dep>" on its own line.
for sentence in doc.sents:
    for tok in sentence:
        print(f"{tok.text} {tok.dep_}")

" punct
Panic compound
attacks nsubj
" punct
are ROOT
discrete amod
episodes attr
of prep
intense amod
fear pobj
or cc
discomfort conj
, punct
accompanied advcl
by agent
physical amod
and cc
cognitive conj
symptoms pobj
, punct
as mark
listed advcl
in prep
the det
DSM-5 compound
panic compound
attack compound
checklist pobj
( punct
American compound
Psychiatric compound
Association appos
, punct
2013 npadvmod
) punct
. punct
Panic compound
attacks nsubj
are ROOT
discrete acomp
by prep
virtue pobj
of prep
their poss
sudden amod
or cc
abrupt conj
onset pobj
and cc
brief amod
duration attr
, punct
as mark
opposed advcl
to aux
gradually advmod
building xcomp
anxious amod
arousal dobj
. punct
Panic compound
attacks nsubj
in prep
panic compound
disorder pobj
often advmod
have ROOT
an det
unexpected amod
quality dobj
, punct
meaning advcl
that mark
from prep
the det
patient poss
's case
perspective pobj
, punct
they nsubj
appear ccomp
to aux
happen xcomp
without prep
an det
obvious amod
trigg

In [2]:
import spacy
import nltk
from rdflib import Graph, Literal, RDF, URIRef

# Download the NLTK stop-word corpus. This must run before the corpus is read
# through `stopwords` below.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load the spaCy NLP model.
# NOTE(review): this rebinds `nlp`. An earlier cell binds the same name to a
# pipeline with an "entity_ruler" added; the pipeline loaded here has no such
# component added to it in this cell.
nlp = spacy.load('en_core_web_sm')


def preprocess_text(text):
    """Lower-case ``text``, run it through ``nlp`` and return the kept lemmas.

    A token is kept when it is purely alphabetic and its text is not in the
    NLTK English stop-word list.

    Args:
        text: Raw input string.

    Returns:
        A list with the lemma of every kept token, in document order.
    """
    # Build the stop-word lookup once per call instead of re-reading the word
    # list for every token; a set also makes each membership test O(1).
    stop_words = set(stopwords.words('english'))
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if token.is_alpha and token.text not in stop_words]
    return tokens


def extract_entities(text):
    """Run ``nlp`` over ``text`` and map each entity's text to its label.

    Args:
        text: Raw input string.

    Returns:
        A dict keyed by entity text with the entity label as value. When the
        same entity text occurs more than once, the label of the last
        occurrence is the one stored.
    """
    label_by_text = {}
    for span in nlp(text).ents:
        label_by_text[span.text] = span.label_
    return label_by_text


def generate_rdf(entities, base_uri="http://example.org/"):
    """Build a small RDF graph from extracted entities and return it as Turtle.

    A single ``<base_uri>disorder`` node is typed as
    ``<base_uri>MedicalCondition``. For every ``(entity, label)`` pair one
    triple is added to that node, with the label as predicate and the entity
    text as a literal object.

    Args:
        entities: Mapping of entity text to entity label.
        base_uri: Namespace prefix for all generated URIs. The default
            reproduces the previously hard-coded ``http://example.org/``.

    Returns:
        The graph serialized in Turtle format.
    """
    from urllib.parse import quote

    g = Graph()
    disorder = URIRef(f"{base_uri}disorder")
    g.add((disorder, RDF.type, URIRef(f"{base_uri}MedicalCondition")))
    for entity, label in entities.items():
        # Percent-encode the label so characters that are not legal in a URI
        # (spaces, quotes, angle brackets, ...) cannot produce an invalid
        # predicate. Labels made only of letters, digits and "_" are unchanged.
        predicate = URIRef(f"{base_uri}{quote(str(label), safe='')}")
        g.add((disorder, predicate, Literal(entity)))
    return g.serialize(format='turtle')


# Beispieltext
text = "Panic attacks are discrete episodes of intense fear."
tokens = preprocess_text(text)
entities = extract_entities(text)
rdf_data = generate_rdf(entities)
print(rdf_data)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\janes\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.



<http://example.org/disorder> a <http://example.org/MedicalCondition> .


