# Converting PSYNDEX Terms Vocabulary to SKOS


Import libraries:


In [1]:
from rdflib import Graph, Literal
from rdflib.namespace import RDF, RDFS, Namespace, SKOS, XSD
from rdflib import BNode
from rdflib import URIRef
import xml.etree.ElementTree as ET
import csv

# Namespaces hosted by the Library of Congress (id.loc.gov / www.loc.gov).
BF = Namespace("http://id.loc.gov/ontologies/bibframe/")
BFLC = Namespace("http://id.loc.gov/ontologies/bflc/")
LANG = Namespace("http://id.loc.gov/vocabulary/iso639-2/")
LOCID = Namespace("http://id.loc.gov/vocabulary/identifiers/")
MADS = Namespace("http://www.loc.gov/mads/rdf/v1#")

# schema.org
SCHEMA = Namespace("https://schema.org/")

# Namespaces under https://w3id.org/zpid/: resources, ontology, vocabularies.
WORKS = Namespace("https://w3id.org/zpid/resources/works/")
INSTANCES = Namespace("https://w3id.org/zpid/resources/instances/")
PXC = Namespace("https://w3id.org/zpid/ontology/classes/")
PXP = Namespace("https://w3id.org/zpid/ontology/properties/")
ROLES = Namespace("https://w3id.org/zpid/vocabs/roles/")
# Namespace of the thesaurus terms themselves.
TERMS = Namespace("https://w3id.org/zpid/vocabs/terms/")



In [2]:
# Terms that were newly added in this update of the thesaurus
# (postables = new concepts, nonpostables = new synonyms).

# New postables. The labels are compared verbatim against the "Subject"
# attribute of each term in the source XML, so they must not carry leading or
# trailing whitespace - otherwise the term is silently missed.
new_concepts = {
    "Algorithmic Bias",
    "Artificial Intelligence Ethics",
    "Automated Diagnosis",
    "Bayesian Algorithms",
    "Brain Computer Interface",
    "Breathing Techniques",
    "Chatbots",
    "Classification (Machine Learning)",
    "Clustering (Machine Learning)",
    "Cognitive Analytic Therapy",
    "Compassion Focused Therapy",
    "Computer Assisted Surgery",
    "Computer Graphics",
    "Computer Linguistics",
    "Computer Security",
    "Computer Systems",
    "Computer Vision",
    "Confirmation Bias",
    "Conversation Analysis",
    "Convolutional Neural Networks",
    "Cortical Excitability",
    "Decision Tree Algorithms",
    "Dietary Treatment",
    "Digital Audio",
    "Digital Piracy",
    "Emotion Detection (Artificial Intelligence)",
    "Equine Assisted Therapy",
    "Ethical Decision Making",
    "Exercise Therapy",
    "Exposure and Response Prevention Therapy",
    "Facial Recognition (Artificial Intelligence)",
    "Generative Adversarial Networks",
    "Generative Artificial Intelligence",
    "Humanoid Robots",
    "Image Classification",
    "Intelligent Personal Agents",
    "Internet Access",
    "Large Language Models",
    "Machine Translation",
    "Mentalization-Based Interventions",
    "Metacognitive Therapy",
    "Mindfulness Meditation",
    "Mindfulness-Based Cognitive Therapy",
    "Mindfulness-Based Stress Reduction",
    "Moral Emotions",
    "Nature-Based Interventions",
    "Neuroethics",
    "Noninvasive Brain Stimulation",
    "Optimization Algorithms",
    "Oxygen Therapy",
    "Podcasts",
    "Positive Behavior Support",
    "Positive Psychology Therapy",
    "Post-COVID-19 Conditions",
    "Predictive Analysis",
    "Protective Behavioral Strategies",
    "Psychosocial Interventions",
    "Publication Bias",
    "Recommender Systems",
    "Recurrent Neural Networks",
    "Regression (Machine Learning)",
    "Repetitive Transcranial Brain Stimulation",
    "Research Bias",
    "Research Inclusivity",
    "Robot Ethics",
    "Sensor Technology",
    "Social Prescribing",
    "Socially Assistive Robots",
    "Spiking Neural Networks",
    "Spiritually Oriented Therapy",
    "Supervised Learning",
    "Support Vector Machine Algorithms",
    "Theta Burst Stimulation",
    "Thought Patterns",
    "Transcranial Alternating Current Stimulation",
    "Trauma-Focused Cognitive Behavior Therapy",
}

# New "nonpostables".
# The list this was copied from repeated several entries; in a set the repeats
# collapse anyway, so every label appears exactly once here (alphabetically).
new_altlabels = {
    "Affective Computing",
    "Artificial Intelligence Ethics",
    "Artificial Neural Networks",
    "Automated Speech Recognition",
    "Aversion Therapy",
    "Bayesian Algorithms",
    "Biased Sampling",
    "Brain Computer Interface",
    "Breathing Techniques",
    "Cloud Computing",
    "Compassion Focused Therapy",
    "Computer Assisted Surgery",
    "Computer Linguistics",
    "Computer Security",
    "Computer Systems",
    "Computer Vision",
    "Culturally Adapted Interventions",
    "Decision Tree Algorithms",
    "Deep Neural Networks",
    "Dietary Treatment",
    "Digital Piracy",
    "Equine Assisted Therapy",
    "Exercise Therapy",
    "Experimental Ethics",
    "Feedback",
    "Generative Artificial Intelligence",
    "Intelligent Agents",
    "Mentalization-Based Interventions",
    "Metacognitive Therapy",
    "Mindfulness-Based Cognitive Therapy",
    "Nature-Based Interventions",
    "Noninvasive Brain Stimulation",
    "Optimization Algorithms",
    "Oxygen Therapy",
    "Play Therapy",
    "Positive Behavior Support",
    "Positive Psychology Therapy",
    "Post-COVID-19 Conditions",
    "Predictive Analysis",
    "Progressive Relaxation Therapy",
    "Recommender Systems",
    "Research Quality",
    "Sensor Technology",
    "Social Prescribing",
    "Support Vector Machine Algorithms",
    "Synthetic Speech",
    "Text Analysis",
}

# Empty lists that collect one dict per row for the CSV helper tables:
new_terms_list = []
new_synonyms_list = []

## 1. Convert APA XML to SKOS

In [3]:
# Parse the APA thesaurus XML and set up the target graph together with its
# skos:ConceptScheme node.

from datetime import date

from rdflib import DCTERMS

# apa thesaurus file name goes here
# Note: ET.parse() returns an ElementTree object, not the root element itself;
# use root.getroot() if the root element is needed.
root = ET.parse("/home/tina/Developement/zpid-vocabularies/psyndex-terms/2023-06-Summer.xml")

language_tag = "en"
thesaurus_name = "Thesaurus of Psychological Index Terms"

apa_thesaurus = Graph()
apa_thesaurus.bind("terms", TERMS)

# Create a scheme by making a node in the graph and giving it an rdf:type
# skos:ConceptScheme. The scheme URI is the TERMS namespace itself.
apa_scheme = URIRef(TERMS)
apa_thesaurus.add((apa_scheme, RDF.type, SKOS.ConceptScheme))
apa_thesaurus.add((apa_scheme, DCTERMS.created, Literal("1973-01-01", datatype=XSD.date)))
apa_thesaurus.add((apa_scheme, DCTERMS.creator, Literal("APA")))
# The description is added once only; the original cell added this identical
# triple a second time, which had no effect on the graph.
apa_thesaurus.add((apa_scheme, DCTERMS.description, Literal("Subjects for describing psychological topics and areas of research.")))
apa_thesaurus.add((apa_scheme, DCTERMS.title, Literal(thesaurus_name, lang=language_tag)))
apa_thesaurus.add((apa_scheme, SKOS.prefLabel, Literal(thesaurus_name, lang=language_tag)))

# Read today's date from the system and add it as dct:modified:
today = date.today()
today_string = today.strftime("%Y-%m-%d")
apa_thesaurus.add((apa_scheme, DCTERMS.modified, Literal(today_string, datatype=XSD.date)))

# Debugging aid - uncomment to inspect the structure of the source XML:
# def print_element(element, depth=0):
#     print("\t"*depth, element.tag, element.attrib, element.text)
#     for child in element:
#         print_element(child, depth+1)
#
# for child in root.getroot():
#     print_element(child)
#     print(child.tag, child.attrib, child.text)
# print(root.getroot().tag, root.getroot().attrib, root.getroot().text)

# The element that holds all terms is called "TermList" in the APA thesaurus.
term_list = root.find("TermList")
# print(term_list.tag, term_list.attrib, term_list.text)

# Counters: all entries seen in the TermList, and those exported as concepts.
term_count = concept_count = 0

# Go through every entry of the TermList. An entry becomes a skos:Concept
# unless its RelationList contains a "Use" element, in which case it is only a
# label (synonym) and is skipped.
for term in term_list:
# for term in term_list[0:7]:
    term_count += 1

    #print(term.tag, term.attrib, term.text)
    # These three attributes are read unconditionally; a missing one raises
    # KeyError.
    pref_label = term.attrib["Subject"]
    skos_notation = term.attrib["Code"]
    dc_created_date = term.attrib["Introduced"]

    # Every term is a concept by default; only a "Use" relation found below
    # turns this off.
    is_concept = True
    scope_note = None
    history_note = None
    # Codes of related/broader/narrower terms and the texts of all altLabels.
    # These are plain lists; duplicates in the data are harmless for the
    # graph, because adding an identical triple twice has no effect.
    related_terms = []
    broader_terms = []
    narrower_terms = []
    alt_labels = []

    for subelement in term:
        if subelement.tag == "NotationList":
            # Notes: keep the text of ScopeNote and HistoricalNote.
            for note in subelement:
                if note.tag == "ScopeNote":
                    scope_note = note.text
                elif note.tag == "HistoricalNote":
                    history_note = note.text
        elif subelement.tag == "RelationList":
            for relation in subelement:
                if relation.tag == "RelatedTerm":
                    related_terms.append(relation.attrib["Code"])
                elif relation.tag == "BroaderTerm":
                    broader_terms.append(relation.attrib["Code"])
                elif relation.tag == "NarrowerTerm":
                    narrower_terms.append(relation.attrib["Code"])
                elif relation.tag == "UsedFor":
                    # this is the altLabel, then:
                    alt_labels.append(relation.attrib["Subject"])
                elif relation.tag == "Use":
                    # meaning it is just a label, not a concept.
                    # We might export this as either a deprecated term or a
                    # skosxl:Label with the text as skosxl:literalForm.
                    is_concept = False
                    #print("Use: "+ relation.attrib["Code"])

    if not is_concept:
        # It is only a synonym; add it as a skosxl:Label to the graph?
        continue

    concept_count += 1

    # Add a row to the CSV table of newly added terms.
    if pref_label in new_concepts:
        # Join ALL synonyms; the previous loop reassigned the variable on each
        # pass and therefore kept only the last synonym.
        alt_labels_concatenated = "; ".join(alt_labels)
        new_terms_list.append({
            "Code": skos_notation,
            "CT Englisch": pref_label,
            "Synonyme Englisch": alt_labels_concatenated,
            "CT Deutsch": None,
            "Synonyme Deutsch": None,
            "Scopenote": scope_note,
            "Historynote": history_note,
            "Indikatoren": None,
        })

    # Make a node in the graph for the concept with a URI built from
    # "https://w3id.org/zpid/vocabs/terms/" + "Code"
    concept_uri = TERMS[skos_notation]
    apa_thesaurus.add((concept_uri, RDF.type, SKOS.Concept))
    # add it to the scheme:
    apa_thesaurus.add((concept_uri, SKOS.inScheme, apa_scheme))
    # then add the properties to it: prefLabel, notation and creation date.
    apa_thesaurus.add((concept_uri, SKOS.prefLabel, Literal(pref_label, lang=language_tag)))
    apa_thesaurus.add((concept_uri, SKOS.notation, Literal(skos_notation)))
    # "-01-01" is appended to the "Introduced" value to form an xsd:date
    # (presumably the attribute holds a four-digit year - confirm against data).
    apa_thesaurus.add((concept_uri, DCTERMS.created, Literal((dc_created_date+"-01-01"), datatype=XSD.date)))

    # then, any altLabels:
    for label in alt_labels:
        apa_thesaurus.add((concept_uri, SKOS.altLabel, Literal(label, lang=language_tag)))
        # is it a new altLabel?
        if label in new_altlabels:
            print(label)
            # NOTE(review): while the append below stays commented out,
            # new_synonyms_list is never filled here.
            # new_synonyms_list.append({"Neues Synonym": label, "Ist Synonym für postable Term": pref_label, "Code des postable Terms": skos_notation, "Scopenote": scope_note, "Historynote": history_note})

    # the scope_note and history_note:
    if scope_note is not None:
        apa_thesaurus.add((concept_uri, SKOS.scopeNote, Literal(scope_note, lang=language_tag)))
    if history_note is not None:
        apa_thesaurus.add((concept_uri, SKOS.historyNote, Literal(history_note, lang=language_tag)))

    # Semantic relations. The loop variables get their own names so they do
    # not shadow the outer loop variable `term`.
    for related_code in related_terms:
        apa_thesaurus.add((concept_uri, SKOS.related, TERMS[related_code]))

    for broader_code in broader_terms:
        apa_thesaurus.add((concept_uri, SKOS.broader, TERMS[broader_code]))

    for narrower_code in narrower_terms:
        apa_thesaurus.add((concept_uri, SKOS.narrower, TERMS[narrower_code]))

               
# Write the graph as Turtle and print a short summary of what was processed.
apa_thesaurus.serialize("apa_thes.ttl", format="turtle")
print(f"Terms inkl. Synonyme: {term_count}")
print(f"Davon echte Concepts: {concept_count}")
print(f"Neue Concepts: {len(new_concepts)}")




Terms inkl. Synonyme: 10409
Davon echte Concepts: 6949
Neue Concepts: 76


In [4]:
# run the result through skosify:
# skosify -c skosify.cfg apa_thes.ttl -o apa_thes_skosified.ttl

## 2. Make helper files for manually translating newly added terms

Above, we stored the list of new postable terms, taken from the supporting document the APA sends, in the variable `new_concepts`.
While looping through each concept and writing the RDF nodes, we also filled the list `new_terms_list` with one dict per new term, holding the information for that term. We can use this list to create a table for Google Drive that psychologists can fill with German translations of the new terms. (We can then use this table to create a new SKOS file with the German translations.)

We write the table as a CSV file that can be imported into Google Sheets.

In [5]:


# 1. load the thesaurus file (could do this without rdf, too)
# 2. identify the uri of the new concepts from the new concept preflabels in new_concepts
# 3. get preflabel and scopenote and write into a csv file
# 4. identify the uri of the concept of each new synonym in new_altlabels
# 5. do the same as for 3  

# Write one row per newly added concept (collected in new_terms_list) to
# new_terms.csv. The encoding is set explicitly so the output does not depend
# on the platform default.
with open('new_terms.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Code','CT Englisch','Synonyme Englisch','CT Deutsch','Synonyme Deutsch', 'Scopenote', 'Historynote','Indikatoren']
    # Values that are None are written as empty cells by DictWriter.
    termstable = csv.DictWriter(csvfile, fieldnames=fieldnames)
    termstable.writeheader()
    termstable.writerows(new_terms_list)

# Write one row per newly added synonym (collected in new_synonyms_list) to
# new_synonyms.csv. The header contains a non-ASCII character ("ü"), so the
# encoding is set explicitly instead of relying on the platform default.
with open('new_synonyms.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Neues Synonym','Ist Synonym für postable Term','Code des postable Terms','Scopenote','Historynote']
    synonymstable = csv.DictWriter(csvfile, fieldnames=fieldnames)
    synonymstable.writeheader()
    synonymstable.writerows(new_synonyms_list)




## 3. Get German translations from STAR as XML
## 4. Convert STAR XML to SKOS (labels only)
## 5. Merge SKOS files, clean up with skosify