# Associate New WordNet Synset Terms with DNA Classes

In [1]:
import pickle
import stardog

sd_conn_details = {'endpoint': 'http://localhost:5820',
                   'username': 'admin',
                   'password': 'admin'}

with open('noun-dict.pickle', 'rb') as inFile:
    noun_dict = pickle.load(inFile)
with open('verb-dict.pickle', 'rb') as inFile:
    verb_dict = pickle.load(inFile)


In [2]:
noun_syns = list(noun_dict.keys())
verb_syns = list(verb_dict.keys())

# verbs/nouns.txt from query for verb/noun_synonym
# labels.txt from query for DNA class labels
with open("verbs.txt", "r") as f:   # List of syn words for verbs and nouns
    f_text = f.read()
verb_words = f_text.split("\n")
with open("nouns.txt", "r") as f:
    f_text = f.read()
noun_words = f_text.split("\n")

# Add Labels' Words to Noun_Words
with open("labels.txt", "r") as f:
    f_text = f.read()
label_words = f_text.split("\n")

for word in label_words:
    wds = word.split(" ")
    for wd in wds:
        wd = wd.lower()
        if wd != "and" and wd != "of" and wd not in noun_words:
            noun_words.append(wd)

In [3]:
query_subclass = "prefix urn: <urn:ontoinsights:dna:> select ?syn where {?syn rdfs:subClassOf urn:root }"
query_label = "prefix urn: <urn:ontoinsights:dna:> select ?label where {urn:syn rdfs:label ?label}"
query_aLabel = "prefix urn: <urn:ontoinsights:dna:> select ?aLabel where {urn:syn skos:altLabel ?aLabel}"

verb_conn = stardog.Connection("dna-verbs", **sd_conn_details)
noun_conn = stardog.Connection("dna-nouns", **sd_conn_details)
new_verbs_dict = dict()
poss_verbs_dict = dict()
new_nouns_dict = dict()
poss_nouns_dict = dict()
new_verb_syns = []
new_noun_syns = []

In [4]:
def update_dicts(syn: str, class_name: str, syns: list, known_words: list, verb: bool, conn):
    hypernym_syn_results = conn.select(query_subclass.replace("root", syn), 
                                       content_type='application/sparql-results+json')
    for hyper_bind_set in hypernym_syn_results['results']['bindings']:
        hypernym_syn = (hyper_bind_set['syn']['value']).split(":")[-1]
        if hypernym_syn in syns:
            # Hypernym is already mapped to a DNA concept and will be accounted for when "that" syn is processed
            continue
        # Get labels and alt-labels
        if verb:
            new_verb_syns.append(hypernym_syn)
        else:
            new_noun_syns.append(hypernym_syn)
        label_results = conn.select(query_label.replace("syn", syn), 
                                    content_type='application/sparql-results+json')
        aLabel_results = conn.select(query_aLabel.replace("syn", syn), 
                                     content_type='application/sparql-results+json')
        for label_bind_set in label_results['results']['bindings']:
            label = (label_bind_set['label']['value']).replace("_", " ")
            if label in known_words:
                continue    # Already have the term in DNA
            if verb:
                update_term_value(label, class_name, new_verbs_dict)
            else:
                update_term_value(label, class_name, new_nouns_dict)
        for aLabel_bind_set in aLabel_results['results']['bindings']:
            aLabel = (aLabel_bind_set['aLabel']['value']).replace("_", " ")
            if aLabel in known_words:
                continue    # Already have the term in DNA
            if verb:
                update_term_value(aLabel, class_name, poss_verbs_dict)
            else:
                update_term_value(aLabel, class_name, poss_nouns_dict)
        update_dicts(hypernym_syn, class_name, syns, known_words, verb, conn)    

def update_term_value(term: str, new_class: str, term_dict: dict):
    class_list = [new_class]
    if term in term_dict.keys():
        class_list = term_dict[term]
        if new_class not in class_list:
            class_list.append(new_class)
    term_dict[term] = class_list
        

In [5]:
for syn in verb_syns:
    update_dicts(syn, verb_dict[syn], verb_syns, verb_words, True, verb_conn)
for syn in noun_syns:
    update_dicts(syn, noun_dict[syn], noun_syns, noun_words, False, noun_conn)
        

In [6]:
print(new_verb_syns)

['00696274', '01782485', '01780304', '01782907', '01781522', '01790238', '01790439', '01790579', '01790725', '01791419', '01793089', '01799899', '01824839', '01793315', '00853413', '01806967', '01791528', '01803640', '01807344', '02591574', '02571005', '02591814', '01775115', '01792751', '01792866', '01792983', '00505100', '00027261', '00027563', '00028160', '00735477', '01770125', '01770490', '00893988', '01826198', '01826486', '01826886', '00622598', '00601765', '00602119', '00602298', '00724637', '01825070', '00739481', '01831367', '00443018', '00504650', '00839551', '01783561', '01783720', '01783927', '01784760', '01795082', '01834907', '01786037', '01787967', '00689424', '02300408', '01796442', '00493346', '01777926', '01832012', '01780389', '01809490', '01800935', '01832347', '01809651', '01789061', '01828281', '01828474', '01828678', '01828900', '01829703', '01829904', '01830002', '01830126', '01832198', '02245263', '02245456', '02245663', '00652816', '00652969', '00719845', '00

In [7]:
with open('new_nouns.pickle', 'wb') as outFile:
    pickle.dump(new_nouns_dict, outFile)
with open('poss_nouns.pickle', 'wb') as outFile:
    pickle.dump(poss_nouns_dict, outFile)
with open('new_verbs.pickle', 'wb') as outFile:
    pickle.dump(new_verbs_dict, outFile)
with open('poss_verbs.pickle', 'wb') as outFile:
    pickle.dump(poss_verbs_dict, outFile)

# Find Unused Synsets

In [8]:
query_syn = "prefix urn: <urn:ontoinsights:dna:> select ?syn where {?syn a urn:Synset}"

unaccounted_verb_syns = [] 
syn_results = verb_conn.select(query_syn, content_type='application/sparql-results+json')
print('total verb syns: ', len(syn_results['results']['bindings']))
for bind in syn_results['results']['bindings']:
    syn = (bind['syn']['value']).split(':')[-1]
    if syn in verb_syns or syn in new_verb_syns:
        continue
    unaccounted_verb_syns.append(syn)
print('# verb syns: ', len(unaccounted_verb_syns))

unaccounted_noun_syns = [] 
syn_results = noun_conn.select(query_syn, content_type='application/sparql-results+json')
print('total noun syns: ', len(syn_results['results']['bindings']))
for bind in syn_results['results']['bindings']:
    syn = (bind['syn']['value']).split(':')[-1]
    if syn in noun_syns or syn in new_noun_syns:
        continue
    unaccounted_noun_syns.append(syn)
print("# noun syns: ", len(unaccounted_noun_syns))

total verb syns:  13789
# verb syns:  5779
total noun syns:  82192
# noun syns:  26766


# Track Used Synsets

In [9]:
all_verb_syns = []
all_noun_syns = []

for syn in verb_syns:
    all_verb_syns.append(syn)
for syn in new_verb_syns:
    all_verb_syns.append(syn)
for syn in noun_syns:
    all_noun_syns.append(syn)
for syn in new_noun_syns:
    all_noun_syns.append(syn)
    
all_verb_syns.sort()
all_noun_syns.sort()
    
with open('used_verb_syns.txt', 'w') as outFile:
    for syn in all_verb_syns:
        outFile.write(f'{syn}\n')
with open('used_noun_syns.txt', 'w') as outFile:
    for syn in all_noun_syns:
        outFile.write(f'{syn}\n')

# Duplicates and New Terms from poss_xx

In [9]:
print("verbs")
for syn, dna in new_verbs_dict.items():
    if len(dna) > 1:
        print(syn, dna)
print("nouns")
for syn, dna in new_nouns_dict.items():
    if len(dna) > 1:
        print(syn, dna)

print("poss verbs: ")
poss_verbs = []
new_verbs = list(new_verbs_dict.keys())
for syn, dna in poss_verbs_dict.items():
    if syn not in new_verbs:
        print(syn, dna)
print("poss nouns: ")
poss_nouns = []
new_nouns = list(new_nouns_dict.keys())
for syn, dna in poss_nouns_dict.items():
    if syn not in new_nouns:
        print(syn, dna)

verbs
prize ['urn:ontoinsights:dna:AdmirationRespectAndValue', 'urn:ontoinsights:dna:Cognition']
treasure ['urn:ontoinsights:dna:Fondness', 'urn:ontoinsights:dna:Cognition']
rile ['urn:ontoinsights:dna:AngerAndAnnoyance', 'urn:ontoinsights:dna:IncrementalMovement']
rag ['urn:ontoinsights:dna:AngerAndAnnoyance', 'urn:ontoinsights:dna:Complaint']
strain ['urn:ontoinsights:dna:Anxiety', 'urn:ontoinsights:dna:Attempt', 'urn:ontoinsights:dna:UtilizationAndConsumption']
extend ['urn:ontoinsights:dna:Anxiety', 'urn:ontoinsights:dna:UtilizationAndConsumption']
blur ['urn:ontoinsights:dna:ConfusionAndDoubt', 'urn:ontoinsights:dna:Decrease']
obnubilate ['urn:ontoinsights:dna:ConfusionAndDoubt', 'urn:ontoinsights:dna:Concealment']
engage ['urn:ontoinsights:dna:Curiosity', 'urn:ontoinsights:dna:ImpactAndContact', 'urn:ontoinsights:dna:IncrementalMovement']
disdain ['urn:ontoinsights:dna:HatredBitternessAndDisgust', 'urn:ontoinsights:dna:RefusalAndRejection']
miss ['urn:ontoinsights:dna:LongingAndP

yellow-blue dichromacy ['urn:ontoinsights:dna:EventAndState', 'urn:ontoinsights:dna:Disability', 'urn:ontoinsights:dna:EnvironmentAndCondition']
yellow-blue color blindness ['urn:ontoinsights:dna:EventAndState', 'urn:ontoinsights:dna:Disability', 'urn:ontoinsights:dna:EnvironmentAndCondition']
congenital heart defect ['urn:ontoinsights:dna:EventAndState', 'urn:ontoinsights:dna:EnvironmentAndCondition']
septal defect ['urn:ontoinsights:dna:EventAndState', 'urn:ontoinsights:dna:EnvironmentAndCondition']
sphere ['urn:ontoinsights:dna:EventAndState', 'urn:ontoinsights:dna:Cognition']
MP ['urn:ontoinsights:dna:PoliceForce', 'urn:ontoinsights:dna:Person']
bodyguard ['urn:ontoinsights:dna:ArmedForce', 'urn:ontoinsights:dna:Person', 'urn:ontoinsights:dna:GroupOfAgents']
artillery ['urn:ontoinsights:dna:ArmedForce', 'urn:ontoinsights:dna:WeaponAndAmmunition']
foot ['urn:ontoinsights:dna:ArmedForce', 'urn:ontoinsights:dna:BodyPart']
style ['urn:ontoinsights:dna:BodyPart', 'urn:ontoinsights:dna:C

dilatation ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:Increase']
blockage ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:Prevention']
wealth ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:Possession']
slump ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:EconomicEnvironment']
economic crisis ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:EconomicEnvironment']
problem ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:Cognition']
depilation ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:MaintenanceAndRepair']
contamination ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:Change']
flavor ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:AttentionAndFocus']
flavour ['urn:ontoinsights:dna:EnvironmentAndCondition', 'urn:ontoinsights:dna:AttentionAndFocus']
immunity ['urn:ontoinsights:

tubercle ['urn:ontoinsights:dna:BodyPart']
endocrine ['urn:ontoinsights:dna:BodyPart']
ball ['urn:ontoinsights:dna:BodyPart', 'urn:ontoinsights:dna:ToyAndGamingItem', 'urn:ontoinsights:dna:RecreationEvent']
sucker ['urn:ontoinsights:dna:BodyPart', 'urn:ontoinsights:dna:Animal']
neb ['urn:ontoinsights:dna:BodyPart']
ab ['urn:ontoinsights:dna:BodyPart']
crotch ['urn:ontoinsights:dna:BodyPart']
fanny ['urn:ontoinsights:dna:BodyPart']
sinus ['urn:ontoinsights:dna:BodyPart']
ventricle ['urn:ontoinsights:dna:BodyPart']
rib ['urn:ontoinsights:dna:BodyPart']
cervix ['urn:ontoinsights:dna:BodyPart']
pallium ['urn:ontoinsights:dna:BodyPart']
auricle ['urn:ontoinsights:dna:BodyPart']
toe ['urn:ontoinsights:dna:BodyPart']
breast ['urn:ontoinsights:dna:BodyPart']
human face ['urn:ontoinsights:dna:BodyPart']
thorax ['urn:ontoinsights:dna:BodyPart']
mentum ['urn:ontoinsights:dna:BodyPart']
parenchyma ['urn:ontoinsights:dna:BodyPart']
nervure ['urn:ontoinsights:dna:BodyPart']
Tokay ['urn:ontoinsights:

salmon trout ['urn:ontoinsights:dna:Animal']
whitefish ['urn:ontoinsights:dna:Animal']
hind ['urn:ontoinsights:dna:Animal']
snapper ['urn:ontoinsights:dna:Animal']
tuna ['urn:ontoinsights:dna:Animal']
topminnow ['urn:ontoinsights:dna:Animal']
catfish ['urn:ontoinsights:dna:Animal']
bullhead ['urn:ontoinsights:dna:Animal']
ribbonfish ['urn:ontoinsights:dna:Animal']
rockfish ['urn:ontoinsights:dna:Animal']
greenling ['urn:ontoinsights:dna:Animal']
perch ['urn:ontoinsights:dna:Animal']
sunfish ['urn:ontoinsights:dna:Animal']
black bass ['urn:ontoinsights:dna:Animal']
surffish ['urn:ontoinsights:dna:Animal']
surf fish ['urn:ontoinsights:dna:Animal']
moonfish ['urn:ontoinsights:dna:Animal']
dollarfish ['urn:ontoinsights:dna:Animal']
whiting ['urn:ontoinsights:dna:Animal']
sea trout ['urn:ontoinsights:dna:Animal']
pout ['urn:ontoinsights:dna:Animal']
eelpout ['urn:ontoinsights:dna:Animal']
gudgeon ['urn:ontoinsights:dna:Animal']
bonito ['urn:ontoinsights:dna:Animal']
billfish ['urn:ontoinsig

woodruff ['urn:ontoinsights:dna:FungusAndPlant']
mimosa ['urn:ontoinsights:dna:FungusAndPlant']
milk thistle ['urn:ontoinsights:dna:FungusAndPlant']
hollyhock ['urn:ontoinsights:dna:FungusAndPlant']
false mallow ['urn:ontoinsights:dna:FungusAndPlant']
bearberry ['urn:ontoinsights:dna:FungusAndPlant']
manzanita ['urn:ontoinsights:dna:FungusAndPlant']
huckleberry ['urn:ontoinsights:dna:FungusAndPlant']
rockrose ['urn:ontoinsights:dna:FungusAndPlant']
rock rose ['urn:ontoinsights:dna:FungusAndPlant']
service tree ['urn:ontoinsights:dna:FungusAndPlant']
spirea ['urn:ontoinsights:dna:FungusAndPlant']
spiraea ['urn:ontoinsights:dna:FungusAndPlant']
honeysuckle ['urn:ontoinsights:dna:FungusAndPlant']
smoke tree ['urn:ontoinsights:dna:FungusAndPlant']
buckthorn ['urn:ontoinsights:dna:FungusAndPlant']
croton ['urn:ontoinsights:dna:FungusAndPlant']
cedar tree ['urn:ontoinsights:dna:FungusAndPlant']
hemlock ['urn:ontoinsights:dna:FungusAndPlant']
crab apple ['urn:ontoinsights:dna:FungusAndPlant']

drain ['urn:ontoinsights:dna:UtilizationAndConsumption']
foul ['urn:ontoinsights:dna:ViolationOfAgreement']
air current ['urn:ontoinsights:dna:WeatherEvent']
length ['urn:ontoinsights:dna:Duration']
longness ['urn:ontoinsights:dna:Duration']
prolongation ['urn:ontoinsights:dna:Duration']
lengthiness ['urn:ontoinsights:dna:Duration']
shortness ['urn:ontoinsights:dna:Duration']
minute ['urn:ontoinsights:dna:PointInTime']
small fry ['urn:ontoinsights:dna:Childhood']
tike ['urn:ontoinsights:dna:Childhood']
tyke ['urn:ontoinsights:dna:Childhood']
rascal ['urn:ontoinsights:dna:Childhood']
rapscallion ['urn:ontoinsights:dna:Childhood']
scalawag ['urn:ontoinsights:dna:Childhood']
scallywag ['urn:ontoinsights:dna:Childhood']
holy day ['urn:ontoinsights:dna:Holiday']
bombardment ['urn:ontoinsights:dna:AttackDamageAndAssault']
capitalization ['urn:ontoinsights:dna:AssessmentAndCharacterization', 'urn:ontoinsights:dna:PurchaseAndSale']
capitalisation ['urn:ontoinsights:dna:AssessmentAndCharacteriz