In [2]:
from loaders import load_graph, graph_folder_path
import os
import json

In [3]:
# Keep only the entries of the graph folder whose name ends in ".triples".
graph_list = [entry for entry in os.listdir(graph_folder_path) if entry.endswith(".triples")]

In [4]:
# Display the ".triples" file names collected from the graph folder.
graph_list

['marvel.triples',
 'mcu.triples',
 'memoryalpha.triples',
 'memorybeta.triples',
 'starwars.triples',
 'stexpanded.triples',
 'swg.triples',
 'swtor.triples']

In [5]:
import random
import pandas as pd


def filter_edges(graph, id2name, edge_type):
    """Return the named triples of `graph` whose edge label resolves to `edge_type`.

    Parameters
    ----------
    graph : object exposing ``edges(data=True)`` that yields
        ``(source, target, attrs)`` with ``attrs["edge_label"]`` set.
    id2name : mapping used to translate node ids and edge-label ids into names.
    edge_type : the edge-label name to keep.

    Returns
    -------
    list of ``[source_name, target_name, edge_label_name]`` lists, in the
    order the edges are yielded by the graph.
    """
    return [
        [id2name[source], id2name[target], id2name[attrs["edge_label"]]]
        for source, target, attrs in graph.edges(data=True)
        if id2name[attrs["edge_label"]] == edge_type
    ]

def is_it_string(row):
    """Tell whether a named edge row carries at least one plain-text value.

    Parameters
    ----------
    row : sequence whose first two items are the (string) names of the two
        endpoints of an edge; any further items are ignored.

    Returns
    -------
    bool
        False when both endpoints start with "http" (i.e. both look like
        URIs), True when at least one of them does not.
    """
    # Both endpoints have to be inspected: the text value can sit in either
    # position, which is also what get_string() assumes.
    return not (row[0].startswith("http") and row[1].startswith("http"))

def get_string(row):
    """Return the plain-text endpoints of a named edge row.

    Only the first two items of `row` are considered. An item is kept when it
    does not start with "http"; the result preserves the original order and
    may hold zero, one or two strings.
    """
    return [value for value in (row[0], row[1]) if not value.startswith("http")]

def edges_to_dict(graph):
    """Group the edges of `graph` by their edge label.

    Parameters
    ----------
    graph : object exposing ``edges(data=True)`` that yields
        ``(source, target, attrs)`` with ``attrs["edge_label"]`` set.

    Returns
    -------
    collections.defaultdict
        Maps each edge label to the list of ``[source, target]`` pairs that
        carry it, in iteration order. Looking up an unknown label yields an
        empty list.
    """
    # Imported here because the cell defining this function does not import
    # defaultdict itself; without this the function only works after some
    # other cell happens to have imported it.
    from collections import defaultdict

    edge_container = defaultdict(list)
    for source, target, attrs in graph.edges(data=True):
        edge_container[attrs["edge_label"]].append([source, target])
    return edge_container

def convert2name(row, id2name):
    """Translate every id in `row` to its name.

    Each element of `row` is looked up in `id2name`; the names are returned
    as a new list in the same order. An id missing from `id2name` raises
    KeyError.
    """
    return [id2name[element] for element in row]

def drop_duplicates(rows):
    """Keep the first row for every distinct first element.

    `rows` is a sequence of two-item rows. Rows are compared on their first
    item only; later rows repeating an already seen first item are dropped.
    The surviving rows are returned as a plain list of lists.
    """
    frame = pd.DataFrame(rows, columns=["a", "b"])
    first_per_key = frame.drop_duplicates(["a"])
    return first_per_key.values.tolist()

In [None]:
from collections import defaultdict
from tqdm import tqdm
import numpy as np

# Folder receiving one "<graph>_edge_relevance.json" file per graph, and folder
# holding the "<graph>_edge_count.json" files read below.
output_path = "/home/vassm/entity_alignment/kg_entity_alignment_2024/stats/edge_relevance"
edge_counts_path = "/home/vassm/entity_alignment/kg_entity_alignment_2024/stats/edge_counts"
# exist_ok avoids the separate existence check (and the race it implies).
os.makedirs(output_path, exist_ok=True)

for graph_name in graph_list:
    print(graph_name)
    graph, name2id = load_graph(graph_name)

    # Invert name2id so that its values map back to its keys.
    id2name = dict((v, k) for k, v in name2id.items())

    edge_count_file = graph_name.replace(".triples", "_edge_count.json")
    with open(os.path.join(edge_counts_path, edge_count_file), "r") as f:
        edge_counts = json.load(f)

    graph2edge_dict = edges_to_dict(graph)

    # edge-type name -> set of distinct plain-text endpoint values
    text_edge_container2 = defaultdict(set)

    for edge_type_tuple in tqdm(edge_counts):
        # The first item of every edge_counts entry is used as the edge-type name.
        only_this_type_edges = graph2edge_dict[name2id[edge_type_tuple[0]]]
        for sample in only_this_type_edges:
            sample_named = convert2name(sample, id2name)
            if is_it_string(sample_named):
                for element in get_string(sample_named):
                    text_edge_container2[edge_type_tuple[0]].add(element)

    output_file = graph_name.replace(".triples", "_edge_relevance.json")

    # Sets are not JSON serialisable; sorting them also makes the written file
    # identical from run to run (set iteration order of strings is not stable).
    text_edge_container3 = {k: sorted(v) for k, v in text_edge_container2.items()}
    with open(os.path.join(output_path, output_file), "w") as f:
        json.dump(text_edge_container3, f)

marvel.triples


In [None]:
# Load every file in output_path as a [file name, parsed JSON] pair.
edge_relevances = list()
# os.listdir() returns names in arbitrary order; sorting keeps the position of
# each graph in edge_relevances stable, since entries are later picked by index.
for file in sorted(os.listdir(output_path)):

    with open(os.path.join(output_path, file), "r") as f:
        edge_relevance = json.load(f)

    edge_relevances.append([file, edge_relevance])

## marvel:
endmonth
startyear
year
featured

In [50]:
# Inspect the loaded [file name, edge-relevance dict] pairs.
edge_relevances

[['marvel_edge_relevance.json',
  {'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/endmonth': [['June',
     'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/War_Machine:_Brothers_in_Arms_Vol_1']],
   'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/videoGame': [['6109',
     'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Punisher_(Robot)']],
   'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/charref': [['Teleporter',
     'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Reed_Richards_(Earth-944)/Power_Grid']],
   'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/universe': [['Earth-616',
     'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/N%27Baza_(Earth-616)']],
   'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/mainPicSize': [['350',
     'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Sentinel_9']],
   'http://dbkwik.webdatacommons.org/marvel.wikia.com/property/cross

In [29]:
# Position in edge_relevances of the entry inspected by the code below.
i = 4

def filter_ints(elements):
    """Return the entries of `elements` that are not numeric literals.

    An entry counts as numeric when ``float()`` parses it to a finite number,
    so decimals such as '100.0' are removed too (``str.isnumeric()`` returns
    False for them and would keep them).

    Kept in the result, in their original order:
    * entries ``float()`` cannot parse (ordinary text, non-string values);
    * entries ``float()`` parses to a non-finite value ('nan', 'inf',
      'Infinity', ...), which are treated as words rather than numbers.
    """
    import math

    not_ints = list()
    for e in elements:
        try:
            parsed = float(e)
        except (TypeError, ValueError):
            # Not parseable as a number at all -> keep as text.
            not_ints.append(e)
            continue
        if not math.isfinite(parsed):
            not_ints.append(e)
    return not_ints


# edge-type name -> non-numeric text values, for the entry selected by `i`
edge_relevances_filtered = dict()

print(edge_relevances[i][0])
for k, v in edge_relevances[i][1].items():
    v2 = filter_ints(v)
    # Ignore edge types left with fewer than 5 non-numeric values.
    if len(v2) < 5:
        continue
    edge_relevances_filtered[k] = v2

# Report edge types from fewest to most values, with up to 20 random examples each.
for item in sorted(edge_relevances_filtered.items(), key=lambda x: len(x[1])):
    print("#####")
    print(item[0])
    print(len(item[1]))
    # Builtin min() gives a plain int for k and needs no numpy import.
    print(random.sample(item[1], k=min(20, len(item[1]))))

starwars_edge_relevance.json
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/bays
5
['Nine', 'At least one', 'Equipped', 'Supply Dock', 'None']
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/format
5
['Monthly', 'Miniseries', 'Trade paperback', 'Story arc', 'Completed']
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/canon
5
['Metellos-Ilum hyperspace route', 'Ambiguous', 'C-canon', 'Ion exhaust', 'N/A']
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/voice
5
['Star Wars: The Clone Wars', 'Star Wars: Droids', 'Digimon', 'Phineas and Ferb', 'Solo: A Star Wars Story']
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/civilian
5
['Hunters', 'Moderate', 'Heavy', 'None', 'High']
#####
http://dbkwik.webdatacommons.org/starwars.wikia.com/property/description
5
["Creating & Naming 'The Devious Neimoidians'", 'Beverage', 'Amber', 'Naming the Cloud City Wing Guards', 'Small']
#####
http://dbkwik.webdata

In [23]:
# str.isnumeric() returns False for a decimal string such as '100.0', so it
# cannot be used on its own to detect numeric literals.
'100.0'.isnumeric()

False

# Individual

In [66]:
# Load the first graph of graph_list together with its edge-count file.
graph_name = graph_list[0]
graph, name2id = load_graph(graph_name)

# Invert name2id so that its values map back to its keys.
id2name = {value: key for key, value in name2id.items()}

with open(
    os.path.join(
        "/home/vassm/entity_alignment/kg_entity_alignment_2024/stats/edge_counts",
        graph_name.replace(".triples", "_edge_count.json"),
    ),
    "r",
) as f:
    edge_counts = json.load(f)

KeyboardInterrupt: 

In [53]:
# Group the edges of the loaded graph by edge label (see edges_to_dict).
graph2edge_dict = edges_to_dict(graph)

In [43]:
from collections import defaultdict
from tqdm import tqdm

# edge-type name -> sampled named triples that contain a plain-text endpoint
text_edge_container = defaultdict(list)

for edge_type_tuple in tqdm(edge_counts):
    # filter_edges() scans the whole graph for every edge type.
    only_this_type_edges = filter_edges(graph, id2name, edge_type_tuple[0])
    # Sample without replacement and cap k at the number of available edges:
    # random.choices() can return the same row several times and raises
    # IndexError when the list is empty.
    subsample = random.sample(only_this_type_edges, k=min(10, len(only_this_type_edges)))
    for sample in subsample:
        if is_it_string(sample):
            text_edge_container[edge_type_tuple[0]].append(sample)


In [62]:
from collections import defaultdict
from tqdm import tqdm
import numpy as np

# edge-type name -> up to 10 sampled [source name, target name] pairs that
# contain a plain-text endpoint
text_edge_container2 = defaultdict(list)

for edge_type_tuple in tqdm(edge_counts):
    # Pre-grouped lookup instead of re-scanning the graph for every edge type.
    only_this_type_edges = graph2edge_dict[name2id[edge_type_tuple[0]]]
    # Builtin min() yields a plain int for k; no array is built just to
    # compare two numbers.
    subsample = random.sample(only_this_type_edges, k=min(10, len(only_this_type_edges)))
    for sample in subsample:
        sample_named = convert2name(sample, id2name)
        if is_it_string(sample_named):
            text_edge_container2[edge_type_tuple[0]].append(sample_named)


100%|██████████| 141/141 [00:00<00:00, 8937.94it/s]


In [64]:
# Print each edge type followed by its sampled rows, one row per line;
# "#####" separates consecutive edge types.
for k,v in text_edge_container2.items():
    print("#####")
    print(k)
    for val in v:
        print(val)

#####
http://dbkwik.webdatacommons.org/marvel.wikia.com/property/a1ternate
['807128', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Alpha_Flight']
#####
http://dbkwik.webdatacommons.org/marvel.wikia.com/property/videoGame
['6109', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Punisher_(Robot)']
#####
http://dbkwik.webdatacommons.org/marvel.wikia.com/property/endmonth
['June', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/War_Machine:_Brothers_in_Arms_Vol_1']
#####
http://dbkwik.webdatacommons.org/marvel.wikia.com/property/mainimagesize
['250', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Johann_Shmidt_(Clone)']
#####
http://dbkwik.webdatacommons.org/marvel.wikia.com/property/lastissue
['12', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Red_Prophet:_The_Tales_of_Alvin_Maker_Vol_1']
['6', 'http://dbkwik.webdatacommons.org/marvel.wikia.com/resource/Hedge_Knight_2:_Sworn_Sword_Vol_1']
['12', 'http://dbkwik.webdatacommo

In [55]:
# All [source id, target id] pairs stored under the marvel "videoGame" property.
only_this_type_edges = graph2edge_dict[name2id["http://dbkwik.webdatacommons.org/marvel.wikia.com/property/videoGame"]]
only_this_type_edges

[[1003486, 1051153]]

In [71]:
# All [source id, target id] pairs stored under the rdfs:comment edge label.
only_this_type_edges = graph2edge_dict[name2id["http://www.w3.org/2000/01/rdf-schema#comment"]]
only_this_type_edges

[[600950, 1124243],
 [600973, 1052034],
 [600983, 1181657],
 [600991, 1054660],
 [601001, 1060236],
 [601014, 1158378],
 [601058, 1128937],
 [601062, 1106878],
 [601065, 1060236],
 [601090, 1060236],
 [601131, 1305561],
 [601203, 1190394],
 [601286, 1138541],
 [601337, 1290191],
 [601349, 1084771],
 [601384, 1086416],
 [601389, 1181879],
 [601538, 1087077],
 [601572, 1245949],
 [601652, 1133120],
 [601672, 1100091],
 [601702, 1170953],
 [601707, 1108972],
 [601782, 1082698],
 [601840, 1366961],
 [601898, 1108121],
 [601911, 1194958],
 [601916, 1148390],
 [601926, 1365754],
 [601927, 1174648],
 [601981, 1096032],
 [601984, 1556901],
 [602039, 1504486],
 [602050, 1696836],
 [602131, 1290275],
 [602142, 1791863],
 [602194, 1353537],
 [602198, 1247272],
 [602214, 1992880],
 [602311, 1172415],
 [602325, 1128820],
 [602333, 1185218],
 [602374, 1271569],
 [602506, 1119490],
 [602549, 1459571],
 [602562, 1451455],
 [602610, 1100651],
 [602716, 1236999],
 [602755, 1269277],
 [602808, 1183820],


In [82]:
# Keep one rdfs:comment pair per distinct first element (drop_duplicates
# compares rows on their first column only).
res = drop_duplicates(graph2edge_dict[name2id["http://www.w3.org/2000/01/rdf-schema#comment"]])

In [84]:
# drop_duplicates() already returns a plain list of lists, so it is displayed
# directly; calling .tolist() on a list raises AttributeError.
res

[[600950, 1124243],
 [600973, 1052034],
 [600983, 1181657],
 [600991, 1054660],
 [601001, 1060236],
 [601014, 1158378],
 [601058, 1128937],
 [601062, 1106878],
 [601065, 1060236],
 [601090, 1060236],
 [601131, 1305561],
 [601203, 1190394],
 [601286, 1138541],
 [601337, 1290191],
 [601349, 1084771],
 [601384, 1086416],
 [601389, 1181879],
 [601538, 1087077],
 [601572, 1245949],
 [601652, 1133120],
 [601672, 1100091],
 [601702, 1170953],
 [601707, 1108972],
 [601782, 1082698],
 [601840, 1366961],
 [601898, 1108121],
 [601911, 1194958],
 [601916, 1148390],
 [601926, 1365754],
 [601927, 1174648],
 [601981, 1096032],
 [601984, 1556901],
 [602039, 1504486],
 [602050, 1696836],
 [602131, 1290275],
 [602142, 1791863],
 [602194, 1353537],
 [602198, 1247272],
 [602214, 1992880],
 [602311, 1172415],
 [602325, 1128820],
 [602333, 1185218],
 [602374, 1271569],
 [602506, 1119490],
 [602549, 1459571],
 [602562, 1451455],
 [602610, 1100651],
 [602716, 1236999],
 [602755, 1269277],
 [602808, 1183820],


In [6]:
# Unfiltered listing of the graph folder.
# NOTE(review): this rebinds graph_list, which the first cells restrict to
# ".triples" files and which the edge-relevance loop iterates over; running
# that loop after this cell would pass every listed entry to load_graph.
# Consider a separate name for the raw listing.
graph_list = os.listdir(graph_folder_path)

In [7]:
# Display every entry found in the graph folder.
graph_list

['marvel.xml',
 'mcu.xml',
 'memoryalpha.xml',
 'memorybeta.xml',
 'starwars.xml',
 'stexpanded.xml',
 'swg.xml',
 'swtor.xml',
 'original_triples',
 'marvel.triples',
 'marvel_mapping.json',
 'mcu.triples',
 'mcu_mapping.json',
 'memoryalpha.triples',
 'memoryalpha_mapping.json',
 'memorybeta.triples',
 'memorybeta_mapping.json',
 'starwars.triples',
 'starwars_mapping.json',
 'stexpanded.triples',
 'stexpanded_mapping.json',
 'swg.triples',
 'swg_mapping.json',
 'swtor.triples',
 'swtor_mapping.json']

In [12]:
import rdflib
from loaders import graph_folder_path
from rdflib import URIRef, RDFS, Namespace

# Namespace object for the RDF Schema vocabulary; its `comment` term is used
# further down to select triples.
# NOTE(review): RDFS is imported from rdflib above but not used; it presumably
# covers the same vocabulary — confirm before replacing this namespace.
rdfschema_NS = Namespace("http://www.w3.org/2000/01/rdf-schema#")

# Parse "swtor.xml" from the graph folder into a fresh rdflib graph.
rdf_g = rdflib.Graph()
rdf_g.parse(os.path.join(graph_folder_path, "swtor.xml"))

<Graph identifier=N1ff026cc15ff44c3bf21569937e74243 (<class 'rdflib.graph.Graph'>)>

In [13]:
# Collect every triple of rdf_g whose predicate is rdfs:comment as [s, p, o].
links = [
    [subject, predicate, obj]
    for subject, predicate, obj in rdf_g.triples((None, rdfschema_NS.comment, None))
]

In [14]:
# Number of rdfs:comment triples found in the parsed RDF graph.
len(links)

3727

In [25]:
# Inspect the collected [subject, predicate, object] triples.
links

[[rdflib.term.URIRef('http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Zythmnr'),
  rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
  rdflib.term.Literal('{{ #if: |\nZythmnr was a male Massassi Sith apprentice\n\n\n', lang='en')],
 [rdflib.term.URIRef('http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Darth_Malgus'),
  rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
  rdflib.term.Literal("Darth Malgus, born under the name Veradun and later nicknamed The False Emperor, was a Human male Sith Lord of the Sith Empire during the Great Galactic War, Cold War and the Second Great Galactic War. He was born under the name Veradun within Imperial space on Dromund Kaas, and raised by his adoptive father. While still young, Veradun killed a Twi'lek servant on his father's estate, revealing the dark will behind his sensitivity to The Force. The young boy was subsequently sent to the Sith Academy on the Imperial capital of Dromund Kaas, where 

In [10]:
from collections import defaultdict
# Load the "swtor.triples" graph and group its edges by edge label.
graph_name = "swtor.triples"
graph, name2id = load_graph(graph_name)

# Invert name2id so that its values map back to its keys.
id2name = {value: key for key, value in name2id.items()}
graph2edge_dict = edges_to_dict(graph)

In [22]:
# Pairs stored under the rdfs:comment edge label in the loaded graph, and how
# many of them there are.
comment_links = graph2edge_dict[name2id["http://www.w3.org/2000/01/rdf-schema#comment"]]
len(comment_links)

2010

In [23]:
# Inspect the raw [source id, target id] pairs.
comment_links

[[28, 33],
 [46, 62],
 [49, 18159],
 [53, 23653],
 [57, 21954],
 [59, 26136],
 [65, 17386],
 [69, 18456],
 [70, 23192],
 [71, 14733],
 [72, 34131],
 [75, 27407],
 [80, 29862],
 [81, 155],
 [86, 18105],
 [89, 12887],
 [90, 10093],
 [93, 16220],
 [98, 25345],
 [99, 31752],
 [106, 12110],
 [115, 31098],
 [118, 10288],
 [120, 23563],
 [121, 19825],
 [122, 12976],
 [124, 28537],
 [126, 16235],
 [127, 13513],
 [137, 30841],
 [141, 16007],
 [143, 23622],
 [147, 19974],
 [148, 16293],
 [156, 9382],
 [160, 12392],
 [161, 22493],
 [162, 21651],
 [170, 10169],
 [177, 15007],
 [180, 20892],
 [181, 20386],
 [189, 17250],
 [199, 30762],
 [200, 23760],
 [201, 26515],
 [202, 13093],
 [203, 10638],
 [208, 24812],
 [217, 14287],
 [227, 25779],
 [228, 29197],
 [238, 20904],
 [250, 14606],
 [251, 15556],
 [252, 17052],
 [253, 17290],
 [254, 16188],
 [255, 18246],
 [258, 19141],
 [260, 21289],
 [262, 21897],
 [265, 22225],
 [267, 24453],
 [268, 24917],
 [269, 25139],
 [270, 25729],
 [271, 22350],
 [273, 26

In [24]:
# Print every rdfs:comment pair with its ids resolved to names.
for row in comment_links:
    print(convert2name(row, id2name))

['http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Senator_Evran', 'Evran on Wookieepedia']
['http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Qual', "{{ #if: |\nQual was a male Selkath Jedi Master who served on the Dantooine Enclave's Jedi Council\xa0 in the years following the Great Sith War. Qual was active on the Enclave Council during the Great Hunt of 3,995 BBY when Jedi Knights traveled the galaxy slaying Sith-created beasts left over from the war. After the official end of the Great Hunt, Qual was present when the Jedi Enclave Council notified Jedi Knights Duron Qel-Droma, Shaela Nuur and Guun Han Saresh that they were being sent to cleanse the Sith homeworld of Korriban of terentateks. Although some on the Council disagreed with the mission, Qual saw it to be tactically sound and believed in the wisdom of the Jedi High Council, which had chosen the three Jedi. However, Qel-Droma, Nuur and Saresh never returned.\n"]
['http://dbkwik.webdatacommons.org/swtor.wik

In [27]:
# Number of edges in the loaded graph.
len(list(graph.edges()))

107370

In [28]:
# Count every triple in the parsed RDF graph (wildcard on all three positions).
counter = sum(1 for _ in rdf_g.triples((None, None, None)))

print(counter)

146148


# Browse graph

In [56]:
# Load the graph to browse and build the inverse of its name -> id mapping.
gname = "swtor.triples"
G, name2id_mapping = load_graph(gname)
id2name_mapping = {value: key for key, value in name2id_mapping.items()}

In [60]:
# Names seen on the first (e1s) and second (e2s) endpoint of every edge whose
# label name contains "property/hair"; each matching edge is also printed.
e1s = set()
e2s = set()
for source, target, attrs in G.edges(data=True):
    label_name = id2name_mapping[attrs["edge_label"]]
    if "property/hair" not in label_name:
        continue
    source_name = id2name_mapping[source]
    target_name = id2name_mapping[target]
    print(source_name, target_name, label_name)
    e1s.add(source_name)
    e2s.add(target_name)

None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Amzartho http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Eldo http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Rhon_Jun%27yon http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Yadira_Ban http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Hedrow http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Karu http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/resource/Seferiss http://dbkwik.webdatacommons.org/swtor.wikia.com/property/hair
None http://dbkwik.webdatacommons.org/swtor.wikia.com/res

In [62]:
# First-endpoint names that do not contain "http".
[name for name in e1s if "http" not in name]

['Blond', 'Gray', 'None', 'Red', 'Brown', 'Black']

In [63]:
# Second-endpoint names that do not contain "http".
[name for name in e2s if "http" not in name]

['bald',
 'Blond',
 'Gray',
 'Red',
 'Grey',
 'Bald',
 'White',
 'Light Brown',
 'Brown',
 'Black',
 'Black/grey']

In [64]:
# str(None) gives the text 'None'.
# NOTE(review): presumably checking whether the 'None' values listed above
# could come from a missing value converted with str() — confirm.
str(None)

'None'