## 'Named Rock Unit'

In [1]:
import rdflib
from collections import Counter

In [9]:
g = rdflib.Graph().parse('./data/Lexicon_NamedRockUnit.nt', format='nt')

In [10]:
hasLithogenesis_triples = [(s, p, o) for (s, p, o) in g if str(p).endswith('hasLithogenesis')]
labels = {
    label for (label, count) in Counter(o for (s, p, o) in hasLithogenesis_triples).most_common(2)
}
labels

{rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/FLUV'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/GLACI')}

In [13]:
instances_triples = {(s, p, o) for (s, p, o) in g if str(p).endswith('hasLithogenesis') and o in labels}
instances = [s for (s, _, _) in instances_triples]
print('number of instances: ', len(instances))
assert len(set(instances)) == len(instances)

number of instances:  146


In [46]:
# creating a subset of the graph
sub_instances = instances[:3]
sub_g = [(s, p, o) for (s, p, o) in g if s in sub_instances]
sub_instances_triples = [(s, p, o) for (s, p, o) in sub_g]
sub_instances_triples

[(rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/RLTI'),
  rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
  rdflib.term.Literal('Red Lion Till Member', lang='en')),
 (rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/TPGR'),
  rdflib.term.URIRef('http://data.bgs.ac.uk/ref/Lexicon/hasRevisionNumber'),
  rdflib.term.Literal('0', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'))),
 (rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/HOTI'),
  rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#inScheme'),
  rdflib.term.URIRef('http://data.bgs.ac.uk/ref/Lexicon/NamedRockUnit')),
 (rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/HOTI'),
  rdflib.term.URIRef('http://data.bgs.ac.uk/ref/Lexicon/hasLithologyComponent'),
  rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/LithologyComponent/HOTI_Z')),
 (rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/TPGR'),

In [53]:
from collections import defaultdict
from pprint import pprint

V = []
E = []
depth = 2
labels = defaultdict()
vMap = []
eMap = []
search_front = []
V_sub = defaultdict()
E_sub = defaultdict()


# 1. Initialization
for i in sub_instances:
    V.append(i)
    labels[(i, depth)] = ''
    vMap.append(i)
# 2. Subgraph Extraction
for i in sub_instances:
    search_front.append(i)
    for i in range(d - 1, 0, -1):
        new_search_front = []
        for r in search_front:
            triples = [(s, p, o) for (s, p, o) in sub_g if s == r]
            for (s, p, o) in triples:
                new_search_front.append(o)
                if o not in vMap:
                    V.append(o)
                    vMap.append(o)
                labels[(o), depth] = o
                if o not in V_sub.keys():
                    V_sub[o] = depth
                if (s, p, o) not in eMap:
                    E.append((s, p, o))
                    eMap.append((s, p, o))
                labels[(s, p, o)] = p
                if (s, p, o) not in E_sub:
                    E_sub[(s, p, o)] = depth
        search_front = new_search_front
pprint(V)
pprint(labels)
pprint(vMap)

[rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/RLTI'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/TPGR'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/HOTI'),
 rdflib.term.Literal('Red Lion Till Member', lang='en'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/ref/Lexicon/NamedRockUnit'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Geochronology/Division/Q2'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/RockUnitRank/M'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/SpatialScope/NotApplicable'),
 rdflib.term.Literal('FORMAL', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),
 rdflib.term.URIRef('http://data.bgs.ac.uk/ref/CurrentStatus/CURRENT'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/Theme/SPRFCL'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/GLACI'),
 rdflib.term.URIRef('http://data.bgs.ac.uk/id/Lexicon/NamedRockUnit/HOLD'),
 rdflib.te