
stored correct values for path and template - highly unoptimized

1 parent f774f7e commit 1130286b4246f5c7a4b9da8dbe4d7659069b6ea7 @moret committed Mar 16, 2012
Showing with 69 additions and 16 deletions.
  1. +37 −0 read.py
  2. +32 −16 scraps.py → store.py
@@ -0,0 +1,37 @@
+import redis
+
+def print_matrix(nodes, paths):
+    print 'matrix:'
+    r = redis.StrictRedis()
+    s = ''
+    for i, path in enumerate(paths):
+        for j, node in enumerate(nodes):
+            entry = r.hget('row:%d' % i, j)
+            if entry:
+                s += entry
+            else:
+                s += ' . '
+            s += ' '
+        s += '\n'
+    print s
+
+def print_index(l_name, l):
+    print '%s:' % l_name
+    for i, entry in enumerate(l):
+        print '%d: %s' % (i, entry)
+    print
+
+def main():
+    r = redis.StrictRedis()
+    nodes = r.lrange('nodes', 0, -1)
+    paths = r.lrange('paths', 0, -1)
+    templates = r.lrange('templates', 0, -1)
+
+    # print_index('nodes', nodes)
+    print_index('paths', paths)
+    print_index('templates', templates)
+
+    print_matrix(nodes, paths)
+
+if __name__ == "__main__":
+    main()
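Note: print_matrix above prints each cell as the raw string Redis returns. As a minimal sketch (not part of the commit), assuming store.py below has already populated Redis and that the stored tuple was serialized as its string repr, e.g. "(0, 2, 1)" (as older redis-py versions did when handed non-string values), one cell can be decoded back like this:

import ast
import redis

# Hypothetical helper, not in the commit: decode one matrix cell
# back into its (position, path length, template index) tuple.
r = redis.StrictRedis()
raw = r.hget('row:0', 0)  # row 0 = first path, field 0 = first node
if raw:
    p, l, t = ast.literal_eval(raw)
    print 'node position %d of %d, template %d' % (p, l, t)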
@@ -1,13 +1,14 @@
 import rdflib
 import redis
 
-def set_from_generator(generator):
+def list_from_generator(generator):
     l = []
     for item in generator:
         l.append(item)
-    return set(l)
+    return l
 
 def mark_and_enqueue(path, edge, node, qeue, marked):
+    # copy current path to store modified version
     node_path = list(path)
     if edge != None:
         node_path.append(edge)
@@ -16,19 +17,25 @@ def mark_and_enqueue(path, edge, node, qeue, marked):
     marked.append(node)
 
 def find_nodes_sources_and_sinks(graph):
-    subjects = set_from_generator(graph.subjects())
-    objects = set_from_generator(graph.objects())
-    return subjects.union(objects), subjects - objects, objects - subjects
+    subjects = list_from_generator(graph.subjects())
+    objects = list_from_generator(graph.objects())
+
+    # TODO find a decent way to store unique values in an array
+    nodes = list(set(subjects).union(set(objects)))
+    sources = list(set(subjects) - set(objects))
+    sinks = list(set(objects) - set(subjects))
+
+    return nodes, sources, sinks
 
 def store_hash_list(l_name, l):
     r = redis.StrictRedis()
     r.delete(l_name)
     for i in l:
-        r.lpush(l_name, i)
+        r.rpush(l_name, i)
 
 def find_paths_templates(graph, sources, sinks):
-    paths = set()
-    templates = set()
+    paths = []
+    templates = []
 
     for source in sources:
         path = []
@@ -38,24 +45,33 @@ def find_paths_templates(graph, sources, sinks):
         while len(qeue) > 0:
             current_node, current_path = qeue.pop()
             if current_node in sinks:
-                paths.add(tuple(current_path))
-                templates.add(tuple(current_path[1::2]))
+                paths.append(tuple(current_path))
+                templates.append(tuple(current_path[1::2]))
             for edge, node in graph.predicate_objects(current_node):
                 if node not in marked:
                     mark_and_enqueue(current_path, edge, node, qeue, marked)
 
-    return paths, templates
+    # TODO find a decent way to store unique values in an array
+    unique_paths = list(set(paths))
+    unique_templates = list(set(templates))
+    return unique_paths, unique_templates
 
-def store_sparse_rdf(nodes, paths, templates):
+def build_sparse_rdf(nodes, paths, templates):
     r = redis.StrictRedis()
     for i, path in enumerate(paths):
-        r.delete('path:%d' % i)
+        r.delete('row:%d' % i)
         for j, node in enumerate(nodes):
             if node in path:
-                r.hset('path:%d' % i, j, (j, i))
+                nodes_template = path[::2]
+                path_template = path[1::2]
+                p = nodes_template.index(node)  # position of the node in the path
+                l = len(nodes_template)  # how many nodes the path has
+                t = templates.index(path_template)  # which template it uses
+                r.hset('row:%d' % i, j, (p, l, t))
 
 def main():
     print 'parsing graph...'
+    # parsing the whole graph at once is dumb - it never ends for large sets
     graph = rdflib.Graph()
     graph.parse('paper.nt', format='nt')
     # graph.parse('NTN-individuals.owl')
@@ -75,8 +91,8 @@ def main():
     store_hash_list('paths', paths)
     store_hash_list('templates', templates)
 
-    print 'storing sparse matrix...'
-    store_sparse_rdf(nodes, paths, templates)
+    print 'building sparse matrix...'
+    build_sparse_rdf(nodes, paths, templates)
 
     print 'done - bye!'
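For clarity, here is a minimal Redis-free sketch (not part of the commit) of the encoding build_sparse_rdf writes: each matrix row is one path, each column one node, and a non-empty cell holds (node position in the path, node count of the path, template index). The toy nodes/paths/templates values are made up for illustration:

def build_sparse(nodes, paths, templates):
    matrix = {}  # (row, col) -> (p, l, t), the same values build_sparse_rdf hsets
    for i, path in enumerate(paths):
        node_seq = path[::2]   # even positions are nodes
        edge_seq = path[1::2]  # odd positions are edges, i.e. the template
        for j, node in enumerate(nodes):
            if node in node_seq:  # membership checked against node positions only
                p = node_seq.index(node)
                l = len(node_seq)
                t = templates.index(edge_seq)
                matrix[(i, j)] = (p, l, t)
    return matrix

# toy data: a path alternates node, edge, node, ...
nodes = ['a', 'b', 'c']
paths = [('a', 'knows', 'b'), ('a', 'knows', 'c')]
templates = [('knows',)]
print build_sparse(nodes, paths, templates)
# -> four cells (ordering may vary):
#    (0, 0): (0, 2, 0), (0, 1): (1, 2, 0), (1, 0): (0, 2, 0), (1, 2): (1, 2, 0)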
