Skip to content

Commit

Permalink
Merge pull request #9 from microsoft/remove-metadata-type-registry
Browse files Browse the repository at this point in the history
Removed metadata type registry
  • Loading branch information
Dwayne Pryce committed Feb 11, 2020
2 parents 5d26b03 + bdbfa98 commit 9f75023
Show file tree
Hide file tree
Showing 16 changed files with 336 additions and 517 deletions.
34 changes: 20 additions & 14 deletions notebooks/bipartite.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,21 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"is_executing": false,
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sally Field worked on a movie with Tom Hanks? True\n",
"Graph is directed: False\n",
"['Bill Paxton', 'Steve Martin', 'Sally Field', 'Tom Hanks', 'John Candy', 'Kevin Bacon', 'Kathleen Quinlan']\n"
],
"output_type": "stream"
"['Steve Martin', 'Kathleen Quinlan', 'Tom Hanks', 'Kevin Bacon', 'Sally Field', 'John Candy', 'Bill Paxton']\n"
]
}
],
"source": [
Expand All @@ -71,14 +77,14 @@
"print(f\"Sally Field worked on a movie with Tom Hanks? {'Sally Field' in graph['Tom Hanks']}\")\n",
"print(f\"Graph is directed: {graph.is_directed()}\")\n",
"print(f\"{graph.nodes()}\")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n",
"is_executing": false
}
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -102,13 +108,13 @@
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
},
"source": []
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}
11 changes: 5 additions & 6 deletions notebooks/complex_io.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"some business rules we've put in place.\n",
"\n",
"The `topologic.io.from_dataset` function expects a function of the signature \n",
"`Callable[[nx.Graph, MetadataTypeRegistry], Callable[[List[str]], None]]` to be provided\n",
"`Callable[[nx.Graph], Callable[[List[str]], None]]` to be provided\n",
"\n",
"This is the definition of a function that returns a function that returns a function.\n",
"\n",
Expand Down Expand Up @@ -121,10 +121,9 @@
" source_index: int,\n",
" target_index: int,\n",
" date_index: int\n",
") -> Callable[[nx.Graph, tc.MetadataTypeRegistry], Callable[[List[str]], None]]:\n",
") -> Callable[[nx.Graph], Callable[[List[str]], None]]:\n",
" def _csv_parser_setup(\n",
" graph: nx.Graph, \n",
" metadata_type_registry: tc.MetadataTypeRegistry\n",
" graph: nx.Graph\n",
" ) -> Callable[[List[str]], None]:\n",
" def _process_row(row: List[str]):\n",
" # this processes the current row\n",
Expand Down Expand Up @@ -180,7 +179,7 @@
" dialect=\"excel-tab\"\n",
" )\n",
" \n",
" digraph, _ = tc.io.from_dataset(\n",
" digraph = tc.io.from_dataset(\n",
" csv_dataset=dataset,\n",
" projection_function_generator=projection_function,\n",
" graph=digraph\n",
Expand Down Expand Up @@ -419,7 +418,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
"version": "3.7.3"
},
"pycharm": {
"stem_cell": {
Expand Down
377 changes: 191 additions & 186 deletions notebooks/embeddings.ipynb

Large diffs are not rendered by default.

40 changes: 11 additions & 29 deletions tests/io/test_csv_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unittest

import csv
from topologic.io import CsvDataset, from_dataset, from_file, GraphContainer, load
from topologic.io import CsvDataset, from_dataset, from_file, load
from topologic import projection
from ..utils import data_file

Expand Down Expand Up @@ -40,7 +40,7 @@ def test_edge(self):
2,
4
)
graph, registry = from_dataset(edge_dataset, proj)
graph = from_dataset(edge_dataset, proj)
self.assertEqual(7, len(graph.nodes))
self.assertEqual(2, graph["jon"]["john"]["weight"])
attributes = graph["jon"]["john"]["attributes"]
Expand All @@ -65,7 +65,6 @@ def test_edge(self):
{"date": "7/2/2018", "subject": "No I'm not Lumberg", "replyCount": "0"},
attributes[4]
)
self.assertEqual(int, registry.attribute_to_type_mapping()["replyCount"])

def test_vertex(self):
with open(data_file("tiny-graph-vertex.csv")) as vertex_file:
Expand All @@ -78,9 +77,8 @@ def test_vertex(self):
vertex_dataset.headers(),
0
)
graph, registry = from_dataset(vertex_dataset, proj)
graph = from_dataset(vertex_dataset, proj)
self.assertEqual(0, len(graph.nodes))
self.assertEqual(0, len(registry.attribute_to_type_mapping()))

def test_edge_then_vertex(self):
with open(data_file("tiny-multigraph.csv")) as edge_file:
Expand All @@ -95,7 +93,7 @@ def test_edge_then_vertex(self):
2,
4
)
graph, registry = from_dataset(edge_dataset, proj)
graph = from_dataset(edge_dataset, proj)

with open(data_file("tiny-graph-vertex.csv")) as vertex_file:
vertex_dataset = CsvDataset(
Expand All @@ -108,10 +106,9 @@ def test_edge_then_vertex(self):
0,
ignored_values=["NULL"]
)
same_graph, new_registry = from_dataset(vertex_dataset, vertex_proj, graph)
same_graph = from_dataset(vertex_dataset, vertex_proj, graph)

self.assertTrue(same_graph == graph)
self.assertTrue(registry != new_registry)

self.assertEqual(7, len(graph.nodes))
self.assertEqual(2, graph["jon"]["john"]["weight"])
Expand All @@ -137,7 +134,6 @@ def test_edge_then_vertex(self):
{"date": "7/2/2018", "subject": "No I'm not Lumberg", "replyCount": "0"},
attributes[4]
)
self.assertEqual(int, registry.attribute_to_type_mapping()["replyCount"])

self.assertDictEqual({"lastName": "larson"}, graph.nodes["jon"]["attributes"][0])
self.assertDictEqual(
Expand Down Expand Up @@ -192,7 +188,7 @@ def test_invalid_arguments_for_vertex(self):

def test_edge_only_collection_projection(self):
with open(data_file("tiny-multigraph.csv")) as edge_file:
container: GraphContainer = from_file(
graph = from_file(
edge_file,
1,
2,
Expand All @@ -201,8 +197,7 @@ def test_edge_only_collection_projection(self):
"excel",
edge_metadata_behavior="collection"
)
graph = container.graph
registry = container.edge_metadata_type_registry

self.assertEqual(7, len(graph.nodes))
self.assertEqual(2, graph["jon"]["john"]["weight"])
attributes = graph["jon"]["john"]["attributes"]
Expand All @@ -227,11 +222,10 @@ def test_edge_only_collection_projection(self):
{"date": "7/2/2018", "subject": "No I'm not Lumberg", "replyCount": "0"},
attributes[4]
)
self.assertEqual(int, registry.attribute_to_type_mapping()["replyCount"])

def test_edge_only_single_projection(self):
with open(data_file("tiny-graph.csv")) as edge_file:
container: GraphContainer = from_file(
graph = from_file(
edge_file,
1,
2,
Expand All @@ -241,20 +235,18 @@ def test_edge_only_single_projection(self):
edge_csv_use_headers=["date", "emailFrom", "emailTo", "subject", "replyCount"],
edge_metadata_behavior="single"
)
graph = container.graph
registry = container.edge_metadata_type_registry

self.assertEqual(7, len(graph.nodes))
self.assertEqual(1, graph["jon"]["john"]["weight"])
attributes = graph["jon"]["john"]["attributes"]
self.assertDictEqual(
{"date": "7/2/2018", "subject": "RE: Graphs are great", "replyCount": "0"},
attributes[0]
)
self.assertEqual(int, registry.attribute_to_type_mapping()["replyCount"])

def test_edge_only_no_metadata_projection(self):
with open(data_file("tiny-graph.csv")) as edge_file:
container: GraphContainer = from_file(
graph = from_file(
edge_file,
1,
2,
Expand All @@ -264,17 +256,14 @@ def test_edge_only_no_metadata_projection(self):
edge_csv_use_headers=["date", "emailFrom", "emailTo", "subject", "replyCount"],
edge_metadata_behavior="none"
)
graph = container.graph
registry = container.edge_metadata_type_registry
self.assertEqual(7, len(graph.nodes))
self.assertEqual(1, graph["jon"]["john"]["weight"])
self.assertNotIn("attributes", graph["jon"]["john"])
self.assertNotIn("replyCount", registry.attribute_to_type_mapping())

def test_vertex_single_projection(self):
with open(data_file("tiny-multigraph.csv")) as edge_file:
with open(data_file("tiny-graph-vertex.csv")) as vertex_file:
container: GraphContainer = from_file(
graph = from_file(
edge_file,
1,
2,
Expand All @@ -288,12 +277,6 @@ def test_vertex_single_projection(self):
vertex_dialect=csv.excel()
)

graph = container.graph
edge_registry = container.edge_metadata_type_registry
vertex_registry = container.vertex_metadata_type_registry

self.assertTrue(edge_registry != vertex_registry) # specifically verifying memory addresses differ

self.assertEqual(7, len(graph.nodes))
self.assertEqual(2, graph["jon"]["john"]["weight"])
attributes = graph["jon"]["john"]["attributes"]
Expand Down Expand Up @@ -321,7 +304,6 @@ def test_vertex_single_projection(self):
{"date": "7/2/2018", "subject": "No I'm not Lumberg", "replyCount": "0"},
attributes[4]
)
self.assertEqual(int, edge_registry.attribute_to_type_mapping()["replyCount"])

self.assertDictEqual(
{"lastName": "larson", "sandwichPreference": "NULL"},
Expand Down

0 comments on commit 9f75023

Please sign in to comment.