From a7e448685f5252f9f23c3998727a0b8c3ca3a6c7 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Tue, 12 Jun 2018 16:15:31 -0400
Subject: [PATCH] adding lcc

---
 jhu_primitives/__init__.py     |   1 +
 jhu_primitives/lcc/__init__.py |   1 +
 jhu_primitives/lcc/lcc.py      | 195 +++++++++++++++++++++++++++++++++
 setup.py                       |   5 +-
 4 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 jhu_primitives/lcc/__init__.py
 create mode 100644 jhu_primitives/lcc/lcc.py

diff --git a/jhu_primitives/__init__.py b/jhu_primitives/__init__.py
index 7f47985..bdfd661 100644
--- a/jhu_primitives/__init__.py
+++ b/jhu_primitives/__init__.py
@@ -12,6 +12,7 @@
 from .lse import LaplacianSpectralEmbedding
 from .dimselect import DimensionSelection
 from .gclust import GaussianClustering
+from .lcc import LargestConnectedComponent
 from .nonpar import NonParametricClustering
 from .numclust import NumberOfClusters
 from .oocase import OutOfCoreAdjacencySpectralEmbedding
diff --git a/jhu_primitives/lcc/__init__.py b/jhu_primitives/lcc/__init__.py
new file mode 100644
index 0000000..1238334
--- /dev/null
+++ b/jhu_primitives/lcc/__init__.py
@@ -0,0 +1 @@
+from .lcc import LargestConnectedComponent
diff --git a/jhu_primitives/lcc/lcc.py b/jhu_primitives/lcc/lcc.py
new file mode 100644
index 0000000..a2da14b
--- /dev/null
+++ b/jhu_primitives/lcc/lcc.py
@@ -0,0 +1,195 @@
+from rpy2 import robjects
+from typing import Sequence, TypeVar, Union, Dict
+import os
+from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
+import numpy
+from d3m import container
+from d3m import utils
+from d3m.metadata import hyperparams, base as metadata_module, params
+from d3m.primitive_interfaces import base
+from d3m.primitive_interfaces.base import CallResult
+import igraph
+import networkx
+
+
+Inputs = container.ndarray
+Outputs = container.ndarray
+
+class Params(params.Params):
+    pass
+
+class Hyperparams(hyperparams.Hyperparams):
+    #dim = hyperparams.Hyperparameter[None](default=None)
+    dim = None
+
+def file_path_conversion(abs_file_path, uri="file"):
+    """
+    Convert a "prefix:path" string into a forward-slash path string.
+
+    The input is split on ':'; the first piece is kept as the prefix (for
+    example a drive letter) and the second piece is treated as the path. The
+    first character of the path is taken as its separator and stripped.
+
+    Returns "file://localhost" + path when uri == "file", otherwise
+    prefix + ":" + path. Prints a message and returns None when the path is
+    empty after stripping or the separator is neither "/" nor a backslash.
+
+    NOTE(review): for "/"-separated input the stripped leading separator is
+    not re-added, while backslash input gets a "/" before every component;
+    confirm which form callers expect.
+    """
+    pieces = abs_file_path.split(':')
+    local_drive, file_path = pieces[0], pieces[1]
+    path_sep = file_path[0]
+    file_path = file_path[1:] # Remove initial separator
+    if len(file_path) == 0:
+        print("Invalid file path: len(file_path) == 0")
+        return
+
+    s = ""
+    if path_sep == "/":
+        s = file_path
+    elif path_sep == "\\":
+        # Re-join the non-empty components, each prefixed with "/".
+        s = "".join("/" + part for part in file_path.split("\\") if part != "")
+    else:
+        print("Unsupported path separator!")
+        return
+
+    if uri == "file":
+        return "file://localhost" + s
+    else:
+        return local_drive + ":" + s
+
+class LargestConnectedComponent(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+    """
+    Transformer primitive that extracts the largest connected component of a graph.
+    """
+
+    # This should contain only metadata which cannot be automatically determined from the code.
+    metadata = metadata_module.PrimitiveMetadata({
+        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
+        'id': '32fec24f-6861-4a4c-88f3-d4ec2bc1b486',
+        'version': "0.1.0",
+        'name': "jhu.lcc",
+        # The same path the primitive is registered with entry points in setup.py.
+        'python_path': 'd3m.primitives.jhu_primitives.LargestConnectedComponent',
+        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
+        'keywords': ['spectral clustering'],
+        'source': {
+            'name': "JHU",
+            'uris': [
+                # Unstructured URIs. Link to file and link to repo in this case.
+                'https://github.com/neurodata/primitives-interfaces/jhu_primitives/lcc/lcc.py',
+#                'https://github.com/youngser/primitives-interfaces/blob/jp-devM1/jhu_primitives/ase/ase.py',
+                'https://github.com/neurodata/primitives-interfaces.git',
+            ],
+        },
+        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
+        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
+        # install a Python package first to be even able to run setup.py of another package. Or you have
+        # a dependency which is not on PyPi.
+        'installation': [
+            {
+                'type': 'UBUNTU',
+                'package': 'r-base',
+                'version': '3.4.2'
+            },
+            {
+                'type': 'UBUNTU',
+                'package': 'libxml2-dev',
+                'version': '2.9.4'
+            },
+            {
+                'type': 'UBUNTU',
+                'package': 'libpcre3-dev',
+                # NOTE(review): same version string as libxml2-dev above -- confirm.
+                'version': '2.9.4'
+            },
+#            {
+#                'type': 'UBUNTU',
+#                'package': 'r-base-dev',
+#                'version': '3.4.2'
+#            },
+#            {
+#                'type': 'UBUNTU',
+#                'package': 'r-recommended',
+#                'version': '3.4.2'
+#            },
+            {
+                'type': 'PIP',
+                'package_uri': 'git+https://github.com/neurodata/primitives-interfaces.git@{git_commit}#egg=jhu_primitives'.format(
+                    git_commit=utils.current_git_commit(os.path.dirname(__file__)),),
+            },
+            {
+                'type': 'PIP',
+                'package': 'python_igraph',
+                'version': '0.7.1'
+            },
+            {
+                'type': 'PIP',
+                'package': 'networkx',
+                'version': '2.1'
+            }
+        ],
+        # URIs at which one can obtain code for the primitive, if available.
+        # 'location_uris': [
+        #     'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
+        #         git_commit=utils.current_git_commit(os.path.dirname(__file__)),
+        #     ),
+        # ],
+        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
+        # best describe the primitive, make a merge request.
+        # NOTE(review): "GAUSSIAN_PROCESS" does not describe a connected-component search -- confirm.
+        'algorithm_types': [
+            "GAUSSIAN_PROCESS"
+        ],
+        'primitive_family': "GRAPH_CLUSTERING"
+    })
+
+    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, base.DockerContainer] = None) -> None:
+        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
+
+    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Find the largest connected component of a graph.
+
+        Input: g: an n x n matrix, n x 2 edge list, a networkx Graph, or igraph Graph
+        (a 2 x 2 array is read as an n x n matrix)
+        Output: The vertex indices of the largest connected component of g
+
+        Raises ValueError for an array that is neither n x n nor n x 2 and
+        TypeError for any other unsupported input.
+        """
+
+        g = inputs
+
+        if isinstance(g, list):
+            g = igraph.Graph(g)
+
+        # isinstance rather than an exact type comparison, so that ndarray subclasses
+        # such as container.ndarray (the declared Inputs type) are accepted.
+        if isinstance(g, numpy.ndarray):
+            if g.ndim == 2 and g.shape[0] == g.shape[1]: # n x n matrix
+                g = networkx.Graph(g) # convert to networkx graph to be able to extract edge list
+            elif g.ndim == 2 and g.shape[1] == 2: # n x 2 matrix
+                g = igraph.Graph(g.tolist())
+            else:
+                raise ValueError("Neither n x n nor n x 2. Please submit a square matrix or edge list.")
+
+        if isinstance(g, networkx.Graph): # networkx graph
+            # Pass the vertex count so vertices without edges are kept.
+            g = igraph.Graph(n=g.number_of_nodes(), edges=list(g.edges())) # convert to igraph graph
+
+        if not isinstance(g, igraph.Graph):
+            raise TypeError("Unsupported graph type")
+
+        components = g.clusters()
+        # max() keeps the first of equally sized components; default=[] covers a graph without vertices.
+        largest_component = max(components, key=len, default=[])
+
+        result = numpy.array(largest_component)
+
+        outputs = container.ndarray(result)
+
+        return base.CallResult(outputs)
diff --git a/setup.py b/setup.py
index d19a550..d53bf6c 100755
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@ def read_package_variable(key):
     packages=[
         PACKAGE_NAME,
         'jhu_primitives.ase',
+        'jhu_primitives.lcc',
         'jhu_primitives.lse',
         'jhu_primitives.dimselect',
         'jhu_primitives.gclust',
@@ -54,6 +55,7 @@ def read_package_variable(key):
         'd3m.primitives': [
             'jhu_primitives.AdjacencySpectralEmbedding=jhu_primitives.ase:AdjacencySpectralEmbedding',
             'jhu_primitives.LaplacianSpectralEmbedding=jhu_primitives.lse:LaplacianSpectralEmbedding',
+            'jhu_primitives.LargestConnectedComponent=jhu_primitives.lcc:LargestConnectedComponent',
             'jhu_primitives.DimensionSelection=jhu_primitives.dimselect:DimensionSelection',
             'jhu_primitives.GaussianClustering=jhu_primitives.gclust:GaussianClustering',
             'jhu_primitives.NonParametricClustering=jhu_primitives.nonpar:NonParametricClustering',
@@ -67,7 +69,7 @@ def read_package_variable(key):
     },
     package_data = {'': ['*.r', '*.R']},
     include_package_data = True,
-    install_requires=['typing', 'numpy', 'scipy',
+    install_requires=['typing', 'numpy', 'scipy', 'networkx', 'python-igraph',
                       'rpy2', 'sklearn', 'jinja2', 'd3m'],
     url='https://github.com/neurodata/primitives-interfaces',
 )
@@ -77,6 +79,7 @@ def read_package_variable(key):
         PACKAGE_NAME,
         'jhu_primitives.ase',
         'jhu_primitives.lse',
+        'jhu_primitives.lcc',
         'jhu_primitives.dimselect',
         'jhu_primitives.gclust',
         'jhu_primitives.nonpar',