Skip to content

Commit

Permalink
Merge pull request #15 from jagterberg/hh-dev
Browse files Browse the repository at this point in the history
adding lcc
  • Loading branch information
jagterberg committed Jun 12, 2018
2 parents 96dfafa + a7e4486 commit b395f1d
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 1 deletion.
1 change: 1 addition & 0 deletions jhu_primitives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .lse import LaplacianSpectralEmbedding
from .dimselect import DimensionSelection
from .gclust import GaussianClustering
from .lcc import LargestConnectedComponent
from .nonpar import NonParametricClustering
from .numclust import NumberOfClusters
from .oocase import OutOfCoreAdjacencySpectralEmbedding
Expand Down
1 change: 1 addition & 0 deletions jhu_primitives/lcc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .lcc import LargestConnectedComponent
170 changes: 170 additions & 0 deletions jhu_primitives/lcc/lcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from rpy2 import robjects
from typing import Sequence, TypeVar, Union, Dict
import os
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
import numpy
from d3m import container
from d3m import utils
from d3m.metadata import hyperparams, base as metadata_module, params
from d3m.primitive_interfaces import base
from d3m.primitive_interfaces.base import CallResult
import igraph
import networkx


# Type aliases used to parameterise the primitive base class below:
# both the input and the output of the primitive are declared as d3m container ndarrays.
Inputs = container.ndarray
Outputs = container.ndarray

class Params(params.Params):
    """Empty ``params.Params`` subclass; it declares no fields of its own."""

class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameter container for ``LargestConnectedComponent``.

    No d3m hyper-parameter object is declared here; ``dim`` is a plain class
    attribute fixed to ``None``.
    """

    # A real hyper-parameter declaration was sketched but left disabled:
    # dim = hyperparams.Hyperparameter[None](default=None)
    dim = None

def file_path_conversion(abs_file_path, uri="file"):
    r"""Convert a drive-prefixed absolute path to a forward-slash path or file URI.

    The input is expected to look like ``<drive>:<sep><rest>`` where ``<sep>``
    is either ``/`` or ``\`` (for example ``C:\data\graph.csv`` or
    ``C:/data/graph.csv``).

    Parameters
    ----------
    abs_file_path : str
        Path to convert. Only the first ``:`` is treated as the drive
        delimiter, so later colons in the path are preserved.
    uri : str
        ``"file"`` (the default) returns ``file://localhost/<rest>``; any
        other value returns ``<drive>:/<rest>``.

    Returns
    -------
    str or None
        The converted path. ``None`` is returned, after printing a diagnostic,
        when the path has no drive delimiter, has nothing after the first
        separator, or uses a separator other than ``/`` or ``\``.
    """
    local_drive, colon, remainder = abs_file_path.partition(":")
    if not colon:
        # Previously this surfaced as an IndexError; report it the same way
        # as the other invalid-input cases instead.
        print("Invalid file path: missing ':' after the drive name")
        return None

    # The first character after the colon is the separator; the rest is the path.
    path_sep, file_path = remainder[:1], remainder[1:]
    if len(file_path) == 0:
        print("Invalid file path: len(file_path) == 0")
        return None

    if path_sep == "/":
        # Restore the leading separator that was stripped above so that both
        # branches yield "/<rest>".
        s = "/" + file_path
    elif path_sep == "\\":
        # Re-join the non-empty components with forward slashes.
        s = "".join("/" + part for part in file_path.split("\\") if part)
    else:
        print("Unsupported path separator!")
        return None

    if uri == "file":
        return "file://localhost" + s
    return local_drive + ":" + s

class LargestConnectedComponent(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """Transformer primitive that finds the largest connected component of a graph.

    ``produce`` accepts several graph representations, converts them to an
    ``igraph.Graph`` and returns the vertex indices of the largest connected
    component.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '32fec24f-6861-4a4c-88f3-d4ec2bc1b486',
        'version': "0.1.0",
        'name': "jhu.lcc",
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.jhu_primitives.LargestConnectedComponent',
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['spectral clustering'],
        'source': {
            'name': "JHU",
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://github.com/neurodata/primitives-interfaces/jhu_primitives/lcc/lcc.py',
                # 'https://github.com/youngser/primitives-interfaces/blob/jp-devM1/jhu_primitives/ase/ase.py',
                'https://github.com/neurodata/primitives-interfaces.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [
            {
                'type': 'UBUNTU',
                'package': 'r-base',
                'version': '3.4.2'
            },
            {
                'type': 'UBUNTU',
                'package': 'libxml2-dev',
                'version': '2.9.4'
            },
            {
                # NOTE(review): this version string is identical to libxml2-dev's above;
                # presumably copy-pasted -- confirm the intended libpcre3-dev version.
                'type': 'UBUNTU',
                'package': 'libpcre3-dev',
                'version': '2.9.4'
            },
            # {
            #     'type': 'UBUNTU',
            #     'package': 'r-base-dev',
            #     'version': '3.4.2'
            # },
            # {
            #     'type': 'UBUNTU',
            #     'package': 'r-recommended',
            #     'version': '3.4.2'
            # },
            {
                'type': 'PIP',
                'package_uri': 'git+https://github.com/neurodata/primitives-interfaces.git@{git_commit}#egg=jhu_primitives'.format(
                    git_commit=utils.current_git_commit(os.path.dirname(__file__)),),
            },
            {
                'type': 'PIP',
                'package': 'python_igraph',
                'version': '0.7.1'
            },
            {
                'type': 'PIP',
                'package': 'networkx',
                'version': '2.1'
            }
        ],
        # URIs at which one can obtain code for the primitive, if available.
        # 'location_uris': [
        #     'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
        #         git_commit=utils.current_git_commit(os.path.dirname(__file__)),
        #     ),
        # ],
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        # NOTE(review): "GAUSSIAN_PROCESS" does not describe a connected-component search;
        # looks inherited from another primitive -- confirm against the schema vocabulary.
        'algorithm_types': [
            "GAUSSIAN_PROCESS"
        ],
        'primitive_family': "GRAPH_CLUSTERING"
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, base.DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Find the largest connected component of a graph.

        Input: g: an n x n matrix, n x 2 edge list (numpy array or Python list),
            a networkx Graph, or igraph Graph. A 2 x 2 array is ambiguous and is
            treated as an n x n matrix.
        Output: a ``CallResult`` wrapping a 1-D ``container.ndarray`` holding the
            vertex indices of the largest connected component of g. When several
            components share the largest size the first one reported by igraph
            is returned; a graph without vertices yields an empty array.

        ``timeout`` and ``iterations`` are accepted for interface compatibility
        and are not used.

        Raises ``ValueError`` for an array that is neither n x n nor n x 2 and
        ``TypeError`` for an unsupported input type (instead of printing a
        message and returning ``None``, which is not a valid ``CallResult``).
        """
        graph = self._as_igraph(inputs)

        components = graph.clusters()
        sizes = components.sizes()
        if sizes:
            largest_component = components[int(numpy.argmax(sizes))]
        else:
            # No vertices at all: argmax would fail on an empty sequence.
            largest_component = []

        result = numpy.array(largest_component, dtype=int)

        outputs = container.ndarray(result)

        return base.CallResult(outputs)

    @staticmethod
    def _as_igraph(g):
        """Normalise any supported graph representation to an ``igraph.Graph``."""
        # isinstance (rather than an exact type comparison) is required so that
        # subclasses such as container.ndarray -- the declared input type -- match.
        if isinstance(g, igraph.Graph):
            return g

        if isinstance(g, networkx.Graph):
            # assumes nodes are labelled with non-negative integers -- TODO confirm with callers.
            # Passing n keeps isolated vertices, so an edgeless graph is not reduced to zero vertices.
            return igraph.Graph(n=g.number_of_nodes(), edges=list(g.edges()))

        if isinstance(g, numpy.ndarray):
            matrix = numpy.asarray(g)
            if matrix.ndim != 2:
                raise ValueError("Neither n x n nor n x 2. Please submit a square matrix or edge list.")
            n_rows, n_cols = matrix.shape
            if n_rows == n_cols:  # n x n matrix
                # Every non-zero entry is an edge. A symmetric matrix lists each edge
                # twice, which does not affect connectivity.
                sources, targets = numpy.nonzero(matrix)
                return igraph.Graph(n=n_rows, edges=list(zip(sources.tolist(), targets.tolist())))
            if n_cols == 2:  # n x 2 edge list
                # tolist() yields plain Python numbers instead of numpy scalars/rows.
                return igraph.Graph(edges=[tuple(edge) for edge in matrix.tolist()])
            raise ValueError("Neither n x n nor n x 2. Please submit a square matrix or edge list.")

        if isinstance(g, list):
            return igraph.Graph(g)

        raise TypeError("Unsupported graph type")
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def read_package_variable(key):
packages=[
PACKAGE_NAME,
'jhu_primitives.ase',
'jhu_primitives.lcc',
'jhu_primitives.lse',
'jhu_primitives.dimselect',
'jhu_primitives.gclust',
Expand All @@ -54,6 +55,7 @@ def read_package_variable(key):
'd3m.primitives': [
'jhu_primitives.AdjacencySpectralEmbedding=jhu_primitives.ase:AdjacencySpectralEmbedding',
'jhu_primitives.LaplacianSpectralEmbedding=jhu_primitives.lse:LaplacianSpectralEmbedding',
'jhu_primitives.LargestConnectedComponent=jhu_primitives.lcc:LargestConnectedComponent',
'jhu_primitives.DimensionSelection=jhu_primitives.dimselect:DimensionSelection',
'jhu_primitives.GaussianClustering=jhu_primitives.gclust:GaussianClustering',
'jhu_primitives.NonParametricClustering=jhu_primitives.nonpar:NonParametricClustering',
Expand All @@ -67,7 +69,7 @@ def read_package_variable(key):
},
package_data = {'': ['*.r', '*.R']},
include_package_data = True,
install_requires=['typing', 'numpy', 'scipy',
install_requires=['typing', 'numpy', 'scipy', 'networkx',
'python-igraph', 'rpy2', 'sklearn', 'jinja2', 'd3m'],
url='https://github.com/neurodata/primitives-interfaces',
)
Expand All @@ -77,6 +79,7 @@ def read_package_variable(key):
PACKAGE_NAME,
'jhu_primitives.ase',
'jhu_primitives.lse',
'jhu_primitives.lcc',
'jhu_primitives.dimselect',
'jhu_primitives.gclust',
'jhu_primitives.nonpar',
Expand Down

0 comments on commit b395f1d

Please sign in to comment.