Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding lcc #15

Merged
merged 1 commit into from
Jun 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions jhu_primitives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .lse import LaplacianSpectralEmbedding
from .dimselect import DimensionSelection
from .gclust import GaussianClustering
from .lcc import LargestConnectedComponent
from .nonpar import NonParametricClustering
from .numclust import NumberOfClusters
from .oocase import OutOfCoreAdjacencySpectralEmbedding
Expand Down
1 change: 1 addition & 0 deletions jhu_primitives/lcc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .lcc import LargestConnectedComponent
170 changes: 170 additions & 0 deletions jhu_primitives/lcc/lcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from rpy2 import robjects
from typing import Sequence, TypeVar, Union, Dict
import os
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
import numpy
from d3m import container
from d3m import utils
from d3m.metadata import hyperparams, base as metadata_module, params
from d3m.primitive_interfaces import base
from d3m.primitive_interfaces.base import CallResult
import igraph
import networkx


Inputs = container.ndarray
Outputs = container.ndarray

class Params(params.Params):
    """Empty parameter container.

    Declares no fields of its own and is not referenced anywhere else in the
    visible part of this module.
    """

class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameter container passed to ``LargestConnectedComponent``."""

    # ``dim`` is assigned a plain ``None`` here; the alternative declaration
    # ``hyperparams.Hyperparameter[None](default=None)`` was left disabled.
    dim = None

def file_path_conversion(abs_file_path, uri="file"):
    r"""Convert a drive-qualified absolute path to a URI or a forward-slash path.

    The input is expected to look like ``<drive>:<sep><segment><sep>...``
    where ``<sep>`` is either ``/`` or ``\``, for example ``C:\data\g.csv``
    or ``C:/data/g.csv``. Empty segments (doubled or trailing separators) are
    dropped, and the result always uses ``/`` with a leading ``/``.

    Args:
        abs_file_path: Path string containing a ``:`` after the drive part.
        uri: ``"file"`` (default) returns ``file://localhost/<path>``; any
            other value returns ``<drive>:/<path>``.

    Returns:
        The converted string, or ``None`` (after printing a diagnostic) when
        the path has no ``:``, has nothing after the first separator, or uses
        a separator other than ``/`` or ``\``.
    """
    # partition() splits once on the first ':' and never raises, so paths
    # without a drive or with further ':' characters are handled explicitly.
    local_drive, colon, remainder = abs_file_path.partition(":")
    if not colon:
        print("Invalid file path: missing ':' after the drive")
        return None

    # remainder is "<sep><path>"; anything shorter than two characters leaves
    # an empty path once the separator is removed.
    if len(remainder) < 2:
        print("Invalid file path: len(file_path) == 0")
        return None

    path_sep = remainder[0]
    if path_sep not in ("/", "\\"):
        print("Unsupported path separator!")
        return None

    # Normalise both separator styles the same way so that the result always
    # starts with '/' (previously only the backslash branch did).
    segments = [part for part in remainder[1:].split(path_sep) if part]
    s = "".join("/" + part for part in segments)

    if uri == "file":
        return "file://localhost" + s
    return local_drive + ":" + s

class LargestConnectedComponent(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """Transformer primitive that extracts the largest connected component of a graph.

    ``produce`` accepts an adjacency matrix, an edge list, a networkx graph or
    an igraph graph and returns the vertex indices of the largest component.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '32fec24f-6861-4a4c-88f3-d4ec2bc1b486',
        'version': "0.1.0",
        'name': "jhu.lcc",
        # The same path the primitive is registered with entry points in setup.py.
        # NOTE(review): the matching setup.py entry should have the form
        # 'jhu_primitives.LargestConnectedComponent=jhu_primitives.lcc:LargestConnectedComponent'
        # -- verify it is registered that way.
        'python_path': 'd3m.primitives.jhu_primitives.LargestConnectedComponent',
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        # NOTE(review): 'spectral clustering' looks carried over from another
        # primitive; this class only computes connected components -- confirm.
        'keywords': ['spectral clustering'],
        'source': {
            'name': "JHU",
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://github.com/neurodata/primitives-interfaces/jhu_primitives/lcc/lcc.py',
                # 'https://github.com/youngser/primitives-interfaces/blob/jp-devM1/jhu_primitives/ase/ase.py',
                'https://github.com/neurodata/primitives-interfaces.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        'installation': [
            {
                'type': 'UBUNTU',
                'package': 'r-base',
                'version': '3.4.2'
            },
            {
                'type': 'UBUNTU',
                'package': 'libxml2-dev',
                'version': '2.9.4'
            },
            {
                'type': 'UBUNTU',
                'package': 'libpcre3-dev',
                # NOTE(review): same version string as libxml2-dev above -- confirm
                # this is the intended libpcre3-dev version.
                'version': '2.9.4'
            },
            # {
            #     'type': 'UBUNTU',
            #     'package': 'r-base-dev',
            #     'version': '3.4.2'
            # },
            # {
            #     'type': 'UBUNTU',
            #     'package': 'r-recommended',
            #     'version': '3.4.2'
            # },
            {
                'type': 'PIP',
                'package_uri': 'git+https://github.com/neurodata/primitives-interfaces.git@{git_commit}#egg=jhu_primitives'.format(
                    git_commit=utils.current_git_commit(os.path.dirname(__file__)),),
            },
            {
                'type': 'PIP',
                'package': 'python_igraph',
                'version': '0.7.1'
            },
            {
                'type': 'PIP',
                'package': 'networkx',
                'version': '2.1'
            }
        ],
        # URIs at which one can obtain code for the primitive, if available.
        # 'location_uris': [
        #     'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
        #         git_commit=utils.current_git_commit(os.path.dirname(__file__)),
        #     ),
        # ],
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        # NOTE(review): "GAUSSIAN_PROCESS" does not describe a connected-component
        # search; pick the appropriate term from the schema vocabulary.
        'algorithm_types': [
            "GAUSSIAN_PROCESS"
        ],
        'primitive_family': "GRAPH_CLUSTERING"
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, base.DockerContainer] = None) -> None:
        """Forward all arguments unchanged to the base primitive constructor."""
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Find the largest connected component of a graph.

        Input: g: an n x n matrix, n x 2 edge list, a networkx Graph, or igraph Graph
        Output: The largest connected component of g

        A square array is always read as an adjacency matrix, so a 2 x 2 array
        is treated as a matrix rather than as a two-row edge list. When several
        components share the maximum size, the first one reported by igraph is
        returned. ``timeout`` and ``iterations`` are accepted for interface
        compatibility and are not used.

        Returns:
            A ``CallResult`` wrapping a ``container.ndarray`` of the igraph
            vertex indices that make up the largest component.

        Raises:
            ValueError: if an array input is neither n x n nor n x 2.
            TypeError: if the input is not one of the supported graph types.
        """
        g = inputs

        # isinstance (not an exact type match) so that ndarray subclasses and
        # networkx Graph subclasses are accepted as well.
        if isinstance(g, numpy.ndarray):
            # Work on a base-class ndarray view of the data.
            matrix = numpy.asarray(g)
            if matrix.ndim != 2:
                raise ValueError("Neither n x n nor n x 2. Please submit a square matrix or edge list.")
            if matrix.shape[0] == matrix.shape[1]:  # n x n matrix
                # Go through networkx to extract the edge list of the adjacency matrix.
                g = networkx.Graph(matrix)
            elif matrix.shape[1] == 2:  # n x 2 edge list
                # tolist() hands igraph plain Python ints instead of numpy scalars.
                g = igraph.Graph(matrix.tolist())
            else:
                raise ValueError("Neither n x n nor n x 2. Please submit a square matrix or edge list.")
        elif isinstance(g, list):
            g = igraph.Graph(g)

        if isinstance(g, networkx.Graph):  # networkx graph
            # Pass the vertex count explicitly so isolated vertices survive the
            # conversion; an edge list alone cannot represent them.
            g = igraph.Graph(n=g.number_of_nodes(), edges=list(g.edges))

        if not isinstance(g, igraph.Graph):
            raise TypeError("Unsupported graph type")

        components = g.clusters()
        # max() keeps the first of equally sized components, matching argmax;
        # default=[] covers a graph without any vertices.
        largest_component = max(components, key=len, default=[])

        result = numpy.array(largest_component, dtype=int)

        outputs = container.ndarray(result)

        return base.CallResult(outputs)
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def read_package_variable(key):
packages=[
PACKAGE_NAME,
'jhu_primitives.ase',
'jhu_primitives.lcc',
'jhu_primitives.lse',
'jhu_primitives.dimselect',
'jhu_primitives.gclust',
Expand All @@ -54,6 +55,7 @@ def read_package_variable(key):
'd3m.primitives': [
'jhu_primitives.AdjacencySpectralEmbedding=jhu_primitives.ase:AdjacencySpectralEmbedding',
'jhu_primitives.LaplacianSpectralEmbedding=jhu_primitives.lse:LaplacianSpectralEmbedding',
'jhu_primitives.LargestConnectedComponent'
'jhu_primitives.DimensionSelection=jhu_primitives.dimselect:DimensionSelection',
'jhu_primitives.GaussianClustering=jhu_primitives.gclust:GaussianClustering',
'jhu_primitives.NonParametricClustering=jhu_primitives.nonpar:NonParametricClustering',
Expand All @@ -67,7 +69,7 @@ def read_package_variable(key):
},
package_data = {'': ['*.r', '*.R']},
include_package_data = True,
install_requires=['typing', 'numpy', 'scipy',
install_requires=['typing', 'numpy', 'scipy', 'networkx',
'python-igraph', 'rpy2', 'sklearn', 'jinja2', 'd3m'],
url='https://github.com/neurodata/primitives-interfaces',
)
Expand All @@ -77,6 +79,7 @@ def read_package_variable(key):
PACKAGE_NAME,
'jhu_primitives.ase',
'jhu_primitives.lse',
'jhu_primitives.lcc',
'jhu_primitives.dimselect',
'jhu_primitives.gclust',
'jhu_primitives.nonpar',
Expand Down