In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

import d3m
from d3m.container import pandas as pd
from d3m.container.dataset import D3MDatasetLoader
from d3m.metadata import hyperparams

import os

from jhu_primitives.utils import file_path_conversion
from jhu_primitives.utils.util import data_file_uri

import jhu_primitives as jhu

d3m.primitives.link_prediction.data_conversion.JHU: Primitive's Python path does not adhere to d3m.primitives namespace specification. Reason: must have a known primitive name segment.
d3m.primitives.link_prediction.data_conversion.JHU: Primitive is not providing a description through its docstring.
d3m.primitives.jhu_primitives.LinkPredictionRankClassifier: Primitive's Python path does not adhere to d3m.primitives namespace specification. Reason: must have 5 segments.
d3m.primitives.jhu_primitives.LinkPredictionRankClassifier: Primitive is not providing a description through its docstring.
d3m.primitives.vertex_nomination.spectral_vertex_nomination.JHU: Primitive's Python path does not adhere to d3m.primitives namespace specification. Reason: must have a known primitive name segment.


In [2]:
# Load the cached LL1 datasets (ground truth, train split, test split).
import pickle  # public module; it uses the C accelerator (_pickle) automatically when available

# Presumably how the cache files below were produced (kept for reference):
# pickle.dump(data_TRUTH, open('LL1_truth.pkl', 'wb'))
# pickle.dump(data_TRAIN, open('LL1_train.pkl', 'wb'))
# pickle.dump(data_TEST, open('LL1_test.pkl', 'wb'))

# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
# `with` closes each file handle deterministically instead of leaking it.
with open('LL1_truth.pkl', 'rb') as fh:
    truth = pickle.load(fh)
with open('LL1_train.pkl', 'rb') as fh:
    train = pickle.load(fh)
with open('LL1_test.pkl', 'rb') as fh:
    test = pickle.load(fh)

In [3]:
# LL1 dataset (LL1_*.pkl): adjacency spectral embedding (ASE) pipeline

In [4]:
# Pipeline: largest connected component -> adjacency spectral embedding ->
# Gaussian classification; the last expression is the fraction of test
# predictions that match the ground-truth class labels.

# `defaults` is a classmethod and has to be *called*: the bare attribute is a
# bound method, not a hyper-parameter object.
hp_lcc = jhu.lcc.lcc.Hyperparams.defaults()
hp_ase = jhu.ase.ase.Hyperparams({'use_attributes': True, 'max_dimension': 5, 'which_elbow': 1})
hp_gclass = jhu.gclass.gclass.Hyperparams.defaults()

# Initialize
LCC = jhu.LargestConnectedComponent(hyperparams=hp_lcc)
ASE = jhu.AdjacencySpectralEmbedding(hyperparams=hp_ase)
GCLASS = jhu.GaussianClassification(hyperparams=hp_gclass)

# Train: the classifier is fitted on the embedding of the training graph.
lcc_train = LCC.produce(inputs=train).value
ase_train = ASE.produce(inputs=lcc_train).value
GCLASS.set_training_data(inputs=ase_train)
GCLASS.fit()

# Test: same transformations, then predict.
lcc_test = LCC.produce(inputs=test).value
ase_test = ASE.produce(inputs=lcc_test).value
predictions = GCLASS.produce(inputs=ase_test).value

# Score.
# NOTE(review): d3mIndex is used as a *positional* index into the truth labels;
# this assumes d3mIndex equals the row position in learningData -- confirm.
# The explicit int cast keeps the indexer valid even if d3mIndex comes back as strings.
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]
preds = np.array(predictions['classLabel'])
# Predictions are cast to str before the element-wise comparison with the truth labels.
np.mean(preds.astype(str) == truth_labels)

0.9125

In [5]:
# LL1 dataset (LL1_*.pkl): Laplacian spectral embedding (LSE) pipeline

In [6]:
# Pipeline: largest connected component -> Laplacian spectral embedding ->
# Gaussian classification; the last expression is the fraction of test
# predictions that match the ground-truth class labels.

# `defaults` is a classmethod and has to be *called*: the bare attribute is a
# bound method, not a hyper-parameter object.
hp_lcc = jhu.lcc.lcc.Hyperparams.defaults()
hp_lse = jhu.lse.lse.Hyperparams({'use_attributes': True, 'max_dimension': 5, 'which_elbow': 1})
hp_gclass = jhu.gclass.gclass.Hyperparams.defaults()

# Initialize
LCC = jhu.LargestConnectedComponent(hyperparams=hp_lcc)
LSE = jhu.LaplacianSpectralEmbedding(hyperparams=hp_lse)
GCLASS = jhu.GaussianClassification(hyperparams=hp_gclass)

# Train: the classifier is fitted on the embedding of the training graph.
lcc_train = LCC.produce(inputs=train).value
lse_train = LSE.produce(inputs=lcc_train).value
GCLASS.set_training_data(inputs=lse_train)
GCLASS.fit()

# Test: same transformations, then predict.
lcc_test = LCC.produce(inputs=test).value
lse_test = LSE.produce(inputs=lcc_test).value
predictions = GCLASS.produce(inputs=lse_test).value

# Score.
# NOTE(review): d3mIndex is used as a *positional* index into the truth labels;
# this assumes d3mIndex equals the row position in learningData -- confirm.
# The explicit int cast keeps the indexer valid even if d3mIndex comes back as strings.
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]
preds = np.array(predictions['classLabel'])
# Predictions are cast to str before the element-wise comparison with the truth labels.
np.mean(preds.astype(str) == truth_labels)

0.8375

In [7]:
# LL1 dataset (LL1_*.pkl): spectral graph clustering (SGC)

In [8]:
# Spectral graph clustering with default hyper-parameters: fit on the training
# dataset, predict on the test dataset, then report the fraction of predictions
# that match the ground-truth class labels.
hp_sgc = jhu.sgc.sgc.Hyperparams().defaults()
SGC = jhu.SpectralGraphClustering(hyperparams=hp_sgc)

SGC.set_training_data(inputs=train)
SGC.fit()
predictions = SGC.produce(inputs=test).value

# Predicted labels, and the truth labels picked out at the predictions' d3mIndex
# values (cast to int so they can be used as a positional indexer).
preds = np.array(predictions['classLabel'])
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]

# Accuracy: mean of the element-wise match mask (predictions compared as strings).
np.mean(preds.astype(str) == truth_labels)

0.9125

In [None]:
# Load the three views of a D3M dataset (full/ground truth, TRAIN, TEST), cache
# each one as a pickle, then read the caches back into `truth`, `train`, `test`.
# NOTE: this rebinds `truth`/`train`/`test`; cells run afterwards see this data.
import pickle  # public module; it uses the C accelerator (_pickle) automatically when available

abs_file_path = os.path.abspath(os.getcwd())

# (dataset_type passed to data_file_uri, cache file written for it)
# NOTE(review): the recorded output shows interactive prompts while this cell
# runs -- presumably issued by data_file_uri on each call; confirm.
for dataset_type, pkl_path in (
    ("", 'EDGELIST_truth.pkl'),
    ("TRAIN", 'EDGELIST_train.pkl'),
    ("TEST", 'EDGELIST_test.pkl'),
):
    dataset_uri = data_file_uri(abs_file_path, uri="file", datasetDoc=True, dataset_type=dataset_type)
    data = D3MDatasetLoader().load(dataset_uri=dataset_uri)
    # `with` guarantees the file is flushed and closed before it is read back below.
    with open(pkl_path, 'wb') as fh:
        pickle.dump(data, fh)

# Read the caches back (round-trips through pickle, exactly as before).
# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
with open('EDGELIST_truth.pkl', 'rb') as fh:
    truth = pickle.load(fh)
with open('EDGELIST_train.pkl', 'rb') as fh:
    train = pickle.load(fh)
with open('EDGELIST_test.pkl', 'rb') as fh:
    test = pickle.load(fh)

Enter 
 0: exit 
 1: seed_datasets_current 
 2: training_datasets 
 3: if already in the data folder 
1
Enter 
 0: exit 
 Name of the data folder (case sensitive; must be in datasets/seed_datasets_current) 
LL1_EDGELIST_net_nomination_seed
Enter 
 0: exit 
 Name of the data folder (case sensitive; must be in datasets/seed_datasets_current) 
LL1_EDGELIST_net_nomination
Enter 
 0: exit 
 Name of the data folder (case sensitive; must be in datasets/seed_datasets_current) 
LL1_EDGELIST_net_nom_seed
Enter 
 0: exit 
 Name of the data folder (case sensitive; must be in datasets/seed_datasets_current) 
LL1_EDGELIST_net_nomination_seed


In [10]:
# EDGELIST dataset (EDGELIST_*.pkl): adjacency spectral embedding (ASE) pipeline

In [11]:
# Pipeline: largest connected component -> adjacency spectral embedding ->
# Gaussian classification; the last expression is the fraction of test
# predictions that match the ground-truth class labels.

# `defaults` is a classmethod and has to be *called*: the bare attribute is a
# bound method, not a hyper-parameter object.
hp_lcc = jhu.lcc.lcc.Hyperparams.defaults()
hp_ase = jhu.ase.ase.Hyperparams({'use_attributes': True, 'max_dimension': 5, 'which_elbow': 1})
hp_gclass = jhu.gclass.gclass.Hyperparams.defaults()

# Initialize
LCC = jhu.LargestConnectedComponent(hyperparams=hp_lcc)
ASE = jhu.AdjacencySpectralEmbedding(hyperparams=hp_ase)
GCLASS = jhu.GaussianClassification(hyperparams=hp_gclass)

# Train: the classifier is fitted on the embedding of the training graph.
lcc_train = LCC.produce(inputs=train).value
ase_train = ASE.produce(inputs=lcc_train).value
GCLASS.set_training_data(inputs=ase_train)
GCLASS.fit()

# Test: same transformations, then predict.
lcc_test = LCC.produce(inputs=test).value
ase_test = ASE.produce(inputs=lcc_test).value
predictions = GCLASS.produce(inputs=ase_test).value

# Score.
# NOTE(review): d3mIndex is used as a *positional* index into the truth labels;
# this assumes d3mIndex equals the row position in learningData -- confirm.
# The explicit int cast keeps the indexer valid even if d3mIndex comes back as strings.
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]
preds = np.array(predictions['classLabel'])
# Predictions are cast to str before the element-wise comparison with the truth labels.
np.mean(preds.astype(str) == truth_labels)

0.6625

In [12]:
# EDGELIST dataset (EDGELIST_*.pkl): Laplacian spectral embedding (LSE) pipeline

In [13]:
# Pipeline: largest connected component -> Laplacian spectral embedding ->
# Gaussian classification; the last expression is the fraction of test
# predictions that match the ground-truth class labels.

# `defaults` is a classmethod and has to be *called*: the bare attribute is a
# bound method, not a hyper-parameter object.
hp_lcc = jhu.lcc.lcc.Hyperparams.defaults()
hp_lse = jhu.lse.lse.Hyperparams({'use_attributes': True, 'max_dimension': 5, 'which_elbow': 1})
hp_gclass = jhu.gclass.gclass.Hyperparams.defaults()

# Initialize
LCC = jhu.LargestConnectedComponent(hyperparams=hp_lcc)
LSE = jhu.LaplacianSpectralEmbedding(hyperparams=hp_lse)
GCLASS = jhu.GaussianClassification(hyperparams=hp_gclass)

# Train: the classifier is fitted on the embedding of the training graph.
lcc_train = LCC.produce(inputs=train).value
lse_train = LSE.produce(inputs=lcc_train).value
GCLASS.set_training_data(inputs=lse_train)
GCLASS.fit()

# Test: same transformations, then predict.
lcc_test = LCC.produce(inputs=test).value
lse_test = LSE.produce(inputs=lcc_test).value
predictions = GCLASS.produce(inputs=lse_test).value

# Score.
# NOTE(review): d3mIndex is used as a *positional* index into the truth labels;
# this assumes d3mIndex equals the row position in learningData -- confirm.
# The explicit int cast keeps the indexer valid even if d3mIndex comes back as strings.
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]
preds = np.array(predictions['classLabel'])
# Predictions are cast to str before the element-wise comparison with the truth labels.
np.mean(preds.astype(str) == truth_labels)

0.75

In [14]:
# EDGELIST dataset (EDGELIST_*.pkl): spectral graph clustering (SGC)

In [15]:
# Spectral graph clustering with default hyper-parameters: fit on the training
# dataset, predict on the test dataset, then report the fraction of predictions
# that match the ground-truth class labels.
hp_sgc = jhu.sgc.sgc.Hyperparams().defaults()
SGC = jhu.SpectralGraphClustering(hyperparams=hp_sgc)

SGC.set_training_data(inputs=train)
SGC.fit()
predictions = SGC.produce(inputs=test).value

# Predicted labels, and the truth labels picked out at the predictions' d3mIndex
# values (cast to int so they can be used as a positional indexer).
preds = np.array(predictions['classLabel'])
truth_labels = np.array(truth['learningData']['classLabel'])[
    np.array(predictions['d3mIndex']).astype(int)
]

# Accuracy: mean of the element-wise match mask (predictions compared as strings).
np.mean(preds.astype(str) == truth_labels)

0.6625