In [2]:
from __future__ import print_function
from pathlib import Path
from random import shuffle
import random

import argparse
import copy
import numpy as np
import sys
import tensorflow as tf
import scipy.sparse as sp

from deeplp.models.data_prep import create_weighted_graph, load_data
from deeplp.models.data_prep import prepare_data, random_unlabel, calc_masks
from deeplp.models.deeplp_att import DeepLP_ATT
from deeplp.models.deeplp_edge import DeepLP_Edge
from deeplp.models.deeplp_wrbf import DeepLP_WRBF
from deeplp.models.lp import LP
from deeplp.models.utils import accuracy, indices_to_vec
from deeplp.models.data_prep import select_features


  return f(*args, **kwds)


In [3]:
def approx_chunk(seq, num):
    avg = len(seq) / float(num)
    out = []
    last = 0.0

    while last < len(seq):
        out.append(seq[int(last):int(last + avg)])
        last += avg

    return out

In [6]:
accuracies = np.zeros((1,10,10))
for i,data in enumerate(['cora']):

    for seed in range(10):
        true_labels, edge_features, node_features, graph \
        = load_data(data,'linqs',directed=1,confidence=0)

        labeled_indices, unlabeled_indices = \
            random_unlabel(true_labels,0.99,
                           seed=seed,confidence=0)
        num_nodes, num_classes = true_labels.shape

        labels, is_labeled = calc_masks(true_labels, labeled_indices, unlabeled_indices, logistic=0, confidence=0)

        for j in range(1,11):
            print(data,seed,j)
            final_accs = []

            labeled_indices_copy = copy.copy(labeled_indices)
            random.seed(seed)
            shuffle(labeled_indices_copy)
            cv_held_out_indices_list = approx_chunk(labeled_indices_copy, 5)


            for k, cv_held_out_indices in enumerate(cv_held_out_indices_list):
                
                cv_labeled_indices = [index for index in labeled_indices if index not in cv_held_out_indices]
                cv_unlabeled_indices = np.delete(np.arange(true_labels.shape[0]),cv_labeled_indices)
                cv_labels, cv_is_labeled = calc_masks(true_labels, cv_labeled_indices, cv_unlabeled_indices,logistic=0)
                lp = LP()
                index = np.hstack([cv_held_out_indices,cv_unlabeled_indices])
                unlabeled_pred = lp.iter_sp(cv_labels,
                                     graph,
                                     cv_is_labeled,
                                     10 * j,
                                     index)
                
                y_pred = np.argmax(unlabeled_pred,axis=1)
                y_true = np.argmax(true_labels[index],axis=1)
                acc = np.mean(y_pred == y_true)
                final_accs.append(acc)
            acc = np.mean(final_accs)
            accuracies[i,seed,j-1] = acc
            print("baseline",acc)


-----------cora-----------
Loading labels...
Loading edge features...
Weights: Asymmetric
Loading graph...
Done!
cora 0 1
baseline 0.651497975709
cora 0 2
baseline 0.653603238866
cora 0 3
baseline 0.655141700405
cora 0 4
baseline 0.655060728745
cora 0 5
baseline 0.651255060729
cora 0 6
baseline 0.648016194332
cora 0 7
baseline 0.642105263158
cora 0 8
baseline 0.635870445344
cora 0 9
baseline 0.626396761134
cora 0 10
baseline 0.617085020243
-----------cora-----------
Loading labels...
Loading edge features...
Weights: Asymmetric
Loading graph...
Done!
cora 1 1
baseline 0.594251012146
cora 1 2
baseline 0.597004048583
cora 1 3
baseline 0.595222672065
cora 1 4
baseline 0.594979757085
cora 1 5
baseline 0.588744939271
cora 1 6
baseline 0.583076923077
cora 1 7
baseline 0.578056680162
cora 1 8
baseline 0.570607287449
cora 1 9
baseline 0.566801619433
cora 1 10
baseline 0.561700404858
-----------cora-----------
Loading labels...
Loading edge features...
Weights: Asymmetric
Loading graph...
Done!

In [8]:
accuracies.shape

(1, 10, 10)

In [12]:
for i in np.mean(accuracies,axis=1)[0]:
    print(i)

0.593246963563
0.600380566802
0.599943319838
0.596510121457
0.588971659919
0.579935222672
0.56991902834
0.559789473684
0.549562753036
0.540842105263


In [3]:
fin_accs = np.zeros((5,10))
i=0

# for data,num_iter in zip(['flickr'],[10]):
for data,num_iter in zip(['flickr'],[10]):


# for data,num_iter in zip(['citeseer','cora','pubmed'],[20,50,20]):

    for seed in range(10):
        true_labels, features, edge_features, node_features, graph \
        = load_data(data,'flip',directed=1,confidence=0)

        labeled_indices, unlabeled_indices = \
            random_unlabel(true_labels,0.99,features,
                           seed=seed,confidence=0)
        num_nodes, num_classes = true_labels.shape

        labels, is_labeled = calc_masks(true_labels, labeled_indices, unlabeled_indices, logistic=0, confidence=0)

        lp = LP()
        unlabeled_pred = lp.iter_sp(labels,
                             graph,
                             is_labeled,
                             num_iter,
                             unlabeled_indices)

        y_pred = np.argmax(unlabeled_pred,axis=1)
        y_true = np.argmax(true_labels[unlabeled_indices],axis=1)
        acc = np.mean(y_pred == y_true)
        print(acc)
        fin_accs[i,seed] = acc
    i+=1

-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.6352806995311114
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.7442656190596882
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.5529083766316056
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.547712583956406
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.7225953618045875
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.7175262957800025
-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!
0.5886452921049297
-----------flickr-----------
Loading labels...
Loading features

In [4]:
np.mean(fin_accs,axis=1)

array([0.64403751, 0.        , 0.        , 0.        , 0.        ])