In [26]:
import os
import time

import pandas as pd
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

from data_utils import DataLoader
from spectral_mix import SpectralMix

In [44]:
# Single loader instance; the evaluation loop below calls dl.load_dataset(name)
# once for every entry in test_params.
dl = DataLoader()

# One configuration per benchmark dataset. 'dataset_name' is handed to the
# loader; 'd' and 'k' are forwarded unchanged to SpectralMix(d=..., k=...).
# NOTE(review): presumably d is the embedding dimensionality and k the number
# of clusters -- confirm against the SpectralMix implementation.
test_params = [
    dict(dataset_name='acm', d=9, k=3),
    dict(dataset_name='dblp', d=2, k=3),
    dict(dataset_name='flickr', d=11, k=7),
    dict(dataset_name='imdb', d=2, k=3),
]

# Reproducibility run: for every configuration in test_params, load the
# dataset, fit SpectralMix, score the predicted labels against the ground
# truth (NMI and ARI), and record the fit time. One row per dataset is
# collected and written to a CSV file at the end.
result_columns = ['dataset', 'nmi', 'ari', 'runtime']
result_rows = []  # rows are gathered here and turned into a DataFrame once
for reprod_test in test_params:
    dataset_name = reprod_test['dataset_name']
    d = reprod_test['d']
    k = reprod_test['k']

    dataset = dl.load_dataset(dataset_name)
    print(f'=== {dataset_name} ===')
    print(dataset['adjacency_matrix'].shape)
    # The attribute matrix may be None; its shape is only printed when present.
    # It is still passed to fit() below either way, exactly as before.
    if dataset['attribute_matrix'] is not None:
        print(dataset['attribute_matrix'].shape)
    print(dataset['true_labels'].shape)

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    # The timed span covers model construction plus fit().
    begin = time.perf_counter()
    sm = SpectralMix(d=d, k=k)
    sm.fit(dataset['adjacency_matrix'], dataset['attribute_matrix'])
    end = time.perf_counter()

    labels = sm.labels_
    nmi = normalized_mutual_info_score(dataset['true_labels'], labels)
    ari = adjusted_rand_score(dataset['true_labels'], labels)

    # Order must match result_columns.
    result = [dataset_name, nmi, ari, end - begin]
    result_rows.append(result)

    print(result)

test_results = pd.DataFrame(result_rows, columns=result_columns)

# Create the output directory up front so a missing folder does not discard
# the results of the (slow) runs above.
# NOTE: the file name spelling is kept as-is because other code may read it.
os.makedirs('test_results', exist_ok=True)
test_results.to_csv('test_results/reproducability_test.csv')

=== imdb ===
(3550, 3550, 2)
(3550, 2000)
(3550,)


100%|██████████| 100/100 [00:54<00:00,  1.84it/s]

['imdb', 0.002845453447688705, 0.001299650749314584, 54.57835626602173]



