In [15]:
import pandas as pd
import numpy as np
from random import sample
from itertools import combinations
from scipy.io import loadmat, savemat
from sklearn.cluster import SpectralClustering, KMeans
from typing import Sequence, List, Set
from clustering import spectral_partition
import random
import math

In [16]:
def clustering_to_labeling(clusters):
    labels = np.zeros(sum(map(len, clusters)), dtype=int)
    for k, cluster in enumerate(clusters):
        for v in cluster:
            labels[v] = k
    return labels

In [17]:
def vi(predicted_labels: Sequence[int],
       true_labels: Sequence[int]) -> float:
    """Variation of information between two labelings of the same nodes.

    With ``p_i`` and ``q_j`` the fractions of nodes in predicted cluster ``i``
    and true cluster ``j``, and ``r_ij`` the fraction of nodes in both, this
    computes ``-sum_ij r_ij * (log(r_ij / p_i) + log(r_ij / q_j))`` using the
    natural logarithm. Pairs with an empty overlap contribute nothing.

    Args:
        predicted_labels: Predicted cluster label of each node.
        true_labels: Reference cluster label of each node, same length.

    Returns:
        The (non-negative) variation of information; 0 for identical
        partitions and for empty input.

    Raises:
        ValueError: If the two label sequences have different lengths.
    """
    n = len(predicted_labels)
    if len(true_labels) != n:
        # Without this check the fractions below are computed against the
        # wrong node count and the result is silently meaningless.
        raise ValueError(
            'predicted_labels and true_labels must have the same length '
            f'(got {n} and {len(true_labels)})')

    def expand(labels: Sequence[int]) -> List[Set[int]]:
        # Group node indices by label: one set of nodes per distinct label.
        expanded = {}
        for node, label in enumerate(labels):
            expanded.setdefault(label, set()).add(node)
        return list(expanded.values())

    predicted = expand(predicted_labels)
    true = expand(true_labels)

    total = 0.0
    for true_nodes in true:
        q_j = len(true_nodes) / n
        for predicted_nodes in predicted:
            overlap = len(predicted_nodes & true_nodes)
            if overlap == 0:
                # r_ij == 0: the term is taken as 0 (and log(0) is avoided).
                continue
            r_ij = overlap / n
            p_i = len(predicted_nodes) / n
            total += r_ij * (np.log(r_ij / p_i) + np.log(r_ij / q_j))
    # The accumulated sum is <= 0; abs() flips the sign and also turns -0.0
    # into 0.0 for identical partitions.
    return abs(total)


In [18]:
def label_to_cluster(labels):
    """Group node indices by cluster label.

    Args:
        labels: Iterable with the cluster label of each node; the position in
            the iterable is the node index.

    Returns:
        List of sets of node indices, one set per distinct label, ordered by
        ascending label value. For labels that are exactly ``0..k-1`` the
        set at position ``c`` holds the nodes labelled ``c``.
    """
    label_list = list(labels)
    # Map each distinct label to a dense position. Indexing the output list by
    # the raw label value raises IndexError as soon as the labels have a gap
    # (e.g. [0, 2, 2] has two distinct labels but a maximum label of 2).
    position = {label: idx
                for idx, label in enumerate(sorted(set(label_list)))}
    v_hat = [set() for _ in position]
    for node, label in enumerate(label_list):
        v_hat[position[label]].add(node)
    return v_hat

In [19]:
from itertools import product


def edge_error_rate(predicted_v, true_v):
    """Fraction of ordered node pairs whose co-membership is predicted wrongly.

    Each clustering is turned into an ``n x n`` indicator matrix with a 1 at
    ``(i, j)`` when nodes ``i`` and ``j`` share a cluster (the diagonal is
    included). The result is the number of differing entries divided by
    ``n * n``.

    Args:
        predicted_v: Iterable of collections of integer node indices.
        true_v: Sequence of collections of integer node indices; its total
            size defines ``n``.

    Returns:
        The disagreement rate in ``[0, 1]``; ``0.0`` when there are no nodes.

    Raises:
        IndexError: If a cluster references a node index outside ``0..n-1``.
    """
    n = sum(map(len, true_v))

    def co_membership(clusters):
        # Fill one square block per cluster in a single vectorised assignment
        # instead of visiting every (i, j) pair from Python.
        same = np.zeros((n, n), dtype=bool)
        for cluster in clusters:
            members = np.array(list(cluster), dtype=np.intp)
            same[np.ix_(members, members)] = True
        return same

    A_hat = co_membership(predicted_v)
    A = co_membership(true_v)
    if n == 0:
        # No node pairs exist, so nothing can be wrong; avoids 0 / 0 -> NaN.
        return 0.0
    return (A_hat != A).sum() / (n * n)

In [20]:
# Number of nodes: the sampling loop later in this notebook allocates n x n
# matrices and gives each of the K ground-truth clusters int(n / K) nodes.
n = 900

In [None]:
# Experiment configurations. The three lists are index-aligned: configuration
# `at` uses k_pred_s[at], ks[at] and ts[at].
k_pred_s = [14, 24]
ks = [3, 30]
ts = [41014, 277367]

# Every unordered node pair (i, j) with i < j. Built once: it depends only on
# n, not on the configuration, the repeat count or the trial seed.
all_pairs = list(combinations(range(n), 2))

# zip() walks exactly the configurations that are defined; indexing the lists
# with a fixed range(4) raised IndexError on the third pass because each list
# holds only two entries.
for at, (K_pred, K, T) in enumerate(zip(k_pred_s, ks, ts)):
    # Ground-truth labels: K consecutive groups of int(n / K) nodes each.
    labels_true = sum([[_k] * int(n / K) for _k in range(K)], [])

    # NOTE(review): 'P' is indexed as P[i, j] for node pairs below, so it is
    # presumably an n x n matrix of pairwise probabilities -- confirm.
    pair_prob_mat = loadmat(f'../../mat/pair_prob_mat_K={K}.mat')['P']

    # Number of queries issued per sampled pair.
    repeats = np.array([1]) if K < 90 else np.array([1, 3, 5])
    for repeat in repeats:

        # One matrix per trial for this (K, repeat) setting.
        A_at = []
        F_at = []
        Yun_at = []

        for it in range(10):
            # Seed both generators with the trial index so that every trial is
            # reproducible: `random` drives the pair sampling, `np.random` the
            # simulated query answers.
            np.random.seed(it)
            random.seed(it)

            A = np.zeros((n, n), dtype=int)    # majority-vote outcome, symmetric
            F = np.zeros((n, n), dtype=int)    # queries spent per pair, symmetric
            Yun = np.zeros((n, n), dtype=int)  # positive answers, entry (i, j) with i < j only

            # Split the budget T across `repeat` queries per pair.
            num_samples = math.ceil(T / repeat)
            if K >= 90 and repeat == 1:
                # Query every pair once (n * (n - 1) / 2 pairs).
                num_samples = len(all_pairs)

            # Sampling is without replacement, so each pair is visited once.
            pairs = sample(all_pairs, num_samples)

            for i, j in pairs:
                # Each query answers 1 with probability pair_prob_mat[i, j].
                num_ones = (np.random.random(size=repeat) < pair_prob_mat[i, j]).sum()
                Yun[i, j] += num_ones
                F[i, j] += repeat
                F[j, i] += repeat
                # Strict majority of positive answers links the pair; entries
                # start at 0, so only the positive case needs writing.
                if num_ones > repeat / 2:
                    A[i, j] = 1
                    A[j, i] = 1

            A_at.append(A)
            F_at.append(F)
            Yun_at.append(Yun)

        # savemat(f'./adjK{K}R{repeat}.mat', {f'AK{K}R{repeat}': A_at})
        # savemat(f'./FK{K}R{repeat}.mat', {f'FK{K}R{repeat}': F_at})
        # savemat(f'./YunK{K}R{repeat}.mat', {f'YunK{K}R{repeat}': Yun_at})