In [360]:
# https://github.com/jfpower/anfis-pytorch/blob/master/cmeans.py
# https://github.com/holtskinner/PossibilisticCMeans/blob/master/cmeans.py

import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader


class FuzzyCMeans(nn.Module):
    '''Fuzzy c-means clustering.

    Each call to the module performs ONE fuzzy c-means iteration on the data
    it is given: memberships are computed from the current centers, then the
    centers are replaced by the membership-weighted means of that data.

    Inputs may have any number of leading axes; the last axis is the feature
    axis. Leading axes are flattened for the center update and preserved in
    the membership tensor returned by `predict`.

    Args:
        num_clusters: number of cluster centers.
        num_features: size of the feature axis (used to allocate `C`).
        m: fuzzifier, must be > 1. Larger values give softer memberships.
        epsilon: lower bound applied to squared distances and to the
            membership mass of a cluster, to avoid division by zero.
        device: device on which the placeholder centers are allocated.

    Attributes:
        C: (num_clusters, num_features) tensor of cluster centers, kept as a
            non-persistent buffer so `module.to(device)` moves it while
            `state_dict()` stays unchanged.
        C_init: whether `C` has been initialized from data.
    '''

    def __init__(self, num_clusters, num_features, m=1.7, epsilon=1e-12, device=torch.device("cpu")):

        super(FuzzyCMeans, self).__init__()

        # The membership exponent is 2 / (m - 1): m == 1 divides by zero and
        # m < 1 inverts the ranking of the memberships.
        if m <= 1:
            raise ValueError("fuzzifier m must be greater than 1, got {}".format(m))

        self.num_clusters = num_clusters
        self.num_features = num_features
        self.m = m
        self.epsilon = epsilon

        self.register_buffer(
            "C",
            torch.zeros(self.num_clusters, self.num_features, device=device),
            persistent=False,
        )
        self.C_init = False


    def _initialize_C(self, X):
        '''Initialize the centers with `num_clusters` distinct samples of X.

        Sampling uses NumPy's global RNG (`np.random.choice` without
        replacement), so it raises ValueError when X holds fewer samples than
        clusters.
        '''

        X = X.reshape(-1, X.shape[-1])
        idx = np.random.choice(len(X), self.num_clusters, replace=False)
        self.C = X[torch.as_tensor(idx, dtype=torch.long, device=X.device)]
        self.C_init = True


    def _euclidean_distance(self, x1, x2):
        '''Pairwise Euclidean distance between samples and centers.

        Args:
            x1: (..., num_features) samples.
            x2: (num_clusters, num_features) centers.

        Returns:
            (..., num_clusters) distances, bounded below by sqrt(epsilon).
        '''

        # No squeeze(): size-1 sample/cluster axes must survive so that the
        # cluster axis is always the last one.
        sq_dist = (x1.unsqueeze(dim=-2) - x2).pow(2).sum(dim=-1)

        return sq_dist.clamp_min(self.epsilon).sqrt()


    def _update_C(self, X, U):
        '''Replace the centers by the U**m weighted means of the samples.'''

        X = X.reshape(-1, X.shape[-1])
        UM = U.reshape(-1, U.shape[-1]).pow(self.m)

        C = UM.t() @ X
        # A cluster with (numerically) zero membership mass keeps a finite center.
        C = C / UM.sum(dim=0).clamp_min(self.epsilon).unsqueeze(1)
        self.C = C


    def _assign_cluster(self, X):
        '''Fuzzy membership of every sample in every cluster.

        Implements u_ij = d_ij^(-2/(m-1)) / sum_k d_ik^(-2/(m-1)) with d the
        Euclidean distance. It is evaluated as a softmax over
        -(2/(m-1)) * log(d), which is the same quantity but cannot overflow
        when m is close to 1 or a sample coincides with a center.

        Returns:
            (..., num_clusters) tensor whose last axis sums to 1.
        '''

        D = self._euclidean_distance(X, self.C)
        exponent = 2. / (self.m - 1)

        return F.softmax(-exponent * D.log(), dim=-1)


    def predict(self, X, threshold=None, y=None):
        '''Return cluster memberships for X.

        Args:
            X: (..., num_features) samples.
            threshold: if given, memberships are binarized to 1./0. by
                `membership > threshold`.
            y: optional reference labels; only used together with
                `threshold`, in which case the cluster columns are reordered
                with `permute_labels` to best match `y`.

        Returns:
            (..., num_clusters) soft memberships, or their binarized (and
            possibly column-permuted) version.
        '''

        y_pred = self._assign_cluster(X)

        if threshold is not None:

            y_pred = torch.where(y_pred > threshold, 1., 0.)

            if y is not None:

                y_pred, _ = permute_labels(y_pred, y)

        return y_pred


    def forward(self, X):
        '''Run one fuzzy c-means iteration on X.

        On the first call the centers are initialized from X.

        Returns:
            Scalar tensor: summed absolute change of the centers caused by
            this iteration (usable as a convergence criterion).
        '''

        X = X.reshape(-1, X.shape[-1])

        if not self.C_init:
            self._initialize_C(X)

        # `_update_C` rebinds self.C to a new tensor, so this reference keeps
        # the pre-update centers.
        C_prev = self.C

        U = self._assign_cluster(X)
        self._update_C(X, U)

        loss = F.l1_loss(self.C, C_prev, reduction="sum")

        return loss

In [513]:
def fuzzy_c_means_full(dataset, num_clusters, m=1.7, num_epochs=250, min_change=1e-5):
    '''Fit a FuzzyCMeans model on the whole data tensor at once.

    Args:
        dataset: feature tensor; its last axis is the feature axis.
        num_clusters: number of clusters to fit.
        m: fuzzifier passed to FuzzyCMeans.
        num_epochs: maximum number of iterations.
        min_change: stop as soon as the summed absolute change of the
            centers in one iteration drops below this value.

    Returns:
        The fitted FuzzyCMeans instance.
    '''

    model = FuzzyCMeans(num_clusters, dataset.shape[-1], m)

    for epoch in range(num_epochs):

        # One call == one full-batch iteration; the return value is the center shift.
        center_shift = model(dataset)

        print("epoch {:4d} of {:4d} | loss {:6.4f}".format(epoch, num_epochs, center_shift.item()))

        converged = center_shift < min_change
        if converged:
            break

    return model


def fuzzy_c_means_mini_batch(dataset, num_clusters, m=1.7, batch_size=500, num_epochs=250, min_change=1e-5):
    '''Fit a FuzzyCMeans model by iterating over shuffled mini-batches.

    Args:
        dataset: a TensorDataset whose first tensor holds the features, with
            the feature axis last. Any further tensors (e.g. labels) are
            ignored. Leading axes of each batch are flattened into samples.
        num_clusters: number of clusters to fit.
        m: fuzzifier passed to FuzzyCMeans.
        batch_size: number of dataset items per mini-batch.
        num_epochs: maximum number of passes over the dataset.
        min_change: stop as soon as the summed absolute change of the centers
            over one epoch drops below this value.

    Returns:
        The fitted FuzzyCMeans instance.

    NOTE: every batch replaces the centers with the estimate computed from
    that batch alone (see FuzzyCMeans.forward); nothing is averaged across
    batches.
    '''

    # Only the feature width is needed; read it from the dataset itself
    # instead of touching any notebook-global tensor.
    num_features = dataset.tensors[0].shape[-1]

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    fcm = FuzzyCMeans(num_clusters, num_features, m)

    for epoch in range(num_epochs):

        # Centers before this epoch (all zeros before the very first batch).
        C_prev = fcm.C

        for batch in dataloader:

            X = batch[0]
            # The center update works on (num_samples, num_features) data.
            _ = fcm(X.reshape(-1, num_features))

        loss = F.l1_loss(fcm.C, C_prev, reduction="sum")

        print("epoch {:4d} of {:4d} | loss {:8.6f}".format(epoch, num_epochs, loss.item()))

        if loss < min_change:
            break

    return fcm

In [514]:
def permute_labels(y_pred, y):
    '''Reorder the cluster columns of `y_pred` to best agree with `y`.

    Cluster indices are arbitrary, so every permutation of the last axis is
    tried and the one with the highest element-wise agreement with `y` is
    kept. The search is exhaustive (K! permutations for K = y.shape[-1]), so
    it is only practical for a small number of clusters.

    Args:
        y_pred: (..., K) predicted one-hot / binarized memberships.
        y: (..., K) reference one-hot labels.

    Returns:
        (permuted y_pred, best permutation as a tuple of column indices).
        If no permutation agrees with `y` anywhere, the identity permutation
        is returned, i.e. `y_pred` comes back with its columns unchanged.
    '''

    num_classes = y.shape[-1]

    # Start below any reachable accuracy so the first candidate (the
    # identity) is always accepted, even when every accuracy is 0.
    best_acc = -1.
    best_perm = tuple(range(num_classes))

    # Iterate lazily; there is no need to hold all K! permutations in memory.
    for perm in itertools.permutations(range(num_classes)):

        y_pred_perm = y_pred[..., list(perm)]
        acc = (y_pred_perm == y).float().mean().item()

        if acc > best_acc:

            best_acc = acc
            best_perm = perm

    return y_pred[..., list(best_perm)], best_perm

In [521]:
# Load the arrays "A", "X" and "C" from the archive and build the clustering dataset.
data_path = "../data/data.npz"

# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. Only keep it for trusted files.
# The context manager closes the archive once all arrays have been read into memory.
with np.load(data_path, allow_pickle=True) as npz_file:
    data_dic = dict(npz_file.items())

A = torch.from_numpy(data_dic["A"]).float()
X = torch.from_numpy(data_dic["X"]).float()
C = torch.from_numpy(data_dic["C"]).float()

# Features are "A" and "X" concatenated along the last axis; "C" is the second
# dataset tensor (used as reference labels by the evaluation cell).
# Flattening to (num_samples * time_len * num_nodes, -1) is currently disabled:
# num_samples, time_len, num_nodes, _ = A.shape
# A_X = A_X.reshape(num_samples * time_len * num_nodes, -1)
# C = C.reshape(num_samples * time_len * num_nodes, -1)
A_X = torch.cat([A, X], dim=-1)
dataset = TensorDataset(A_X, C)

print(A_X.shape[0])

# NOTE(review): the fit is disabled, so `fcm` is NOT defined by this cell; the
# evaluation cell below only works if `fcm` is left over from an earlier run.
# fcm = fuzzy_c_means_mini_batch(dataset, num_clusters=4, m=1.7, num_epochs=100, batch_size=A_X.shape[0])
# fcm = sgd_cluster(dataset, num_clusters=4)

1000


In [519]:
# Evaluate the fitted model: binarize memberships, align cluster columns with
# the reference labels, then report the percentage of matching argmax indices.
A_X, y = dataset.tensors

y_pred = fcm.predict(A_X, threshold=0.4, y=y)

pred_idx = y_pred.argmax(-1)
true_idx = y.argmax(-1)
hits = (pred_idx == true_idx) * 1.

acc = hits.mean().item() * 100

print(round(acc, 2))

99.99


In [520]:
# Display the binarized, label-aligned memberships computed in the evaluation cell.
y_pred

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]])

In [483]:
# NOTE(review): presumably the chance-level accuracy (%) for 4 equally likely clusters — confirm.
(1/4) * 100

25.0