<a href="https://colab.research.google.com/github/shyDaniel/Binary_Classification/blob/master/DEC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Embedded Clustering (DEC) Implemented in PyTorch

08/2020

In [None]:
import os
# os.environ['PATH'] = '../caffe/build/tools:'+os.environ['PATH']
import sys
import time
import torch
import numpy as np
import torch.nn as nn
import torch.utils.data as data
from torch.autograd import Variable
import torchvision.datasets as dataset
import torchvision.transforms as transforms
import torch.optim as optim
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from scipy.optimize import linear_sum_assignment


# Short module-level aliases for the sklearn clustering metrics imported above.
nmi, ari = normalized_mutual_info_score, adjusted_rand_score

In [None]:
def acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one label mapping.

    Cluster ids are arbitrary, so predicted ids are matched to true labels
    with the Hungarian algorithm (``linear_sum_assignment``) such that the
    number of agreeing samples is maximal.

    :param y_true: ground-truth labels, array-like of non-negative integers
        (values are cast to int64)
    :param y_pred: predicted cluster ids, array-like of non-negative integers
        with the same number of elements as ``y_true`` (cast to int64)
    :return: fraction of samples that agree after the optimal mapping
    :raises ValueError: if the inputs are empty
    """
    # np.asarray lets plain lists through; casting both sides to int64 makes
    # them usable as indices even when they arrive as floats.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size, "y_true and y_pred must have the same size"
    if y_pred.size == 0:
        raise ValueError("acc() requires at least one sample")

    n_labels = int(max(y_pred.max(), y_true.max())) + 1
    # contingency[i, j] = number of samples predicted as i whose true label is j.
    contingency = np.zeros((n_labels, n_labels), dtype=np.int64)
    # np.add.at accumulates correctly when the same (i, j) pair repeats.
    np.add.at(contingency, (y_pred, y_true), 1)

    # linear_sum_assignment minimises cost, so turn counts into costs.
    rows, cols = linear_sum_assignment(contingency.max() - contingency)
    matched = contingency[rows, cols].sum()
    return matched * 1.0 / y_pred.size

In [None]:
# MNIST input pipeline: images are converted to tensors and normalised with
# mean 0.5 and std 1.0; both splits are served in mini-batches of `batch`.
path = '../mnist'
batch = 100
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,)),
])

# Datasets are downloaded into `path` on first use.
train_set = dataset.MNIST(root=path, train=True, download=True, transform=transform)
test_set = dataset.MNIST(root=path, train=False, download=True, transform=transform)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch, shuffle=False)

In [None]:
class DEC_AE(nn.Module):
    """
    Stacked auto encoder used by DEC.

    The encoder maps a flattened 28x28 image through 500-500-2000 hidden
    units to ``num_features`` latent dimensions; the decoder mirrors it.
    In pretrain mode ``forward`` returns the latent code and the
    reconstruction; otherwise it returns the latent code and the soft
    cluster assignment computed against ``clusterCenter``.
    """

    def __init__(self, num_classes, num_features):
        """
        :param num_classes: number of clusters (rows of ``clusterCenter``)
        :param num_features: dimensionality of the latent code
        """
        super(DEC_AE, self).__init__()
        self.dropout = nn.Dropout(p=0.1)
        # encoder
        self.fc1 = nn.Linear(28 * 28, 500)
        self.fc2 = nn.Linear(500, 500)
        self.fc3 = nn.Linear(500, 2000)
        self.fc4 = nn.Linear(2000, num_features)
        self.relu = nn.ReLU()
        # decoder (applied in the order fc_d4 -> fc_d3 -> fc_d2 -> fc_d1)
        self.fc_d1 = nn.Linear(500, 28 * 28)
        self.fc_d2 = nn.Linear(500, 500)
        self.fc_d3 = nn.Linear(2000, 500)
        self.fc_d4 = nn.Linear(num_features, 2000)
        # degrees of freedom of the Student t kernel in getTDistribution
        self.alpha = 1.0
        self.clusterCenter = nn.Parameter(torch.zeros(num_classes, num_features))
        self.pretrainMode = True
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                torch.nn.init.xavier_uniform_(m.weight)

    def setPretrain(self, mode):
        """To set training mode to pretrain or not,
        so that it can control to run only the Encoder or Encoder+Decoder"""
        self.pretrainMode = mode

    def updateClusterCenter(self, cc):
        """
        To update the cluster center. This is a method for pre-train phase.
        When a center is being provided by kmeans, we need to update it so
        that it is available for further training
        :param cc: the cluster centers to update, size of num_classes x num_features
        """
        centers = torch.as_tensor(
            cc, dtype=self.clusterCenter.dtype, device=self.clusterCenter.device)
        # clone() so the parameter never shares memory with the caller's array:
        # otherwise optimizer steps would silently rewrite the k-means centers
        # (and vice versa).
        self.clusterCenter.data = centers.clone()

    def getTDistribution(self, x, clusterCenter):
        """
        student t-distribution, as same as used in t-SNE algorithm.
         q_ij = (1 + dist(x_i, u_j)^2 / alpha)^(-(alpha + 1) / 2), then
         normalized over the clusters j.

         :param x: input data, in this context it is encoder output
         :param clusterCenter: the cluster center from kmeans
         :return: tensor with one row per sample and one column per cluster;
             each row sums to 1
         """
        diff = torch.unsqueeze(x, 1) - clusterCenter
        sq_dist = torch.sum(diff * diff, 2)
        q = 1.0 / (1.0 + sq_dist / self.alpha)
        # The exponent is (alpha + 1) / 2 as a whole; without the parentheses
        # Python evaluates (q ** (alpha + 1)) / 2 instead.
        q = q ** ((self.alpha + 1.0) / 2.0)
        return q / torch.sum(q, 1, keepdim=True)

    def forward(self, x):
        """
        Encode ``x`` and, depending on the mode, decode or soft-assign it.

        :param x: batch of images; it is flattened to rows of 28 * 28 values
        :return: ``(latent, reconstruction)`` in pretrain mode, where the
            reconstruction is reshaped to (-1, 1, 28, 28); otherwise
            ``(latent, q)`` with ``q`` from ``getTDistribution``
        """
        x = x.view(-1, 1 * 28 * 28)
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        x_e = x
        # if not in pretrain mode, we need encoder and t distribution output
        if not self.pretrainMode:
            return x_e, self.getTDistribution(x, self.clusterCenter)
        ##### encoder is done, followed by decoder #####
        x = self.relu(self.fc_d4(x))
        x = self.relu(self.fc_d3(x))
        x = self.relu(self.fc_d2(x))
        x = self.fc_d1(x)
        x_de = x.view(-1, 1, 28, 28)
        return x_e, x_de

In [None]:
class DEC:
    """
    Training driver for DEC: pretrains the auto encoder, then refines the
    encoder and the cluster centers by minimising KL(P || Q).
    """

    def __init__(self, n_clusters, n_latent, alpha=1.0):
        """
        :param n_clusters: number of clusters
        :param n_latent: dimensionality of the latent code
        :param alpha: degrees of freedom of the Student t kernel; it is
            handed to the model at the start of ``train``
        """
        self.n_clusters = n_clusters
        self.n_latent = n_latent
        self.alpha = alpha

    @staticmethod
    def target_distribution(q):
        """
        Compute the target distribution P from the soft assignment Q:
        p_ij is proportional to q_ij^2 / sum_i q_ij, normalized per row.

        :param q: soft assignments, one row per sample
        :return: tensor of the same shape, detached from the autograd graph
            because the target is treated as a constant
        """
        weight = (q ** 2) / q.sum(0)
        return (weight / weight.sum(1, keepdim=True)).detach()

    @staticmethod
    def kld(q, p):
        """
        KL divergence KL(p || q), summed over the last dimension.

        :param q: predicted distribution
        :param p: target distribution
        :return: one divergence value per row
        """
        return torch.sum(p * torch.log(p / q), dim=-1)

    def validateOnCompleteTestData(self, test_loader, model):
        """
        Cluster the encoder outputs of the whole test set with k-means and
        print/return the clustering accuracy.

        :param test_loader: iterable of batches whose element 0 is the input
            and element 1 the labels
        :param model: model whose first output is the latent code
        :return: clustering accuracy as computed by ``acc``
        """
        model.eval()
        features, labels = [], []
        # A single pass keeps features and labels aligned even if the loader
        # shuffles; no_grad avoids building an autograd graph for evaluation.
        with torch.no_grad():
            for batch in test_loader:
                features.append(model(batch[0])[0].cpu().numpy())
                labels.append(batch[1].cpu().numpy())
        # concatenate also works when the last batch is smaller than the rest
        to_eval = np.concatenate(features, axis=0)
        true_labels = np.concatenate(labels, axis=0)
        # n_jobs was removed from KMeans in scikit-learn 1.0, so it is not passed.
        km = KMeans(n_clusters=len(np.unique(true_labels)), n_init=20)
        y_pred = km.fit_predict(to_eval)
        currentAcc = acc(true_labels, y_pred)
        print(' '*8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
                      % (currentAcc, nmi(true_labels, y_pred)))
        return currentAcc

    def pretrain(self, train_loader, test_loader, epochs):
        """
        Train the auto encoder on reconstruction (MSE) loss and keep the
        weights of the epoch with the best clustering accuracy.

        The best weights are written to the file 'bestModel' as a state dict,
        which is what ``train`` loads.

        :param train_loader: iterable of training batches (element 0 = input)
        :param test_loader: loader passed to ``validateOnCompleteTestData``
        :param epochs: number of passes over ``train_loader``
        """
        dec_ae = DEC_AE(self.n_clusters, self.n_latent)  # auto encoder
        optimizer = optim.SGD(dec_ae.parameters(), lr=1, momentum=0.9)
        best_acc = 0.0
        for epoch in range(epochs):
            dec_ae.train()
            running_loss = 0.0
            for i, batch in enumerate(train_loader):
                x = batch[0]
                optimizer.zero_grad()
                _, x_de = dec_ae(x)
                loss = nn.functional.mse_loss(x_de, x, reduction='mean')
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                if i % 100 == 99:  # print every 100 mini-batches
                    print('[%d, %5d] loss: %.7f' %
                          (epoch + 1, i + 1, running_loss / 100))
                    running_loss = 0.0
            # now we evaluate the accuracy with AE
            dec_ae.eval()
            currentAcc = self.validateOnCompleteTestData(test_loader, dec_ae)
            if currentAcc > best_acc:
                # Save the state dict rather than the pickled module: it does
                # not depend on the class being importable under the same path
                # and loads with torch.load's safe defaults.
                torch.save(dec_ae.state_dict(), 'bestModel')
                best_acc = currentAcc

    def clustering(self, mbk, x, model):
        """
        Feed one batch of encoder outputs to mini-batch k-means and copy the
        resulting centers into the model.

        :param mbk: estimator exposing ``partial_fit`` and ``cluster_centers_``
        :param x: input batch for the model
        :param model: model whose first output is the latent code
        """
        model.eval()
        with torch.no_grad():
            y_pred_ae, _ = model(x)
        y_pred_ae = y_pred_ae.cpu().numpy()
        mbk.partial_fit(y_pred_ae)
        self.cluster_centers = mbk.cluster_centers_  # keep the cluster centers
        model.updateClusterCenter(self.cluster_centers)

    def train(self, train_loader, test_loader, epochs):
        """This method will start training for DEC cluster

        Loads the weights saved by ``pretrain`` from 'bestModel'. Every batch
        of epochs 0 and 1, plus the first batch of epoch 2, is only used to
        fit the k-means centers; KL-divergence training starts after that, so
        ``epochs`` must be greater than 2 for any training to happen.

        :param train_loader: iterable of training batches (element 0 = input)
        :param test_loader: loader passed to ``validateOnCompleteTestData``
        :param epochs: number of passes over ``train_loader``
        """
        model = DEC_AE(self.n_clusters, self.n_latent)
        model.load_state_dict(torch.load("bestModel"))
        # make the constructor's alpha take effect in the model's t kernel
        model.alpha = self.alpha
        model.setPretrain(False)
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        print('Initializing cluster center with pre-trained weights')
        mbk = MiniBatchKMeans(n_clusters=self.n_clusters, n_init=20, batch_size=100)
        got_cluster_center = False
        for epoch in range(epochs):
            for batch in train_loader:
                x = batch[0]
                # step 1 - get cluster center from batch
                # here we are using minibatch kmeans to be able to cope with larger dataset.
                if not got_cluster_center:
                    self.clustering(mbk, x, model)
                    if epoch > 1:
                        got_cluster_center = True
                else:
                    model.train()
                    optimizer.zero_grad()
                    # now we start training with acquired cluster center
                    _, q = model(x)
                    p = self.target_distribution(q)
                    loss = self.kld(q, p).mean()
                    loss.backward()
                    optimizer.step()
            self.validateOnCompleteTestData(test_loader, model)

In [None]:
# Run the full pipeline: 10 clusters in a 10-dimensional latent space,
# 100 epochs of auto encoder pretraining followed by 200 epochs of DEC training.
dec = DEC(n_clusters=10, n_latent=10)
dec.pretrain(train_loader, test_loader, epochs=100)
dec.train(train_loader, test_loader, epochs=200)