In [1]:
import torch
import sys
import math
from torch import nn
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from torch.nn import Parameter
import torch.nn.functional as F
import torch.optim as optim
# %matplotlib inline  
from torch.utils.data.sampler import Sampler, SubsetRandomSampler
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics.cluster import adjusted_rand_score, adjusted_mutual_info_score, normalized_mutual_info_score
from torch.utils.data.dataset import Dataset
from sklearn.metrics import roc_auc_score as AUC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.stats import mode
from sklearn.model_selection import train_test_split
from scipy.special import erf
from sklearn.metrics import average_precision_score as PR_AUC
import copy
from scipy.io import arff
import os
import Utils.utils as utils
from sklearn.manifold import TSNE
# import hdbscan
from Utils.utils import acc, BalancedBatchSampler
from model import VAE as CVAE
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_recall_fscore_support
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.mixture import GaussianMixture
import itertools
from PIL import Image

In [2]:
class CustomDataset1(Dataset):
    """Tabular dataset read from ``dim.csv`` / ``label.csv`` of a named dataset.

    Both files are read without a header row from
    ``../Data/Dataset/<dataset>/``. Per-column mean and standard deviation of
    the features are stored on the instance, but the features themselves are
    served exactly as read (no normalisation is applied).
    """

    def __init__(self, dataset):
        """Load all features and labels for ``dataset`` into memory."""
        feature_frame = pd.read_csv(f'../Data/Dataset/{dataset}/dim.csv', header=None)
        label_frame = pd.read_csv(f'../Data/Dataset/{dataset}/label.csv', header=None)

        self.features = feature_frame.values
        # Drop singleton axes so a one-column label file becomes a 1-D array.
        self.train_labels = np.squeeze(label_frame.values)

        # Column statistics are only recorded here; nothing below uses them.
        self.mean = self.features.mean(axis=0)
        self.std = self.features.std(axis=0)
        self.data_len = self.features.shape[0]

    def __getitem__(self, index):
        """Return ``(float32 feature tensor, label)`` for row ``index``."""
        row = torch.from_numpy(self.features[index]).float()
        return (row, self.train_labels[index])

    def __len__(self):
        """Number of rows read from ``dim.csv``."""
        return self.data_len
    
class CustomDataset2(Dataset):
    """TF-IDF features for ten categories of the 20 Newsgroups training split.

    Headers, footers and quotes are stripped before vectorising, and the
    sparse TF-IDF matrix is materialised as a dense array in memory.
    """

    def __init__(self):
        """Fetch the newsgroup posts and vectorise them with ``TfidfVectorizer``."""
        cats = [
            'alt.atheism',
            'comp.graphics',
            'comp.os.ms-windows.misc',
            'comp.sys.ibm.pc.hardware',
            'comp.sys.mac.hardware',
            'comp.windows.x',
            'misc.forsale',
            'rec.autos',
            'rec.motorcycles',
            'rec.sport.baseball',
        ]
        corpus = fetch_20newsgroups(
            subset='train',
            categories=cats,
            remove=('headers', 'footers', 'quotes'),
        )
        tfidf = TfidfVectorizer()
        self.features = tfidf.fit_transform(corpus.data).toarray()
        self.labels = corpus.target
        self.data_len = len(self.features)

    def __getitem__(self, index):
        """Return ``(float32 TF-IDF tensor, category id)`` for document ``index``."""
        vector = torch.from_numpy(self.features[index]).float()
        return (vector, self.labels[index])

    def __len__(self):
        """Number of documents held by the dataset."""
        return self.data_len

class CustomDataset3(Dataset):
    """In-memory dataset built from a feature array ``X`` and a label array ``y``.

    Both inputs must be NumPy arrays (they go through ``torch.from_numpy``).
    Features are converted to float32; labels keep their NumPy dtype.
    """

    def __init__(self, X, y):
        """Convert ``X`` and ``y`` to tensors once, up front."""
        self.train = True
        self.data_len = len(y)
        self.X = torch.from_numpy(X).float()
        self.train_labels = torch.from_numpy(y)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return (self.X[index], self.train_labels[index])

    def __len__(self):
        """Number of labels the dataset was built with."""
        return self.data_len

class CustomDataset4(Dataset):
    """Label-free dataset that serves rows of an integer (int64) tensor."""

    def __init__(self, X):
        """Store ``X`` as a ``torch.LongTensor`` and remember its length."""
        self.train = True
        self.data_len = len(X)
        self.X = torch.LongTensor(X)

    def __getitem__(self, index):
        """Return the row at ``index``; there is no label."""
        return self.X[index]

    def __len__(self):
        """Number of rows in ``X``."""
        return self.data_len
    
class MySampler(Sampler):
    """Sampler that yields, in ascending order, the positions where ``mask`` is non-zero.

    Args:
        mask: 1-D NumPy array (bool or numeric); an index ``i`` is sampled
            when ``mask[i]`` is non-zero.
    """

    def __init__(self, mask):
        self.mask = torch.from_numpy(mask)

    def __iter__(self):
        # The previous implementation looked indices up in ``self.indices``,
        # an attribute that was never created, so iterating always raised
        # AttributeError. The non-zero positions of the mask are the indices.
        return iter(torch.nonzero(self.mask).flatten().tolist())

    def __len__(self):
        # Report how many indices __iter__ yields, not the mask length, so
        # that DataLoader length calculations match the real sample count.
        return int((self.mask != 0).sum())

In [3]:
def set_bn_eval(m):
    """Switch ``m`` to eval mode if its class name contains ``'BatchNorm'``.

    Written as a per-module callback (it is passed to ``Module.apply``
    elsewhere in this notebook); any other module is left untouched.
    """
    if 'BatchNorm' in m.__class__.__name__:
        m.eval()

def set_bn_train(m):
    """Switch ``m`` to train mode if its class name contains ``'BatchNorm'``.

    Counterpart of ``set_bn_eval``; modules of any other class are ignored.
    """
    if 'BatchNorm' in m.__class__.__name__:
        m.train()

def check_bn_eval(m):
    """Print whether a BatchNorm module is in eval mode (debugging aid).

    Prints ``'problem'`` when a module whose class name contains
    ``'BatchNorm'`` is still in training mode and ``'converted BN **'`` when
    it is in eval mode. Other modules produce no output.
    """
    if 'BatchNorm' not in m.__class__.__name__:
        return
    print('problem' if m.training else 'converted BN **')

In [4]:
class AE(nn.Module):
    """Fully connected autoencoder.

    Encoder: ``ndims -> 500 -> 500 -> 2000 -> nlatent``; the decoder mirrors
    it and ends in a sigmoid. Every hidden layer is Linear + BatchNorm1d +
    ReLU (see ``custom_linear``); the bottleneck ``fc4`` is a plain Linear.
    """

    def __init__(self, ndims=784, nlatent=10):
        super().__init__()
        self.ndims = ndims
        self.nlatent = nlatent

        # Encoder stack.
        self.fc1 = self.custom_linear(ndims, 500)
        self.fc2 = self.custom_linear(500, 500)
        self.fc3 = self.custom_linear(500, 2000)
        self.fc4 = nn.Linear(2000, nlatent)

        # Decoder stack.
        self.fc5 = self.custom_linear(nlatent, 2000)
        self.fc6 = self.custom_linear(2000, 500)
        self.fc7 = self.custom_linear(500, 500)
        self.fc8 = self.custom_linear(500, ndims)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def encode(self, x):
        """Map a flattened input batch to its latent code."""
        return self.fc4(self.fc3(self.fc2(self.fc1(x))))

    def decode(self, z):
        """Map a latent code back to input space, squashed into (0, 1)."""
        return self.sigmoid(self.fc8(self.fc7(self.fc6(self.fc5(z)))))

    def forward(self, x, vae=0):
        """Return ``(z, reconstruction, None, None)``.

        ``x`` is flattened to ``(-1, ndims)`` first. ``vae`` is accepted but
        not used; the two trailing ``None`` values keep the 4-tuple shape
        that ``DecTvae.forward`` unpacks.
        """
        flat = x.view(-1, self.ndims)
        code = self.encode(flat)
        return code, self.decode(code), None, None

    def custom_linear(self, in_size, out_size):
        """Build one hidden block: Linear -> BatchNorm1d -> ReLU."""
        layers = [
            nn.Linear(in_size, out_size),
            nn.BatchNorm1d(out_size),
            nn.ReLU(),
        ]
        return nn.Sequential(*layers)

In [5]:
class DecTvae(nn.Module):
    def __init__(self, dataset='MNIST_NN', ndim=32, nchannels=1, n_clusters=10, z_dim=10, kernel_num=128, 
                 lr=0.001, weight_decay=1e-03, batch_size=120, n_epochs=3, alpha=1., gamma=0.1, vae=0):
        super(self.__class__, self).__init__()
        self.z_dim = z_dim
        self.vae = vae
        self.dataset = dataset
        self.ndim = ndim
        self.nchannels = nchannels
        self.kernel_num = kernel_num
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.n_epochs = n_epochs
#         self.model = CVAE(dataset, ndim, nchannels, kernel_num, z_dim)
        self.model = AE(ndim*ndim, z_dim) 
        self.out_linear = nn.Linear(z_dim, n_clusters)
        self.cross_entropy = nn.CrossEntropyLoss()
#         self.model = VAE(ndims=1024)
        self.oneD = False
        self.use_cuda = torch.cuda.is_available()
        self.results_dir = f'../Results/dectvae/{dataset}/'
        self.models_dir = f'Models/dectvae/{dataset}/'
        self.MSELoss = nn.MSELoss()
        self.triplet_loss = nn.TripletMarginLoss()
        if(not os.path.exists(self.results_dir)):
            os.makedirs(self.results_dir)
        if(not os.path.exists(self.models_dir)):
            os.makedirs(self.models_dir)
        if(self.use_cuda):
#             self.model.cuda()
            self.cross_entropy.cuda()
        utils.initialize_weights(self.model)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.gamma = gamma
        self.mu = Parameter(torch.Tensor(n_clusters, z_dim))
        self.reorder = None

    def Variational(self, vae):
        self.vae = vae
        
    def save_model(self, path):
        torch.save(self.state_dict(), f'{self.models_dir}/{path}')

    def load_model(self, path):
        pretrained_dict = torch.load(f'{self.models_dir}/{path}', map_location=lambda storage, loc: storage)
        model_dict = self.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict) 
        self.load_state_dict(model_dict)

    def forward(self, x):
        z, mux, mean, logvar = self.model(x, self.vae)
        if(self.vae):
            return z, mux, mean, logvar
        # compute q -> NxK
        q = self.soft_assign(z)
        return z, q, mux

    def soft_assign(self, z):
        q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - self.mu)**2, dim=2) / self.alpha)
        q = q**(self.alpha+1.0)/2.0
        q = q / torch.sum(q, dim=1, keepdim=True)
        return q

    def encodeBatch(self, X):
        batch_size = self.batch_size
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        
        encoded = []
#         self.eval()
        num = X.shape[0]
        num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
        for batch_idx in range(num_batch):
            xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
            inputs = Variable(xbatch)
            if(use_cuda):
                inputs = inputs.cuda()
#             print(inputs.size())
            z,_, _ = self.forward(inputs)
            encoded.append(z.data)

        encoded = torch.cat(encoded, dim=0)
#         print(encoded.size())
        return encoded


    def loss_function(self, x, xrecon, p, q):
        def kld(target, pred):
            return torch.mean(torch.sum(target*torch.log(target/(pred+1e-6)), dim=1))
        
        kldloss = kld(p, q)
        recon_loss = nn.BCELoss()(xrecon, x.view(-1, self.ndim*self.ndim))#torch.mean((xrecon-x)**2)
        loss = recon_loss + self.gamma*kldloss
        '''
        experiment: returning kldloss
        '''
        return loss

    def target_distribution(self, q):
        p = q**2 / torch.sum(q, dim=0)
        p = p / torch.sum(p, dim=1, keepdim=True)
        return p

    def eval_clustering(self, validloader):
        """Print clustering accuracy / NMI on ``validloader``.

        Two label assignments are scored against the true labels: a fresh
        k-means fit on the latent codes, and the argmax of the model's own
        soft assignments. The module is switched to eval mode and left there.

        Args:
            validloader: Iterable of ``(inputs, labels)`` batches.
        """
        if self.use_cuda:
            self.cuda()
        self.eval()
        kmeans = KMeans(self.n_clusters, n_init=20)
        z_agg = []
        y = []
        # Pure inference: without no_grad every appended z kept its batch's
        # autograd graph alive until the method returned.
        with torch.no_grad():
            for inputs, labels in validloader:
                if self.use_cuda:
                    inputs = inputs.cuda()
                z, _, _ = self.forward(inputs)
                z_agg.append(z)
                y.extend(labels.cpu().numpy())
            latent = torch.cat(z_agg, dim=0)
            q = self.soft_assign(latent)

        y = np.array(y)
        y_pred = kmeans.fit_predict(latent.cpu().numpy())
        print("***Kmeans Validation acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

        # Evaluate the clustering implied by the model's soft assignments.
        y_pred = torch.argmax(q, dim=1).cpu().numpy()
        print("***Validation acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
    
    def pretrain(self, trainloader, validloader):
        if(self.use_cuda):
            self.cuda()
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=self.lr)

        for epoch in range(self.n_epochs):
            self.train()
            assert self.model.training == True
            tloss = 0
            for batch_idx, (inputs, labels) in enumerate(trainloader):
                inputs = Variable(inputs)
                if(self.use_cuda):
                    inputs = inputs.cuda()
                z, _, mux = self.forward(inputs)
                loss = nn.BCELoss()(mux, inputs.view(-1, self.ndim*self.ndim))
                tloss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            print(epoch, ":", tloss/len(trainloader))
            
#             if(epoch%20 == 0):
#                 self.base_clustering(validloader)
        
   
    def train_classifier(self, trainloader, n_epochs=20):
        """Train encoder + ``out_linear`` as a supervised classifier.

        Cross-entropy is computed on ``out_linear(z)``. After every epoch the
        mean batch loss of that epoch and the accuracy reported by the
        project helper ``acc`` are printed.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches.
            n_epochs: Number of passes over ``trainloader``. Defaults to 20,
                the value that used to be hard-coded; ``self.n_epochs`` is
                not consulted.
        """
        if self.use_cuda:
            self.cuda()
        self.train()
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=self.lr)
        for _ in range(n_epochs):
            # Reset per epoch: the accumulator used to live outside the loop,
            # so the printed "loss" grew with every epoch.
            loss_T = 0.0
            Y = []
            Y_pred = []
            for inputs, labels in trainloader:
                if self.use_cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                z, _, _ = self.forward(inputs)
                logits = self.out_linear(z)
                loss = self.cross_entropy(logits, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                Y.append(labels.cpu())
                Y_pred.append(torch.argmax(logits, 1).detach().cpu())
                loss_T += loss.item()
            Y = torch.cat(Y)
            Y_pred = torch.cat(Y_pred)
            # NOTE(review): ``acc`` comes from Utils.utils; if it is a
            # clustering accuracy (best label permutation) it can overstate
            # plain classification accuracy. Confirm, or use
            # ``torch.sum(Y == Y_pred).item() / len(Y)``.
            accuracy = acc(Y.numpy(), Y_pred.numpy())
            print(f'Loss = {loss_T/len(trainloader)}  Accuracy={accuracy}')
    
    def validate_classifier(self, trainloader):
        """Evaluate the ``out_linear`` classifier on a loader and print loss / accuracy.

        The module is switched to eval mode and left there. No parameters are
        updated.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches to evaluate
                on (the name is historical; any loader works).
        """
        if self.use_cuda:
            self.cuda()
        self.eval()
        loss_T = 0
        Y = []
        Y_pred = []
        # Evaluation only: no optimizer is needed (one used to be built and
        # never stepped) and no autograd graph should be recorded.
        with torch.no_grad():
            for inputs, labels in trainloader:
                if self.use_cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                z, _, _ = self.forward(inputs)
                logits = self.out_linear(z)
                loss = self.cross_entropy(logits, labels)

                Y.append(labels.cpu())
                Y_pred.append(torch.argmax(logits, 1).cpu())
                loss_T += loss.item()
        Y = torch.cat(Y)
        Y_pred = torch.cat(Y_pred)
        # NOTE(review): ``acc`` comes from Utils.utils; confirm it is the
        # intended metric for a supervised classifier.
        accuracy = acc(Y.numpy(), Y_pred.numpy())
        print(f'Loss = {loss_T/len(trainloader)}  Accuracy={accuracy}')
        
        
    def fit(self, dataloader, validloader, update_interval=1, tol=1e-3):
        '''Unsupervised clustering training (reconstruction + gamma * KL).

        Cluster centres are initialised with k-means on the encoded data, then
        each epoch (a) optionally refreshes the target distribution p and
        checks the stopping criterion, and (b) runs one pass of mini-batch
        updates over X.

        Args:
            dataloader: Loader of (features, labels) batches. Only the LAST
                batch of each pass is kept as X / y, so this presumably
                expects a loader whose single batch is the whole dataset.
            validloader: Passed to eval_clustering every 5th epoch.
            update_interval: p is refreshed when epoch % update_interval == 0.
            tol: Training stops when the fraction of samples whose predicted
                cluster changed since the previous refresh drops below tol.
        '''
        X = None
        y = None
        # Keep only the last batch the loader yields (see the docstring).
        for featuresT,labelsT in dataloader:
            X = featuresT
            y = labelsT
        batch_size = self.batch_size
        num_epochs = self.n_epochs
        lr = self.lr
        if(self.use_cuda):
            self.cuda()
        print("=====Training IDEC=======")
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
        # optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9)

        # Train mode overall, but BatchNorm layers are put in eval mode while encoding.
        self.train()
        self.apply(set_bn_eval)
        print("Initializing cluster centers with kmeans.")
        kmeans = KMeans(self.n_clusters, n_init=20)
        data = self.encodeBatch(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        y_pred_last = y_pred
        # Overwrite the cluster-centre parameter in place with the k-means centres.
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
        if y is not None:
            y = y.cpu().numpy()
            print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

        num = X.shape[0]
        num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
        for epoch in range(num_epochs):
            self.train()
            self.apply(set_bn_eval)
            # X / y are re-read from the loader every epoch.
            # NOTE(review): if the loader shuffles, the sample order changes
            # between epochs, so the y_pred != y_pred_last comparison below
            # (and a p kept from an earlier epoch when update_interval > 1)
            # would refer to differently ordered samples - confirm the loader
            # used here does not shuffle.
            for featuresT,labelsT in dataloader:
                X = featuresT
                y = labelsT
            if epoch%update_interval == 0:
                # update the target distribution p
                latent = self.encodeBatch(X)
                q = self.soft_assign(latent)
                # .data detaches p so it acts as a fixed target for this epoch.
                p = self.target_distribution(q).data

                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if y is not None:
                    y = y.cpu().numpy()
                    
                    print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

                # check stop criterion: fraction of samples whose argmax cluster changed
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
                y_pred_last = y_pred
                if epoch>0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print("Reach tolerance threshold. Stopping training.")
                    break

            # train 1 epoch (BatchNorm layers back in train mode for the gradient steps)
            self.apply(set_bn_train)
            train_loss = 0.0
            for batch_idx in range(num_batch):
                # X and p are sliced with the same indices so targets stay aligned with inputs.
                xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                pbatch = p[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                
                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)
                if(self.use_cuda):
                    inputs = inputs.cuda()
                    target = target.cuda()
                z, qbatch, xrecon = self.forward(inputs)
                loss = self.loss_function(inputs, xrecon, target, qbatch)
                # Weight by batch size so train_loss / num is a per-sample mean.
                train_loss += loss.item()*len(inputs)
                loss.backward()
                optimizer.step()

            print("#Epoch %3d: Loss: %.4f" % (
                epoch+1, train_loss / num))
            
            # Periodic validation; eval_clustering leaves the module in eval
            # mode, which the self.train() at the top of the next epoch undoes.
            if(epoch%5 == 0):
                self.eval_clustering(validloader)
            
    
#     def train_semisupervised(self, dataloader, dataloader_l, validloader, triplet_selector=None, update_interval=1, tol=1e-3):
#         self.train_triplet(dataloader_l, validloader, triplet_selector)
#         self.fit(dataloader, validloader)
        
#     def train_supervised(self, dataloader)
    
    def train_semisupervised(self, dataloader, dataloader_l, validloader, update_interval=1, tol=1e-3):
        '''Clustering training driven by a labelled subset.

        Cluster centres are initialised with k-means on the encoded unlabelled
        data. In every training step one labelled batch is drawn from
        dataloader_l and the soft assignments of that batch are pushed
        towards the labels with cross-entropy. The unsupervised loss is also
        computed for the matching slice of X but is currently NOT part of the
        optimised loss (see the note at the loss computation).

        Args:
            dataloader: Loader of (features, labels); only the last batch of
                each pass is kept as X / y, so this presumably expects one
                batch holding the whole dataset.
            dataloader_l: Loader of labelled (inputs, labels) batches; it is
                restarted whenever it runs out.
            validloader: Used for the per-epoch t-SNE plot and for the
                validation metrics printed every 10th epoch.
            update_interval: p is refreshed when epoch % update_interval == 0.
            tol: Stop when the fraction of changed cluster predictions is below tol.
        '''
        X = None
        y = None
        # Keep only the last batch the loader yields (see the docstring).
        for featuresT,labelsT in dataloader:
            X = featuresT
            y = labelsT
        batch_size = self.batch_size
        num_epochs = self.n_epochs
        lr = self.lr
        if(self.use_cuda):
            self.cuda()
        print("=====Training IDEC=======")
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
        # optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9)
        
        print("Initializing cluster centers with kmeans.")
        kmeans = KMeans(self.n_clusters, n_init=20)
        data = self.encodeBatch(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        y_pred_last = y_pred
        # Overwrite the cluster-centre parameter in place with the k-means centres.
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
        if y is not None:
            y = y.cpu().numpy()
            print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
 
        num = X.shape[0]
        num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
        dataiter_l = iter(dataloader_l)
        for epoch in range(num_epochs):
            self.train()
            # NOTE(review): visualise_embeddings calls self.eval() and does not
            # restore train mode, so the gradient steps below run with the
            # module in eval mode (e.g. BatchNorm uses running statistics).
            # Confirm whether that is intended.
            self.visualise_embeddings(validloader, f'{epoch}')
            # X / y are re-read every epoch; with a shuffling loader the sample
            # order differs from the one y_pred_last was computed on.
            for featuresT,labelsT in dataloader:
                X = featuresT
                y = labelsT
            if epoch%update_interval == 0:
                # update the target distribution p
                latent = self.encodeBatch(X)
                q = self.soft_assign(latent)
                # NOTE(review): debugging stop left in place - once epoch > 30
                # the most confident images per cluster are saved and the
                # always-false assert aborts training with AssertionError.
                if(epoch>30):
                    self.save_images(X.data.cpu().numpy(), q.data.cpu().numpy())
                    assert 1==2
                p = self.target_distribution(q).data

                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if y is not None:
                    y = y.cpu().numpy()
                    
                    print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

                # check stop criterion: fraction of samples whose argmax cluster changed
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
                y_pred_last = y_pred
                if epoch>0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print("Reach tolerance threshold. Stopping training.")
                    break

            # train 1 epoch
            train_loss = 0.0
            for batch_idx in range(num_batch):
                
                # Draw the next labelled batch, restarting the loader when it is exhausted.
                try:
                    inputs_l, labels_l = next(dataiter_l)
                except StopIteration:
                    dataiter_l = iter(dataloader_l)
                    inputs_l, labels_l = next(dataiter_l)
                
                if(self.use_cuda):
                    inputs_l = inputs_l.cuda()
                    labels_l = labels_l.cuda()
                    
                inputs_l = Variable(inputs_l)
                
                z_l, q_l, _ = self.forward(inputs_l)
                
                xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                pbatch = p[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                
                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)
                if(self.use_cuda):
                    inputs = inputs.cuda()
                    target = target.cuda()
                z, qbatch, xrecon = self.forward(inputs)
                # loss_u is computed but unused: the "+ loss_u" term below is commented out.
                loss_u = self.loss_function(inputs, xrecon, target, qbatch)
#                 logits = self.out_linear(z_l)
                # NOTE(review): q_l holds row-normalised soft assignments, while
                # nn.CrossEntropyLoss applies log-softmax to its input itself,
                # so the probabilities are treated as logits here. Confirm this
                # is intended (an NLL on log(q_l) would be the usual form).
                loss_l = self.cross_entropy(q_l, labels_l)
                loss = loss_l #+ loss_u
                # The labelled-batch loss is weighted by the size of the
                # unlabelled slice, matching the division by num below.
                train_loss += loss.item()*len(inputs)
                loss.backward()
                optimizer.step()

            print("#Epoch %3d: Loss: %.4f" % (
                epoch+1, train_loss / num))
            
            # Every 10th epoch: score k-means and argmax(q) clusterings on the validation set.
            if(epoch%10 == 0):
                self.eval()
                z_agg = []
                # y is rebound to the validation labels here; the top of the
                # next epoch rebinds it from dataloader again.
                y = []
                for batch_idx, (inputs, labels) in enumerate(validloader):
                    inputs = Variable(inputs)
                    if(self.use_cuda):
                        inputs = inputs.cuda()
                    z, _, mux = self.forward(inputs)
                    z_agg.append(z)
                    y.extend(labels.data.cpu().numpy())
                
                y = np.array(y)
                latent = torch.cat(z_agg, dim=0)
                # The k-means object from initialisation is re-fitted on the validation codes.
                y_pred = kmeans.fit_predict(latent.data.cpu().numpy())
                if y is not None:
                    print("***Kmeans Validation acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

                
                q = self.soft_assign(latent)
                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if y is not None:
                    print("***Validation acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
        
        
    def save_images(self, X, q):
        """Save the ten most confidently assigned images of every cluster as PNGs.

        Files are written to ``<results_dir>/<cluster>_<rank>.png``, where
        rank 0 is the least and the last rank the most confident of the
        selected images.

        Args:
            X: NumPy array of images indexed ``[sample, channel, ...]``; only
                channel 0 of each image is written.
            q: NumPy array of soft assignments, one row per sample and one
                column per cluster.
        """
        y_pred = np.argmax(q, axis=1)
        confidence = np.amax(q, axis=1)
        # One pass per column of q rather than a hard-coded 10 clusters.
        for i in range(q.shape[1]):
            members = y_pred == i
            order = np.argsort(confidence[members])
            top = X[members][order][-10:]
            for k, data in enumerate(top):
                data = data[0]
                lo = data.min()
                span = data.max() - lo
                if span > 0:
                    # Min-max scale to [0, 255]. Dividing by data.max() alone
                    # produced NaNs for all-zero images and values above 255
                    # (uint8 wrap-around) when the minimum was negative.
                    rescaled = (255.0 * (data - lo) / span).astype(np.uint8)
                else:
                    # Constant image: nothing to stretch, write it as black.
                    rescaled = np.zeros(data.shape, dtype=np.uint8)
                im = Image.fromarray(rescaled)
                im.save(f'{self.results_dir}/{i}_{k}.png')
        
    def visualise_embeddings(self, trainloader, imname=''):
        """Save a 2-D t-SNE scatter plot of the latent codes, coloured by label.

        The figure is written to ``<results_dir>/<imname>_emb.png``.

        Side effect: the module is switched to eval mode and is NOT switched
        back, so callers that keep training must call ``train()`` again.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches to embed.
            imname: Prefix of the output file name.
        """
        if self.use_cuda:
            self.cuda()
        self.eval()
        z_agg = []
        labels_agg = []
        # Inference only. The input batches used to be collected in a list
        # that was never read, which kept the whole dataset alive (on the GPU
        # when CUDA is used) for the duration of the call.
        with torch.no_grad():
            for inputs, labels in trainloader:
                if self.use_cuda:
                    inputs = inputs.cuda()
                z, _, _ = self.forward(inputs)
                z_agg.append(z.cpu().numpy())
                labels_agg.extend(labels.cpu().numpy())

        labels_agg = np.array(labels_agg)
        z_agg = np.concatenate(z_agg, axis=0)
        z_compressed = TSNE(n_components=2).fit_transform(z_agg)
        plt.figure(figsize=(10, 10))
        plt.scatter(z_compressed[:, 0], z_compressed[:, 1], c=labels_agg, cmap='brg')
        plt.colorbar()
        plt.savefig(f'{self.results_dir}/{imname}_emb.png')
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close('all')  
            
            
    

In [6]:
# FashionMNIST images are 28x28; padding 2 pixels on every side yields the
# 32x32 inputs that DecTvae's default ndim=32 expects.
MNSIT_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.ToPILImage(),
    transforms.Pad(2),
    transforms.ToTensor(),
])

batch_size = 120
fashion_root = '../Data/FashionMNIST'
traindataset = datasets.FashionMNIST(fashion_root, download=True, train=True,
                                     transform=MNSIT_transform)
testdataset = datasets.FashionMNIST(fashion_root, download=True, train=False,
                                    transform=MNSIT_transform)
fulldata = torch.utils.data.ConcatDataset([traindataset, testdataset])

# Mini-batch loaders.
train_loader = torch.utils.data.DataLoader(traindataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(testdataset, batch_size=batch_size, shuffle=True)
# Same test data, but in a fixed order.
valid_loader_full = torch.utils.data.DataLoader(testdataset, batch_size=batch_size, shuffle=False)

# Full-batch loaders: each yields the entire split as one shuffled batch.
data_loader = torch.utils.data.DataLoader(traindataset, batch_size=len(traindataset), shuffle=True)
data_loader_test = torch.utils.data.DataLoader(testdataset, batch_size=len(testdataset), shuffle=True)

# Pull the single full-size training batch out as NumPy arrays.
featuresT, labelsT = next(iter(data_loader))
features = featuresT.numpy()
labels = labelsT.numpy()

# Stratified 16.6% slice of the training data serves as the labelled subset;
# the remainder of the split is discarded.
_, X_supervised, _, y_supervised = train_test_split(
    features,
    labels,
    stratify=labels,
    test_size=0.166,
    random_state=42,
)

trainset_su = CustomDataset3(X_supervised, y_supervised)
train_loader_su = torch.utils.data.DataLoader(trainset_su, batch_size=len(trainset_su), shuffle=True)

In [7]:
# Batch sampler over the labelled subset, configured with n_classes=10 and n_samples=5.
# NOTE(review): presumably each batch holds n_samples items from each of
# n_classes classes (50 items) - confirm against Utils.utils.BalancedBatchSampler.
train_batch_sampler = BalancedBatchSampler(trainset_su, n_classes=10, n_samples=5)
# test_batch_sampler = BalancedBatchSampler(testdataset, n_classes=10, n_samples=25)

# Worker / pinned-memory options are only passed to the loader when CUDA is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}
batch_train_loader_su = torch.utils.data.DataLoader(trainset_su, batch_sampler=train_batch_sampler, **kwargs)
# online_test_loader = torch.utils.data.DataLoader(testdataset, batch_sampler=test_batch_sampler, **kwargs)

In [None]:
# Baseline: train a plain cross-entropy classifier on the labelled subset and
# evaluate it on the test set.
# NOTE(review): train_classifier does not read self.n_epochs, so n_epochs=30
# has no effect on this run. The dataset argument is left at its default
# 'MNIST_NN', which only determines the results / models directory names.
dectvae = DecTvae(n_epochs = 30)
dectvae.train_classifier(batch_train_loader_su)
dectvae.validate_classifier(valid_loader_full)

In [None]:
# Semi-supervised run. Starts from pretrained autoencoder weights, so
# Models/dectvae/FashionMNIST_NN/pretrain_w_BN.pt must already exist; the
# commented-out pretrain / save_model lines below are what produce it.
# NOTE(review): train_semisupervised contains a hard `assert 1==2` stop once
# epoch > 30, so the run ends with an AssertionError before n_epochs=40 is
# reached unless the tolerance criterion stops it earlier.
dectvae = DecTvae(n_epochs = 40, dataset='FashionMNIST_NN')
# dectvae.pretrain(train_loader, valid_loader_full)
# dectvae.save_model('pretrain_w_BN.pt')
dectvae.load_model('pretrain_w_BN.pt')
# dectvae.fit(data_loader, valid_loader_full)
# dectvae.save_model('model_w_BN.pt')
dectvae.train_semisupervised(data_loader, batch_train_loader_su, valid_loader_full)
# dectvae.save_model('baseline_100.pt')
# dectvae.load_model('baseline_100.pt')

Initializing cluster centers with kmeans.
Kmeans acc: 0.51562, nmi: 0.49439




acc: 0.51753, nmi: 0.49482
#Epoch   1: Loss: 1.7560




***Kmeans Validation acc: 0.78660, nmi: 0.73240
***Validation acc: 0.79730, nmi: 0.73796




acc: 0.80490, nmi: 0.74670
#Epoch   2: Loss: 1.6552




acc: 0.81953, nmi: 0.76973
#Epoch   3: Loss: 1.6355




acc: 0.81340, nmi: 0.76435
#Epoch   4: Loss: 1.6255




acc: 0.82155, nmi: 0.77067
#Epoch   5: Loss: 1.6212




acc: 0.82992, nmi: 0.77843
#Epoch   6: Loss: 1.6126




acc: 0.83183, nmi: 0.78320
#Epoch   7: Loss: 1.6040




acc: 0.81960, nmi: 0.76215
#Epoch   8: Loss: 1.5998




acc: 0.83863, nmi: 0.77560
#Epoch   9: Loss: 1.5794




acc: 0.86618, nmi: 0.78179
#Epoch  10: Loss: 1.5619




acc: 0.87258, nmi: 0.79009
#Epoch  11: Loss: 1.5570




***Kmeans Validation acc: 0.85890, nmi: 0.77555
***Validation acc: 0.85830, nmi: 0.77445




acc: 0.87628, nmi: 0.79783
#Epoch  12: Loss: 1.5514




acc: 0.87737, nmi: 0.79476
#Epoch  13: Loss: 1.5454




acc: 0.87978, nmi: 0.79846
#Epoch  14: Loss: 1.5437




acc: 0.85807, nmi: 0.78406
#Epoch  15: Loss: 1.5402




acc: 0.88222, nmi: 0.80026
#Epoch  16: Loss: 1.5375




acc: 0.88155, nmi: 0.80041
#Epoch  17: Loss: 1.5360




acc: 0.87397, nmi: 0.79521
#Epoch  18: Loss: 1.5327




acc: 0.87847, nmi: 0.79686
#Epoch  19: Loss: 1.5317




acc: 0.87768, nmi: 0.79513
#Epoch  20: Loss: 1.5321




acc: 0.87937, nmi: 0.79493
#Epoch  21: Loss: 1.5271




***Kmeans Validation acc: 0.85960, nmi: 0.77222
***Validation acc: 0.85950, nmi: 0.77237




acc: 0.88258, nmi: 0.80032
#Epoch  22: Loss: 1.5274




acc: 0.88078, nmi: 0.80060
#Epoch  23: Loss: 1.5214


In [None]:
# t-SNE plot of the test-set embeddings, saved as '1000_emb.png' in the model's results directory.
dectvae.visualise_embeddings(test_loader, '1000')

In [None]:
from PIL import Image

In [None]:
# Scratch check: torch.cat joins two 1-D tensors end to end (here into a length-6 tensor).
a = torch.tensor([1,2,3])
b = torch.tensor([1,2,6])
torch.cat([a,b])

In [None]:
# Write a tiny 2x4 8-bit greyscale test image.
# `png` is never imported in this notebook, so the original call raised
# NameError on a fresh kernel. PIL's Image and numpy are already imported and
# produce the same file.
pixels = np.array([[255, 0, 0, 255],
                   [0, 255, 255, 0]], dtype=np.uint8)
Image.fromarray(pixels).save("small_smiley.png")