Somraj Gautam

Results on one dataset that was not used in the paper

New dataset — Multi-class Weather Dataset: https://www.kaggle.com/datasets/vijaygiitk/multiclass-weather-dataset

In [None]:
# Install the Kaggle command-line client; a later cell uses it to download the dataset.
!pip install -q kaggle

In [None]:
from google.colab import files

# Upload kaggle.json (the Kaggle API token) from the local machine.
# The result is bound to a name on purpose: when files.upload() is the last
# expression of the cell, its {filename: bytes} return value is echoed as the
# cell output, which stores the API key in the saved notebook.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"somraj09","key":"<REDACTED>"}'}

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
# Download the weather dataset archive (multiclass-weather-dataset.zip) with the Kaggle CLI.
!kaggle datasets download -d vijaygiitk/multiclass-weather-dataset

Downloading multiclass-weather-dataset.zip to /content
 98% 132M/134M [00:01<00:00, 79.5MB/s]
100% 134M/134M [00:01<00:00, 78.4MB/s]


In [None]:
!unzip /content/multiclass-weather-dataset.zip

In [None]:
import numpy as np
import torchvision
import argparse
from torch.utils import data
import torch.nn as nn
import math
import torch
from torch.nn.functional import normalize
from torchvision.models.resnet import Bottleneck, BasicBlock, conv1x1
import cv2
from sklearn import metrics
import os
import copy

### Function for saving the model at different epochs

In [None]:
def save_model(args, model, optimizer, current_epoch):
    """Write a training checkpoint for ``current_epoch`` under ``/content/Model``.

    The checkpoint is a dict with the keys ``'net'`` (model state dict),
    ``'optimizer'`` (optimizer state dict) and ``'epoch'``, saved to
    ``/content/Model/checkpoint_<current_epoch>.tar``.

    Args:
        args: Unused; kept so existing call sites keep working.
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        current_epoch: Epoch number recorded in the checkpoint and in its file name.
    """
    checkpoint_dir = '/content/Model'
    # torch.save does not create missing parent directories, so make sure the
    # target directory exists (a fresh runtime does not have it).
    os.makedirs(checkpoint_dir, exist_ok=True)
    out = os.path.join(checkpoint_dir, "checkpoint_{}.tar".format(current_epoch))
    state = {'net': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': current_epoch}
    torch.save(state, out)

### Loss functions (instance-level and cluster-level)

In [None]:
class InstanceLoss(nn.Module):
    """Instance-level contrastive loss over two batches of embeddings.

    For every row of the concatenated batch ``[z_i; z_j]`` the matching row of
    the other view is the positive; every other row except the row itself is a
    negative. Similarities are dot products divided by ``temperature`` and the
    loss is the cross-entropy of picking the positive, averaged over all
    ``2 * batch_size`` rows.

    Args:
        batch_size: Number of rows in each of ``z_i`` and ``z_j``. ``forward``
            reshapes with this fixed size, so both inputs must have exactly
            ``batch_size`` rows.
        temperature: Divisor applied to the similarity matrix.
        device: Stored on the instance for compatibility; not otherwise used.
    """

    def __init__(self, batch_size, temperature, device):
        super(InstanceLoss, self).__init__()
        self.batch_size = batch_size
        self.temperature = temperature
        self.device = device

        # Registered as a (non-persistent) buffer so that ``.to(device)`` /
        # ``.cuda()`` move the mask together with the module, instead of
        # indexing a CUDA similarity matrix with a CPU mask on every step.
        # ``persistent=False`` keeps it out of ``state_dict()``.
        self.register_buffer("mask", self.mask_correlated_samples(batch_size), persistent=False)
        self.criterion = nn.CrossEntropyLoss(reduction="sum")

    def mask_correlated_samples(self, batch_size):
        """Return a ``(2B, 2B)`` bool mask that is True exactly at negative pairs.

        The diagonal (self-similarity) and the two off-diagonals at offset
        ``±batch_size`` (the positive pairs) are False.
        """
        N = 2 * batch_size
        mask = torch.ones((N, N))
        mask = mask.fill_diagonal_(0)
        first_view = torch.arange(batch_size)
        second_view = first_view + batch_size
        mask[first_view, second_view] = 0
        mask[second_view, first_view] = 0
        return mask.bool()

    def forward(self, z_i, z_j):
        """Compute the loss for two ``(batch_size, dim)`` embedding batches.

        Returns:
            Scalar tensor: summed cross-entropy divided by ``2 * batch_size``.
        """
        N = 2 * self.batch_size
        z = torch.cat((z_i, z_j), dim=0)

        sim = torch.matmul(z, z.T) / self.temperature
        # Offsets +B / -B of the diagonal hold the (i, j) and (j, i) positives.
        sim_i_j = torch.diag(sim, self.batch_size)
        sim_j_i = torch.diag(sim, -self.batch_size)

        positive_samples = torch.cat((sim_i_j, sim_j_i), dim=0).reshape(N, 1)
        # ``.to`` is a no-op when the module was moved with its inputs; it only
        # copies when the caller left the module on another device.
        negative_samples = sim[self.mask.to(sim.device)].reshape(N, -1)

        # The positive logit is placed in column 0, so the target class is 0.
        labels = torch.zeros(N).to(positive_samples.device).long()
        logits = torch.cat((positive_samples, negative_samples), dim=1)
        loss = self.criterion(logits, labels)
        loss /= N

        return loss


class ClusterLoss(nn.Module):
    """Cluster-level contrastive loss plus a cluster-usage entropy term.

    ``c_i`` and ``c_j`` are per-sample cluster assignment probabilities for two
    views. Their columns (one vector per cluster) are contrasted with cosine
    similarity: the same cluster in the other view is the positive, all other
    columns are negatives. An entropy-based term is added that is smallest
    when the batch-level cluster distribution is uniform.

    Args:
        class_num: Number of clusters (columns of ``c_i`` / ``c_j``).
        temperature: Divisor applied to the cosine-similarity matrix.
        device: Stored on the instance for compatibility; not otherwise used.
    """

    def __init__(self, class_num, temperature, device):
        super(ClusterLoss, self).__init__()
        self.class_num = class_num
        self.temperature = temperature
        self.device = device

        # Non-persistent buffer: follows ``.to(device)`` with the module but is
        # not written to ``state_dict()``.
        self.register_buffer("mask", self.mask_correlated_clusters(class_num), persistent=False)
        self.criterion = nn.CrossEntropyLoss(reduction="sum")
        self.similarity_f = nn.CosineSimilarity(dim=2)

    def mask_correlated_clusters(self, class_num):
        """Return a ``(2K, 2K)`` bool mask that is True exactly at negative pairs.

        The diagonal and the two off-diagonals at offset ``±class_num`` (the
        positive pairs) are False.
        """
        N = 2 * class_num
        mask = torch.ones((N, N))
        mask = mask.fill_diagonal_(0)
        first_view = torch.arange(class_num)
        second_view = first_view + class_num
        mask[first_view, second_view] = 0
        mask[second_view, first_view] = 0
        return mask.bool()

    @staticmethod
    def _negative_entropy(c):
        """Return ``log(K) + sum(p * log p)`` for the batch-level cluster distribution ``p``."""
        p = c.sum(0).view(-1)
        p = p / p.sum()
        # A cluster with exactly zero mass would give 0 * log(0) = NaN; clamping
        # only the log argument makes that term 0 and leaves every other value
        # unchanged.
        safe_p = p.clamp_min(torch.finfo(p.dtype).tiny)
        return math.log(p.size(0)) + (p * torch.log(safe_p)).sum()

    def forward(self, c_i, c_j):
        """Compute the loss for two ``(batch, class_num)`` assignment matrices.

        Returns:
            Scalar tensor: contrastive term (summed cross-entropy divided by
            ``2 * class_num``) plus the entropy term of both views.
        """
        ne_loss = self._negative_entropy(c_i) + self._negative_entropy(c_j)

        # Transpose so that each row describes one cluster across the batch.
        c_i = c_i.t()
        c_j = c_j.t()
        N = 2 * self.class_num
        c = torch.cat((c_i, c_j), dim=0)

        sim = self.similarity_f(c.unsqueeze(1), c.unsqueeze(0)) / self.temperature
        sim_i_j = torch.diag(sim, self.class_num)
        sim_j_i = torch.diag(sim, -self.class_num)

        positive_clusters = torch.cat((sim_i_j, sim_j_i), dim=0).reshape(N, 1)
        # ``.to`` is a no-op when mask and similarities already share a device.
        negative_clusters = sim[self.mask.to(sim.device)].reshape(N, -1)

        # The positive logit is placed in column 0, so the target class is 0.
        labels = torch.zeros(N).to(positive_clusters.device).long()
        logits = torch.cat((positive_clusters, negative_clusters), dim=1)
        loss = self.criterion(logits, labels)
        loss /= N

        return loss + ne_loss


# Main implementation of proposed model

In [None]:
class Network(nn.Module):
    """Backbone with two projection heads: instance embeddings and cluster probabilities.

    Args:
        resnet: Feature extractor; must expose ``rep_dim`` (its output width).
        feature_dim: Output width of the instance head.
        class_num: Number of clusters, i.e. output width of the cluster head.
    """

    def __init__(self, resnet, feature_dim, class_num):
        super().__init__()
        self.resnet = resnet
        self.feature_dim = feature_dim
        self.cluster_num = class_num

        rep_dim = self.resnet.rep_dim
        # Instance head: two-layer MLP; its output is L2-normalised in forward().
        instance_layers = [
            nn.Linear(rep_dim, rep_dim),
            nn.ReLU(),
            nn.Linear(rep_dim, self.feature_dim),
        ]
        self.instance_projector = nn.Sequential(*instance_layers)
        # Cluster head: two-layer MLP followed by a softmax over clusters.
        cluster_layers = [
            nn.Linear(rep_dim, rep_dim),
            nn.ReLU(),
            nn.Linear(rep_dim, self.cluster_num),
            nn.Softmax(dim=1),
        ]
        self.cluster_projector = nn.Sequential(*cluster_layers)

    def forward(self, x_i, x_j):
        """Return ``(z_i, z_j, c_i, c_j)`` for two augmented views of a batch.

        ``z_*`` are row-normalised instance embeddings; ``c_*`` are the softmax
        cluster assignments.
        """
        representations = [self.resnet(view) for view in (x_i, x_j)]
        z_i, z_j = [normalize(self.instance_projector(h), dim=1) for h in representations]
        c_i, c_j = [self.cluster_projector(h) for h in representations]
        return z_i, z_j, c_i, c_j

    def forward_cluster(self, x):
        """Return the index of the most probable cluster for every sample in ``x``."""
        probabilities = self.cluster_projector(self.resnet(x))
        return torch.argmax(probabilities, dim=1)


### ResNet34 implementation, used as the backbone for a fair comparison with the original paper

In [None]:
class ResNet(nn.Module):
    """ResNet feature extractor without a classification head.

    The network consists of a 7x7 convolution stem with max pooling, four
    stages of residual blocks (``layer1`` .. ``layer4``) and a global average
    pool. ``forward`` returns the flattened pooled features; no fully
    connected layer is defined. ``rep_dim`` holds ``512 * block.expansion``
    and is read by the projection heads in ``Network``.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the stem, the four residual stages and initialise the weights.

        Args:
            block: Residual block class (``BasicBlock`` or ``Bottleneck``); its
                ``expansion`` attribute is used to size the stages.
            layers: Sequence of four ints, the number of blocks in each stage.
            num_classes: Accepted but not used by this constructor.
            zero_init_residual: If True, zero the weight of the last norm layer
                of every ``Bottleneck`` / ``BasicBlock``.
            groups: Forwarded to every block.
            width_per_group: Forwarded to every block as ``base_width``.
            replace_stride_with_dilation: ``None`` or three booleans, one each
                for ``layer2``..``layer4``; forwarded as ``dilate``.
            norm_layer: Normalisation layer factory; defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given and does
                not have exactly three elements.
        """
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Running channel count / dilation; both are updated by _make_layer.
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # Stem: 7x7 stride-2 convolution on 3-channel input, norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 use stride 2 unless replaced by dilation.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Width of the feature vector; Network sizes its projection heads from this.
        self.rep_dim = 512 * block.expansion

        # Kaiming-normal init for convolutions; norm layers start as identity (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``.

        Args:
            block: Residual block class.
            planes: Base channel count of the stage (the stage outputs
                ``planes * block.expansion`` channels).
            blocks: Number of blocks in the stage.
            stride: Stride of the first block.
            dilate: If True, the stride is folded into ``self.dilation`` and
                the first block uses stride 1 instead.

        Returns:
            ``nn.Sequential`` containing the blocks of the stage.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        # A 1x1 projection on the shortcut is needed whenever the first block
        # changes the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        # Only the first block receives the stride and the shortcut projection.
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        """Run stem, the four stages and global average pooling; return flattened features."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Collapse everything after the batch dimension into one feature axis.
        x = torch.flatten(x, 1)

        return x

    def forward(self, x):
        """Return the pooled feature vectors for the image batch ``x``."""
        return self._forward_impl(x)


def get_resnet(name):
    """Build and return the backbone registered under ``name``.

    Args:
        name: Backbone identifier; only ``"ResNet34"`` is registered.

    Returns:
        A freshly constructed ``ResNet`` instance.

    Raises:
        KeyError: If ``name`` is not a registered backbone.
    """
    # Factories instead of instances: the name is validated before any
    # network is built, so an invalid name no longer costs a full ResNet34
    # construction (and its random initialisation).
    resnets = {
        "ResNet34": lambda: ResNet(block=BasicBlock, layers=[3, 4, 6, 3]),
    }
    if name not in resnets:
        raise KeyError(f"{name} is not a valid ResNet version")
    return resnets[name]()


### Data augmentation transforms, including a random Gaussian blur

In [None]:
class GaussianBlur:
    """Blur an image with probability 0.5, using a randomly drawn sigma.

    Args:
        kernel_size: Side length of the square Gaussian kernel.
        min: Lower bound of the sigma range.
        max: Upper bound of the sigma range.
    """

    def __init__(self, kernel_size, min=0.1, max=2.0):
        self.kernel_size = kernel_size
        self.min, self.max = min, max

    def __call__(self, sample):
        """Return ``sample`` as a NumPy array, blurred in half of the calls."""
        image = np.array(sample)
        # First draw decides whether to blur at all.
        if np.random.random_sample() >= 0.5:
            return image
        # Second draw picks sigma uniformly from [min, max).
        sigma_range = self.max - self.min
        sigma = sigma_range * np.random.random_sample() + self.min
        kernel = (self.kernel_size, self.kernel_size)
        return cv2.GaussianBlur(image, kernel, sigma)


class Transforms:
    """Augmentation pipelines for contrastive training and for evaluation.

    ``train_transform`` is a random crop/flip/colour-jitter/grayscale pipeline
    (optionally followed by ``GaussianBlur``); ``test_transform`` only resizes.
    Both end with ``ToTensor`` and, when ``mean`` and ``std`` are given, a
    ``Normalize`` step. Calling the instance returns two independently
    augmented training views of the same image.

    Args:
        size: Output side length of the crop / resize.
        s: Strength multiplier for the colour jitter.
        mean: Optional per-channel mean for normalisation.
        std: Optional per-channel std for normalisation.
        blur: Whether to append the random Gaussian blur to the training pipeline.
    """

    def __init__(self, size, s=1.0, mean=None, std=None, blur=False):
        tf = torchvision.transforms

        jitter = tf.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
        train_steps = [
            tf.RandomResizedCrop(size=size),
            tf.RandomHorizontalFlip(),
            tf.RandomApply([jitter], p=0.8),
            tf.RandomGrayscale(p=0.2),
        ]
        if blur:
            train_steps.append(GaussianBlur(kernel_size=23))
        train_steps.append(tf.ToTensor())

        test_steps = [
            tf.Resize(size=(size, size)),
            tf.ToTensor(),
        ]

        # Normalisation is only added when both statistics are provided.
        if mean and std:
            train_steps.append(tf.Normalize(mean=mean, std=std))
            test_steps.append(tf.Normalize(mean=mean, std=std))

        self.train_transform = tf.Compose(train_steps)
        self.test_transform = tf.Compose(test_steps)

    def __call__(self, x):
        """Return two independently augmented training views of ``x``."""
        first_view = self.train_transform(x)
        second_view = self.train_transform(x)
        return first_view, second_view


## Training

In [None]:
def train():
    """Run one training epoch over the global ``data_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion_instance``
    and ``criterion_cluster``. Every 50 steps the two loss terms are printed.

    Returns:
        Sum of the per-step total losses (instance + cluster) for the epoch.
    """
    running_loss = 0
    for step, ((view_a, view_b), _) in enumerate(data_loader):
        optimizer.zero_grad()
        view_a, view_b = view_a.to('cuda'), view_b.to('cuda')

        z_i, z_j, c_i, c_j = model(view_a, view_b)
        loss_instance = criterion_instance(z_i, z_j)
        loss_cluster = criterion_cluster(c_i, c_j)
        loss = loss_instance + loss_cluster

        loss.backward()
        optimizer.step()

        if not step % 50:
            print(
                f"Step [{step}/{len(data_loader)}]\t loss_instance: {loss_instance.item()}\t loss_cluster: {loss_cluster.item()}")
        running_loss += loss.item()
    return running_loss


if __name__ == "__main__":

    # Run configuration.
    SEED = 42
    BATCH_SIZE = 128
    NUM_EPOCHS = 100
    CHECKPOINT_EPOCHS = (20, 50)  # intermediate checkpoints; the final one is saved after the loop

    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    np.random.seed(SEED)

    # prepare data
    dataset = torchvision.datasets.ImageFolder(
            root='/content/dataset',
            transform=Transforms(size=224, blur=True),
        )
    class_num = 6
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        # InstanceLoss is built for a fixed batch size, so the last partial batch is dropped.
        drop_last=True,
        num_workers=4,
    )
    # initialize model
    res = get_resnet("ResNet34")
    model = Network(res, 128, class_num)
    model = model.to('cuda')
    # optimizer / loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=0.)
    loss_device = torch.device("cuda")
    criterion_instance = InstanceLoss(BATCH_SIZE, 0.5, loss_device).to(loss_device)
    criterion_cluster = ClusterLoss(class_num, 1.0, loss_device).to(loss_device)
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('/content/Model', exist_ok=True)
    # train
    # range(1, NUM_EPOCHS + 1) runs all NUM_EPOCHS epochs; the previous
    # range(1, 100) stopped after 99 while the log and the final checkpoint
    # were labelled 100.
    for epoch in range(1, NUM_EPOCHS + 1):
        loss_epoch = train()
        if epoch in CHECKPOINT_EPOCHS:
            save_model('/content/Model', model, optimizer, epoch)
        print(f"Epoch [{epoch}/{NUM_EPOCHS}]\t Loss: {loss_epoch / len(data_loader)}")
    save_model('/content/Model', model, optimizer, NUM_EPOCHS)


Step [0/11]	 loss_instance: 5.514197826385498	 loss_cluster: 2.430302143096924
Epoch [1/100]	 Loss: 7.759407303550026
Step [0/11]	 loss_instance: 5.319942474365234	 loss_cluster: 2.212207317352295
Epoch [2/100]	 Loss: 7.422675826332786
Step [0/11]	 loss_instance: 5.437200546264648	 loss_cluster: 2.193316698074341
Epoch [3/100]	 Loss: 7.415146784348921
Step [0/11]	 loss_instance: 5.304917812347412	 loss_cluster: 2.117854595184326
Epoch [4/100]	 Loss: 7.354842402718284
Step [0/11]	 loss_instance: 5.138856887817383	 loss_cluster: 2.0897817611694336
Epoch [5/100]	 Loss: 7.32236723466353
Step [0/11]	 loss_instance: 5.172810077667236	 loss_cluster: 2.0502753257751465
Epoch [6/100]	 Loss: 7.2449844967235215
Step [0/11]	 loss_instance: 5.174536228179932	 loss_cluster: 2.110349655151367
Epoch [7/100]	 Loss: 7.236055504192006
Step [0/11]	 loss_instance: 5.122867584228516	 loss_cluster: 2.0262434482574463
Epoch [8/100]	 Loss: 7.198570511557839
Step [0/11]	 loss_instance: 5.174371242523193	 loss_c

In [None]:
!pip install munkres

Collecting munkres
  Downloading munkres-1.1.4-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: munkres
Successfully installed munkres-1.1.4


## Testing phase

In [None]:
from munkres import Munkres

def evaluate(label, pred):
    """Score a clustering against ground-truth labels.

    Args:
        label: Ground-truth class labels.
        pred: Predicted cluster ids.

    Returns:
        Tuple ``(nmi, ari, f, acc)``: normalized mutual information, adjusted
        Rand index, Fowlkes-Mallows score, and accuracy after mapping cluster
        ids to labels with ``get_y_preds``.
    """
    scores = [
        metrics.normalized_mutual_info_score(label, pred),
        metrics.adjusted_rand_score(label, pred),
        metrics.fowlkes_mallows_score(label, pred),
    ]
    # Accuracy needs cluster ids translated into label ids first.
    aligned_pred = get_y_preds(label, pred, len(set(label)))
    scores.append(metrics.accuracy_score(aligned_pred, label))
    return tuple(scores)


def calculate_cost_matrix(C, n_clusters):
    """Turn a confusion matrix into an assignment cost matrix.

    Entry ``[j, i]`` of the result is the sum of column ``j`` of ``C`` minus
    ``C[i, j]``.

    Args:
        C: 2-D array (confusion matrix).
        n_clusters: Number of rows/columns of the returned square matrix.

    Returns:
        Float array of shape ``(n_clusters, n_clusters)``.
    """
    cost_matrix = np.zeros((n_clusters, n_clusters))
    for cluster in range(n_clusters):
        column = C[:, cluster]
        column_total = np.sum(column)
        cost_matrix[cluster] = [column_total - column[row] for row in range(n_clusters)]
    return cost_matrix


def get_cluster_labels_from_indices(indices):
    """Extract the second element of every pair in ``indices``.

    Args:
        indices: Sequence of pairs, as returned by ``Munkres().compute``.

    Returns:
        1-D float array whose ``k``-th entry is ``indices[k][1]``.
    """
    return np.array([pair[1] for pair in indices], dtype=float)

def get_y_preds(y_true, cluster_assignments, n_clusters):
    """Map predicted cluster ids onto label ids via a Hungarian assignment.

    Args:
        y_true: Ground-truth labels.
        cluster_assignments: Integer array of predicted cluster ids.
        n_clusters: Number of clusters used for the cost matrix.

    Returns:
        Array with one mapped label per entry of ``cluster_assignments``.
    """
    contingency = metrics.confusion_matrix(y_true, cluster_assignments, labels=None)
    pairing = Munkres().compute(calculate_cost_matrix(contingency, n_clusters))
    cluster_to_label = get_cluster_labels_from_indices(pairing)
    # Shift ids so the smallest one is 0 before using them as indices.
    smallest_id = np.min(cluster_assignments)
    if smallest_id != 0:
        cluster_assignments = cluster_assignments - smallest_id
    return cluster_to_label[cluster_assignments]

## Results after 20 epochs

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def inference(loader, model, device):
    """Predict a cluster id for every sample yielded by ``loader``.

    Args:
        loader: Iterable of ``(x, y)`` batches.
        model: Module exposing ``forward_cluster``; it is switched to eval mode.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(feature_vector, labels_vector)`` of NumPy arrays: the
        predicted cluster ids and the labels ``y`` taken from the loader.
    """
    model.eval()
    predicted, targets = [], []
    for step, (x, y) in enumerate(loader):
        with torch.no_grad():
            c = model.forward_cluster(x.to(device))
        predicted.extend(c.detach().cpu().numpy())
        targets.extend(y.numpy())
        if not step % 20:
            print(f"Step [{step}/{len(loader)}]\t Computing features...")
    feature_vector, labels_vector = np.array(predicted), np.array(targets)
    print("Features shape {}".format(feature_vector.shape))
    return feature_vector, labels_vector

# Reload the images with the deterministic test pipeline (resize + ToTensor, no augmentation).
dataset = torchvision.datasets.ImageFolder(
        root='/content/dataset',
        transform=Transforms(size=224).test_transform,
    )
class_num = 6
# Keep every sample, in order: no shuffling and no dropped last batch.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=500,
    shuffle=False,
    drop_last=False,
    num_workers=4,
)

# Rebuild the network and load the weights stored under the 'net' key of the epoch-20 checkpoint.
res = get_resnet('ResNet34')
model = Network(res, 128, class_num)
model_fp = os.path.join('/content/Model', "checkpoint_{}.tar".format('20'))
model.load_state_dict(torch.load(model_fp, map_location=device.type)['net'])
model.to(device)

print("### Creating features from model ###")
# X holds the predicted cluster ids, Y the labels yielded by the loader.
X, Y = inference(data_loader, model, device)
# evaluate() takes (label, pred), hence the (Y, X) argument order.
nmi, ari, f, acc = evaluate(Y, X)
print("NMI, ARI, F, ACC at 20 epoch")
print('NMI = {:.4f} ARI = {:.4f} F = {:.4f} ACC = {:.4f}'.format(nmi, ari, f, acc))

### Creating features from model ###
Step [0/4]	 Computing features...
Features shape (1530,)
6 1530
NMI, ARI, F, ACC at 20 epoch
NMI = 0.3404 ARI = 0.2909 F = 0.4276 ACC = 0.5667


## Result after 50 epochs

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def inference(loader, model, device):
    """Predict a cluster id for every sample yielded by ``loader``.

    Args:
        loader: Iterable of ``(x, y)`` batches.
        model: Module exposing ``forward_cluster``; it is switched to eval mode.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(feature_vector, labels_vector)`` of NumPy arrays: the
        predicted cluster ids and the labels ``y`` taken from the loader.
    """
    model.eval()
    predicted, targets = [], []
    for step, (x, y) in enumerate(loader):
        with torch.no_grad():
            c = model.forward_cluster(x.to(device))
        predicted.extend(c.detach().cpu().numpy())
        targets.extend(y.numpy())
        if not step % 20:
            print(f"Step [{step}/{len(loader)}]\t Computing features...")
    feature_vector, labels_vector = np.array(predicted), np.array(targets)
    print("Features shape {}".format(feature_vector.shape))
    return feature_vector, labels_vector

# Reload the images with the deterministic test pipeline (resize + ToTensor, no augmentation).
dataset = torchvision.datasets.ImageFolder(
        root='/content/dataset',
        transform=Transforms(size=224).test_transform,
    )
class_num = 6
# Keep every sample, in order: no shuffling and no dropped last batch.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=500,
    shuffle=False,
    drop_last=False,
    num_workers=4,
)

# Rebuild the network and load the weights stored under the 'net' key of the epoch-50 checkpoint.
res = get_resnet('ResNet34')
model = Network(res, 128, class_num)
model_fp = os.path.join('/content/Model', "checkpoint_{}.tar".format('50'))
model.load_state_dict(torch.load(model_fp, map_location=device.type)['net'])
model.to(device)

print("### Creating features from model ###")
# X holds the predicted cluster ids, Y the labels yielded by the loader.
X, Y = inference(data_loader, model, device)
# evaluate() takes (label, pred), hence the (Y, X) argument order.
nmi, ari, f, acc = evaluate(Y, X)
print("NMI, ARI, F, ACC at 50 epoch")
print('NMI = {:.4f} ARI = {:.4f} F = {:.4f} ACC = {:.4f}'.format(nmi, ari, f, acc))

### Creating features from model ###
Step [0/4]	 Computing features...
Features shape (1530,)
6 1530
NMI, ARI, F, ACC at 50 epoch
NMI = 0.4812 ARI = 0.4644 F = 0.5627 ACC = 0.6719


## Result after 100 epochs

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def inference(loader, model, device):
    """Predict a cluster id for every sample yielded by ``loader``.

    Args:
        loader: Iterable of ``(x, y)`` batches.
        model: Module exposing ``forward_cluster``; it is switched to eval mode.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(feature_vector, labels_vector)`` of NumPy arrays: the
        predicted cluster ids and the labels ``y`` taken from the loader.
    """
    model.eval()
    predicted, targets = [], []
    for step, (x, y) in enumerate(loader):
        with torch.no_grad():
            c = model.forward_cluster(x.to(device))
        predicted.extend(c.detach().cpu().numpy())
        targets.extend(y.numpy())
        if not step % 20:
            print(f"Step [{step}/{len(loader)}]\t Computing features...")
    feature_vector, labels_vector = np.array(predicted), np.array(targets)
    print("Features shape {}".format(feature_vector.shape))
    return feature_vector, labels_vector

# Reload the images with the deterministic test pipeline (resize + ToTensor, no augmentation).
dataset = torchvision.datasets.ImageFolder(
        root='/content/dataset',
        transform=Transforms(size=224).test_transform,
    )
class_num = 6
# Keep every sample, in order: no shuffling and no dropped last batch.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=500,
    shuffle=False,
    drop_last=False,
    num_workers=4,
)

# Rebuild the network and load the weights stored under the 'net' key of the final checkpoint.
res = get_resnet('ResNet34')
model = Network(res, 128, class_num)
model_fp = os.path.join('/content/Model', "checkpoint_{}.tar".format('100'))
model.load_state_dict(torch.load(model_fp, map_location=device.type)['net'])
model.to(device)

print("### Creating features from model ###")
# X holds the predicted cluster ids, Y the labels yielded by the loader.
X, Y = inference(data_loader, model, device)
# evaluate() takes (label, pred), hence the (Y, X) argument order.
nmi, ari, f, acc = evaluate(Y, X)
print("NMI, ARI, F, ACC at 100 epoch")
print('NMI = {:.4f} ARI = {:.4f} F = {:.4f} ACC = {:.4f}'.format(nmi, ari, f, acc))

### Creating features from model ###
Step [0/4]	 Computing features...
Features shape (1530,)
6 1530
NMI, ARI, F, ACC at 100 epoch
NMI = 0.5169 ARI = 0.4550 F = 0.5551 ACC = 0.6458


***Note: this dataset is much smaller than the datasets used in the paper. The results obtained after 20, 50 and 100 epochs are shown above. This completes part 3, with everything implemented from scratch.***