In [113]:
# Import the packages
import torch
import os
import argparse
import numpy as np
import os.path as osp
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from tqdm import tqdm
#from torcheval.metrics import R2Score # To be implemented
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear
from torch.nn import BatchNorm1d
import torch.optim as optim

#from aux_func import*
from datset_process import *
from torch.utils.data import DataLoader

# Import the pretrained default model resnet18/resnet50/resnet101
from torchvision.models import resnet50

In [114]:
# The entropy_loss function measures the entropy of the predicted probability distribution. 
# It aims to maximize the entropy of the output probabilities, which can encourage the network to output less confident predictions. 
# The entropy loss can be used as a regularization technique to prevent overfitting.

def entropy_loss(p):
    p = F.softmax(p, dim=1)
    epsilon = 1e-5
    return (-1 * torch.sum(p * torch.log(p + epsilon))) / p.shape[0]

In [115]:
# Tune for class exclusion with values of Q
def NL_loss(f,labels, global_negative_pred):
    '''
    def th_delete(tensor, indices):
       mask = torch.ones(tensor.numel(), dtype=torch.bool)
       mask[indices] = False
       print(mask)
       return tensor[:, mask]
   
    f=th_delete(f,global_negative_pred)

    This method of masking softmax does not work
    '''
    
    Q_1 = 1 - F.softmax(f, dim=1) # softmax of f
   
    '''
    for i in global_negative_pred:              # Giving NAN values
        Q_1[0][i]=-1000
    #print(Q_1)
    '''
    Q = F.softmax(Q_1, dim=1) # for calculating weights
    weight = 1 - Q        
    # Set the weights of indices in global_negative_pred to zero
    #weight[:, global_negative_pred] = 0

    out = weight * torch.log(Q) # weight *  Changed here to see difference
    '''
    for i in global_negative_pred:
        out[0][i]=0.001                     # This increases accuracy of pseudo labels
    #print(weight,weight.shape)
    '''
    #print("Output of NL_loss:",out)
    return F.nll_loss(out, labels)  # ignore_index=global_negative_pred  Ignores all classes

In [116]:
# Define arg parser
seed=200
paser = argparse.ArgumentParser() 
args = paser.parse_args("")
np.random.seed(200)
torch.manual_seed(seed)
device=input("Enter cuda or cpu for device type")
device = torch.device(device)
#'cuda' if torch.cuda.is_available() else
device

device(type='cpu')

In [117]:
# Take user inputs 
args.dataset='miniimagenet'
args.data_path='datasets/data/mini-imagenet/'
args.num_classes=5 # Output dimension
args.image_size=84

# FSL definitions
args.num_ways=5 # Number of classes per batch
args.k_shot=5 # number of Images per class
args.query=30 # Query set of the FSL
args.unlabel=50 # Number of unlabel samples per class 
args.steps=5 # Select how many unlabeled data for each class in one iteration.
args.threshold=0.2  # Since we have 5 classes in each support set. So if all the classes are equally probable then mininmum p=0.2

# set in semi-supervised few-shot learning
num_support = args.k_shot * args.num_ways
num_query = args.query * args.num_ways
num_unlabeled = args.unlabel * args.num_ways

# Training or testing definitions 
args.episodes=600

In [118]:
# Number of sets of unlabeled data
num_select = int(args.unlabel / args.steps)

In [119]:
# Import the resnet model and define the model to be used 
model=resnet50(num_classes=1000,pretrained=True)
#model=torch.load('Mymodel.pt')
#model=resnet12(args.num_classes)
model=model.to(device)



In [120]:
# Freeze the CNN layers of Resnet
'''
for param in model.parameters():
    param.requires_grad=False
'''
model.fc=nn.Flatten()  # Flatten the output to obtain the embeddings
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [121]:
# Create the ANN classifier
Classifier=nn.Sequential(nn.Linear(2048,512,bias=True),nn.Linear(512,128,bias=True),nn.Linear(128,5,bias=True))
Classifier=Classifier.to(device)

In [122]:
#  Get the features from the resnet model

def get_features(model,input):
    '''
    The function first checks if the input batch size exceeds a desired batch size. If it does, the input batch is split into smaller batches of size 64, and the 
    ResNet model is called on each smaller batch using the model function with the return_feat=True argument to return the output features in addition to the classification results. 
    The output features are then detached from the computation graph, transferred to the CPU, and appended to a list embed. 
    Once all batches have been processed, the list of output features is concatenated using torch.cat to form a single tensor embed.
    If the input batch size is less than or equal to the desired batch size, the ResNet model is called once with the input batch using the model function with the return_feat=True argument to return the output features.
    Finally, the function checks if the shape of the output features embed matches the shape of the input batch, and returns the output features as a NumPy array using the numpy() method.
    '''
    batch_size = 64  # Use the desired batch size
    input = torch.tensor(input).to(device)
    
    # Check to prevent the input shape from exceeding the desired batch size
    if input.shape[0] > batch_size:
        embed = []
        i = 0
        while i <= input.shape[0]-1:
            embed.append(model(input[i:i+batch_size])) # Changed. Check
            i += batch_size
        embed = torch.cat(embed)
    else:
        embed = model(input) # Changed. Check. Removed return feat
    assert embed.shape[0] == input.shape[0] # Check if input shape = embed shape  as we will be working on input shape.
    return embed.cpu().detach().numpy()

In [123]:
def train_loop(model1, dataset, loss_fn, optimizer, inputs, targets,negative_pred):
    acc =0
    def forward_fn(data, label,neg_pr):
        logits = model1(data)
        #logits=F.softmax(logits) # Changed here       
        loss = loss_fn(logits, label,neg_pr) #+ entropy_loss(logits) # Combination of two loss functions used here. The entropy_loss act as regularizer
        # loss = loss_fn(logits, label)
        print(loss)
        return loss, logits
    
    def train_step(data, label,neg_pre):
        optimizer.zero_grad()
        loss, logits = forward_fn(data, label,neg_pre)
        loss.backward()
        optimizer.step()
        return loss.item(), logits
    
    model1.train()
    final_loss=0
    for i in range(len(inputs)):
        #print(i)
        img = torch.tensor(inputs[i])
        y = torch.tensor(targets[i]).long() # Changed
        img=img.unsqueeze(0).to(device)
        y=y.unsqueeze(0).to(device)
        loss, logits = train_step(img, y,negative_pred[i])
        final_loss+=loss
    final_loss/=len(inputs)
    # Check accuracy
    for i in range(len(inputs)):
        model1.eval()
        #print(i)
        img = torch.tensor(inputs[i])
        y = torch.tensor(targets[i]).long() # Changed
        img=img.unsqueeze(0).to(device)
        y=y.unsqueeze(0).to(device)
        preds = model1(img)
        preds = torch.argmax(preds, 1).reshape(-1)
        y = y.reshape(-1)
        if (preds==y):
          acc +=1
    acc = acc/len(inputs)*100
    return final_loss, logits,acc

In [124]:
def train_loop2(model1, dataset, loss_fn, optimizer, inputs, targets):
    acc =0
    def forward_fn(data, label):
        logits = model1(data)
        #logits=F.softmax(logits) # Changed here       
        loss = loss_fn(logits, label) + entropy_loss(logits) # Combination of two loss functions used here. The entropy_loss act as regularizer
        # loss = loss_fn(logits, label)
        return loss, logits
    
    def train_step(data, label):
        optimizer.zero_grad()
        loss, logits = forward_fn(data, label)
        loss.backward()
        optimizer.step()
        return loss.item(), logits
    
    model1.train()
    final_loss=0
    for i in range(len(inputs)):
        #print(i)
        img = torch.tensor(inputs[i])
        y = torch.tensor(targets[i]).long() # Changed
        img=img.unsqueeze(0).to(device)
        y=y.unsqueeze(0).to(device)
        loss, logits = train_step(img, y)
        final_loss+=loss
    final_loss/=len(inputs)
    # Check accuracy
    for i in range(len(inputs)):
        model1.eval()
        #print(i)
        img = torch.tensor(inputs[i])
        y = torch.tensor(targets[i]).long() # Changed
        img=img.unsqueeze(0).to(device)
        y=y.unsqueeze(0).to(device)
        preds = model1(img)
        preds = torch.argmax(preds, 1).reshape(-1)
        y = y.reshape(-1)
        if (preds==y):
          acc +=1
    acc = acc/len(inputs)*100
    return final_loss, logits,acc

In [125]:
def test_loop(model1,inputs, targets):
    acc =0
    # Check accuracy
    for i in range(len(inputs)):
        model1.eval()
        #print(i)
        img = torch.tensor(inputs[i])
        y = torch.tensor(targets[i]).long() # Changed
        img=img.unsqueeze(0).to(device)
        y=y.unsqueeze(0).to(device)
        preds = model1(img)
        preds = torch.argmax(preds, 1).reshape(-1)
        y = y.reshape(-1)
        if (preds==y):
          acc +=1
    acc = acc/len(inputs)*100
    return acc

In [126]:
def get_preds(out):
    preds = torch.argmin(out, dim=0).item()
    return preds, preds

In [127]:
def get_neg_preds(out,nl_pred):
    def th_delete(tensor, indices):
       mask = torch.ones(tensor.numel(), dtype=torch.bool)
       mask[indices] = False
       #print(mask)
       return tensor[mask]
    #print(out,out.shape)
    new_out=th_delete(out,nl_pred)
    index =torch.argmin(new_out).item()
    val=new_out.view(-1)[index].item()
    original_index=0
    for i in range(out.shape[0]):
        if out[i]==val:
            original_index=i
    print(original_index,val,out)
    return original_index,val


In [128]:
def get_preds_position_(unlabel_out, position, _postion, thres=0.001):
    length = len(position)
    r = []
    un_idx = []
    for idx in range(length):
        pos = position[idx]
        _pos = _postion[idx]
        _out = unlabel_out[idx]  # [pos] removed
        out = F.softmax(_out,dim=0)  # Check if dim=0 or 1 # Correct
        #print(out)
        nl_pred=global_nl_pred[idx] # defined later in the main loop. Taking the list of negative labels already found
        
        '''
        The logic is changed here for proper implementation
        '''
        if len(pos)==1:
            un_idx.append(idx)
            continue
        t,conf=get_neg_preds(out,nl_pred)
        #print(conf)
        
        if conf>thres:
            un_idx.append(idx)
            if len(_pos)==0:
                r.append(torch.argmin(out,dim=1).item().asnumpy())  # check if asnumpy works here or not
            else:
                r.append(_pos[-1])
            continue
        #t, _ = get_preds(out)
        #a = pos[t]
        _postion[idx].append(t)
        position[idx].remove(t)
        #print(position)
        global_nl_pred[idx].append(t)   # Append the nl_value index
        r.append(t)
    return np.asarray(r), un_idx,_postion,position  # Changed here

In [129]:
# Define the dataset and the respective loaders
args.train_episodes=1
train_dataset = DataSet( args.image_size, 'test',args.data_path)
train_sampler = EpisodeSampler(train_dataset.label, args.train_episodes,args.num_ways, args.k_shot, args.query, args.unlabel)
trainloader = DataLoader(train_dataset, batch_sampler=train_sampler,shuffle=False, num_workers=8, pin_memory=True)

In [130]:
# Start the training process 

# in the below code I am not using query set which should be concated with unlabeled data.

for data in tqdm(trainloader):

        # create different sets of data from the train loader
        data = data.cpu()
        targets = torch.arange(args.num_ways).repeat(args.k_shot+args.query+args.unlabel).long()

        #print(data,targets)
    
        support_data = data[:num_support]
        query_data = data[num_support:num_support+num_query]
        unlabel_data = data[num_support+num_query:]

        support_inputs = normalize(get_features(model, support_data))  # get feature embeddings for 
        support_targets = targets[:num_support].cpu().numpy()

        #print(support_inputs.shape,support_targets)

        query_inputs = normalize(get_features(model, query_data))
        query_targets = targets[num_support:num_support+num_query].cpu().numpy()

        #print(query_inputs.shape,query_targets.shape)

        unlabel_inputs = normalize(get_features(model, unlabel_data))
        unlabel_targets = targets[num_support+num_query:].cpu().numpy()

        # The classifier has already been decided as linear classifier with a single dense layer and the output dimension=5

        ori_index = [x for x in range(250)]  # Store the index position of 250 images
        _POSITION = [[] for _ in range(250)] # Create a 2D list to store the list of 5 classes in passed along with the image batch.
        POSITION = [[0, 1, 2, 3, 4] for _ in range(250)] # [0,1,2,3,4] was chosen for encoding the 5 classes
        global_nl_pred=[[] for _ in range(250)] # Store the negative labels of each image after every iteration
        temp_nl_pred=[[] for _ in range(250)]
        
        # Define the loss criterion and the SGD optimizer used here for initial training of model.
        criterion = nn.CrossEntropyLoss(reduction='mean')
      
        optimizer = torch.optim.SGD(Classifier.parameters(), lr = 1e-3, momentum=0.9, weight_decay=5e-4)  # weight decay is for L2 regularization.

        # Begin initial training

        print('\n****************  Initial training the model on Support set')
        for epoch in range(80):
              loss,_,acc=train_loop2(Classifier, None, criterion, optimizer, support_inputs, support_targets)
              print(f"Train_Epoch: {epoch}  Train_Loss: {loss}  Accuracy on Support set:{acc}")
        
        # Start of code using complimentary labels
        '''
        Important : Stop loss propagation in the next step only. Not immediately when label is found
        '''
        i=1
        while(True):
            print('\n********************************************  Training with complimentatry labels')
            select_idx=[]
            nl_pred=[]
            unselect_idx=[]
            unlabel_out = Classifier(torch.tensor(unlabel_inputs).to(device))
            #print("unlabel_out shape", unlabel_out)
            nl_pred, unselect_idx,_POSITION,POSITION = get_preds_position_(unlabel_out, POSITION, _POSITION, args.threshold)  # Changed
            #print(unselect_idx)
            print(len(nl_pred))
            select_idx = [x for x in ori_index if x not in unselect_idx]
            _unlabel_embeddings = unlabel_inputs[select_idx]
            #print(_unlabel_embeddings)
            negative_pred=[global_nl_pred[x] for x in ori_index if x in select_idx]# list containing all the negative labels predicted for that class
            #nl_pred = [nl_pred[x] for x in ori_index if x in select_idx]  # May not be required because nl_pred comes without unselect indexes
            print(f"NL_pred of {i}th iteration",nl_pred)
            if(len(nl_pred)==0  or len(select_idx)==0):   # 
                  break
            optimizer_NL = torch.optim.SGD(Classifier.parameters(), lr = 1e-3, momentum=0.9, weight_decay=5e-4) #, weight_decay=5e-4
            print("Start of Epoch")
            for epoch in range(40):
                loss,_,acc = train_loop(Classifier, None, NL_loss, optimizer_NL, _unlabel_embeddings, nl_pred,negative_pred)
                print(f"Train_Epoch_NL: {epoch}  Train_Loss: {loss}  Accuracy on Support set:{acc}")
                

            # Break condition no negative label found below threshold condition
            i=i+1

        print("Start of training with pseudo labels\n")    
        print("Global NL pred list :",global_nl_pred) # Printing NULL . Check

        print("POSITION : ",POSITION)
        acc=0
        c=0
        print(unlabel_targets,unlabel_targets.shape)
        for i in unlabel_targets:
              if i in POSITION[c]:
                    acc+=1
              c+=1
        print("Accuracy of Pseudo labels :",acc/c)

        # to be corrected

        '''
        class_num = [0 for _ in range(5)]
        pseudo_label = []
        index_pl = []
        for idx in range(len(POSITION)):
            item = POSITION[idx]
            if len(item) == 1:
                lab = item[0]
                pseudo_label.append(item[-1])
                class_num[lab] += 1
                index_pl.append(idx)
        class_num = [item + 8 for item in class_num]
        max_ = max(class_num) * 1.0
        pseudo_label = np.asarray(pseudo_label)
        t1_ = unlabel_inputs[index_pl]
        t2_ = torch.tensor(pseudo_label, dtype=torch.int64)
        print(t2_)
        print("Start of final training")
        for epoch in range(20):
            loss,_,acc=train_loop2(Classifier, None, criterion, optimizer, t1_, t2_)
            print(f"Epoch: {epoch}  Loss: {acc}")   
        '''
        print("Start of testing")
        print("Accuracy of testing on Query Set: ",test_loop(Classifier, query_inputs,query_targets))


  input = torch.tensor(input).to(device)
  input = torch.tensor(input).to(device)
  input = torch.tensor(input).to(device)



****************  Initial training the model on Support set
Train_Epoch: 0  Train_Loss: 3.220634355545044  Accuracy on Support set:20.0
Train_Epoch: 1  Train_Loss: 3.2194923400878905  Accuracy on Support set:20.0
Train_Epoch: 2  Train_Loss: 3.218456325531006  Accuracy on Support set:20.0
Train_Epoch: 3  Train_Loss: 3.217427845001221  Accuracy on Support set:20.0
Train_Epoch: 4  Train_Loss: 3.216398363113403  Accuracy on Support set:20.0
Train_Epoch: 5  Train_Loss: 3.2153657722473143  Accuracy on Support set:20.0
Train_Epoch: 6  Train_Loss: 3.214328441619873  Accuracy on Support set:20.0
Train_Epoch: 7  Train_Loss: 3.213284845352173  Accuracy on Support set:20.0
Train_Epoch: 8  Train_Loss: 3.2122332096099853  Accuracy on Support set:20.0
Train_Epoch: 9  Train_Loss: 3.211172037124634  Accuracy on Support set:20.0
Train_Epoch: 10  Train_Loss: 3.2100995635986327  Accuracy on Support set:20.0
Train_Epoch: 11  Train_Loss: 3.2090141773223877  Accuracy on Support set:20.0
Train_Epoch: 12  Tra

100%|██████████| 1/1 [03:49<00:00, 229.24s/it]
