In [None]:
# Installations and imports
!pip install datasets
import pandas as pd
import numpy as np
from datasets import load_dataset
from scipy.spatial import distance



In [None]:
# Mount Google Drive at /content/drive so the Drive paths used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the training split of CIFAR-100 through the `datasets` loader.
dataset = load_dataset('cifar100', split='train')

Downloading:   0%|          | 0.00/2.21k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading and preparing dataset cifar100/cifar100 (download: 161.17 MiB, generated: 418.62 MiB, post-processed: Unknown size, total: 579.80 MiB) to /root/.cache/huggingface/datasets/cifar100/cifar100/1.0.0/0f9be8dd0480d385177a5c250878f4480651bbf0fc86d714b33d56c9aaad5160...


Downloading:   0%|          | 0.00/169M [00:00<?, ?B/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset cifar100 downloaded and prepared to /root/.cache/huggingface/datasets/cifar100/cifar100/1.0.0/0f9be8dd0480d385177a5c250878f4480651bbf0fc86d714b33d56c9aaad5160. Subsequent calls will reuse this data.


In [None]:
# Turn the CIFAR-100 training split into a flat table, one row per image.
# The coarse label column is discarded; the fine label is kept.
cifar100train = pd.DataFrame(dataset).drop(columns=['coarse_label'])
cifar100train.to_csv('CIFAR100TRAIN.csv', index=False)

# Flatten every image into a 1-D vector of raw values.
feat_list = [np.array(image).flatten() for image in cifar100train['img']]

# One column per flattened value, plus the fine label stored under 'class'.
data = pd.DataFrame(feat_list)
data['class'] = cifar100train['fine_label'].tolist()
# Written to the working directory; copy it to the correct Drive location afterwards.
data.to_pickle('zeroshot_data.pkl')

In [None]:
import random

# Split the label space into unseen (zero-shot) and seen (training) classes.
# The unseen labels are a random sample of the label range, not the first
# `cutoff_label` ids.
cutoff_label = 10   # number of classes held out as unseen
total_class = 100   # labels run from 0 to total_class - 1
SPLIT_SEED = 42     # fixed seed so Restart & Run All reproduces the same split

# A dedicated Random instance makes the split deterministic without touching
# the global `random` state.
# NOTE: splits written by earlier, unseeded runs will not match this one.
zsl_labels = sorted(random.Random(SPLIT_SEED).sample(range(total_class), cutoff_label))

# Set membership avoids rescanning the unseen list for every label.
_zsl_label_set = set(zsl_labels)
train_labels = [label for label in range(total_class) if label not in _zsl_label_set]

In [None]:
# Display the labels sampled as unseen (zero-shot) classes.
zsl_labels

[14, 23, 27, 31, 46, 52, 59, 68, 70, 72]

In [None]:
# Make sure the two class-list text files exist on Drive:
# train_classes.txt for the training labels and zsl_classes.txt for the zero-shot labels.
!touch '/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/train_classes.txt'
!touch '/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/zsl_classes.txt'

In [None]:
# Persist both label lists to Drive, one label per line.
# Training labels go to train_classes.txt ...
with open('/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/train_classes.txt', 'w') as train_file:
    train_file.writelines(f"{label}\n" for label in train_labels)

# ... and the zero-shot labels go to zsl_classes.txt.
with open('/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/zsl_classes.txt', 'w') as zsl_file:
    zsl_file.writelines(f"{label}\n" for label in zsl_labels)

In [None]:
# Read the training / zero-shot label lists back from Drive.
# Each list holds the stripped lines of its file, as strings.
TRAINCLASSESPATH  = "/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/train_classes.txt"
ZSLCLASSESPATH  = "/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/zsl_classes.txt"

# Plain assignment at cell level already creates notebook-wide names,
# so no `global` declaration is needed here.
with open(TRAINCLASSESPATH, 'r') as train_file:
    train_classes = [row.strip() for row in train_file]

with open(ZSLCLASSESPATH, 'r') as zsl_file:
    zsl_classes = [row.strip() for row in zsl_file]

In [None]:
# Load the merged word-vector / VGG-feature table from Drive.
# NOTE(review): later cells index this frame positionally by class id and treat the
# first 300 columns as the word vector — presumably one row per class in label
# order; confirm against the CSV.
merged_df = pd.read_csv('/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/Dataset/Merged_WordVec_VGGFeat.csv')

In [None]:
# Label each row with a running class id (row i gets id i).
# Requires merged_df to have exactly total_class rows.
cls = list(range(total_class))
merged_df['class'] = cls

In [None]:
# Class ids as integers, parsed from the label strings read from the class files.
unseen_class = list(map(int, zsl_classes))
seen_class = list(map(int, train_classes))
total_cls = total_class

# unseen class id -> array of word-vector distances to every seen class
unseen_wordvec = dict()

def minkowskiDists(unseen, seen):
  """Return the Minkowski distance between the word vectors of two classes.

  Both arguments are positional row indices into ``merged_df``. The first 300
  values of each row are used as that class's word vector, and the distance is
  computed by ``scipy.spatial.distance.minkowski`` with its default order.
  """
  unseen_vec = merged_df.iloc[unseen][:300].tolist()
  seen_vec = merged_df.iloc[seen][:300].tolist()
  return distance.minkowski(unseen_vec, seen_vec)


# For each unseen class, collect its word-vector distance to every seen class
# (in seen_class order) and store the result as one array per unseen class.
for uc in unseen_class:
  unseen_wordvec[uc] = np.array([minkowskiDists(uc, sc) for sc in seen_class])


# unseen class id -> VGG feature vector computed for that class
unseen_vgg = dict()

def vgg_wordvec(vgg_elem, wv):
  """Return the product of a VGG feature value and a word-vector weight."""
  scaled = vgg_elem * wv
  return scaled

# Feature block of the seen classes: take the seen-class rows as an array, then
# keep the columns after the first 300 (the word vector) and drop the last
# column (presumably the 'class' label column — confirm against merged_df).
vgg_feat_list_seen = np.array(merged_df.iloc[seen_class])[:, 300:-1]
def vgg(unseen):
  """Compute VGG features for one unseen class and store them in ``unseen_vgg``.

  The stored vector is the weighted sum of the rows of ``vgg_feat_list_seen``,
  divided by the number of weights. The weights are the distances held in
  ``unseen_wordvec[unseen]``. Nothing is returned.

  A commented-out loop-based variant that scaled the unseen class's own
  features has been removed.
  """
  weight_row = unseen_wordvec[unseen].reshape(1, -1)   # shape (1, n_weights)
  n_weights = weight_row.shape[1]
  weighted_mean = np.dot(weight_row, vgg_feat_list_seen) / n_weights
  unseen_vgg[unseen] = weighted_mean.squeeze()

# Compute the features for every unseen class.
for uc in unseen_class:
  vgg(uc)

In [None]:
dummy_df = merged_df.copy(deep=True)  # Independent working copy, so edits below leave merged_df untouched.

In [None]:
# Overwrite each unseen class's row in dummy_df with its original word vector
# (first 300 columns of merged_df) followed by the newly computed VGG features.
for uc in unseen_class:
  wv = merged_df.iloc[uc, :300].to_numpy()
  vg = unseen_vgg[uc]
  new_feat = np.append(wv, vg)
  # Positional write via .iloc: an integer slice through .loc on a non-integer
  # column index was only a deprecated positional fallback in pandas 1.x and
  # raises TypeError from pandas 2.0 on. The width follows the data instead of
  # a hard-coded 400.
  # Assumes the default RangeIndex, so row position uc equals row label uc.
  dummy_df.iloc[uc, :len(new_feat)] = new_feat

# The class column is not part of the feature table saved afterwards.
dummy_df.drop(columns=['class'], inplace=True)

  


In [None]:
# Build two per-class tables whose first entry is the class id:
#   emptyarray     -> id followed by the first 300 columns of dummy_df
#   emptyarray_vgg -> id followed by the first 400 columns of dummy_df
class_ids = range(total_class)
emptyarray = [[label, *dummy_df.iloc[label, :300]] for label in class_ids]
emptyarray_vgg = [[label, *dummy_df.iloc[label, :400]] for label in class_ids]

# Both files land in the working directory; copy them to the correct Drive
# location afterwards.
with open('class_vec.npy', 'wb') as vec_file:
    np.save(vec_file, np.array(emptyarray))

with open('class_vec_new.npy', 'wb') as vec_vgg_file:
    np.save(vec_vgg_file, np.array(emptyarray_vgg))

In [None]:
# Pair each class id with that class's values from position 300 onward.
emptyarray_vgg_100 = [
  [label, np.array(dummy_df.loc[label][300:])]
  for label in range(total_class)
]


In [None]:
# Save the [class id, feature array] pairs to the working directory; copy the
# file to the correct Drive location afterwards.
# Each row mixes a scalar with an array, so the input is ragged and the array
# must be built with dtype=object. NumPy >= 1.24 raises ValueError for ragged
# input otherwise; earlier versions only emitted a VisibleDeprecationWarning.
# The file stores pickled objects: load it with np.load(..., allow_pickle=True).
with open('class_vec_new_100.npy', 'wb') as f:
    np.save(f, np.array(emptyarray_vgg_100, dtype=object))

  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# Rebuild the per-class tables as [class id, vector] pairs and write them to Drive:
#   emptyarray     -> id paired with the first 300 values of the row
#   emptyarray_vgg -> id paired with the full row
emptyarray = []
emptyarray_vgg = []
for cls in range(total_class):
  class_row = dummy_df.loc[cls]
  emptyarray.append( [cls, np.array(class_row[:300])] )
  emptyarray_vgg.append( [cls, np.array(class_row)] )

# Each row mixes a scalar with an array, so the input is ragged and needs
# dtype=object. NumPy >= 1.24 raises ValueError for ragged input otherwise;
# earlier versions only emitted a VisibleDeprecationWarning.
# The files store pickled objects: load them with np.load(..., allow_pickle=True).
with open('/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/class_vec.npy', 'wb') as f:
    np.save(f, np.array(emptyarray, dtype=object))

with open('/content/drive/MyDrive/Machine Learning/Zero Shot Learning/NTECH/CIFAR/ZSL/class_vec_new.npy', 'wb') as f:
    np.save(f, np.array(emptyarray_vgg, dtype=object))

  if __name__ == '__main__':
  del sys.path[0]


In [None]:
# Report the GPU attached to this runtime.
!nvidia-smi

Thu Dec 23 06:44:27 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces