<a href="https://colab.research.google.com/github/s-a-malik/zero-shot/blob/main/few_shot_loading_data_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch.utils.data as data
import json
import nltk
import os
from PIL import Image
from transformers import BertTokenizer, BertModel
from torchvision.transforms import Compose

class DefaultTransform(Compose):
  """Default image preprocessing pipeline, usable as ``image_transforms``."""

  def __init__(self, image_size: int):
    """
    :param image_size: side length, in pixels, of the square image produced
    """
    # Imported locally because the file-level import only brings in `Compose`.
    from torchvision.transforms import Resize, ToTensor

    # NOTE(review): the original `__init__` had no body at all (a syntax error
    # that prevented the whole cell from running). Resize-then-tensor is an
    # assumed default pipeline — confirm it matches what the model expects.
    super().__init__([
      Resize((image_size, image_size)),
      ToTensor(),
    ])


class Zanim(data.Dataset):
  """Dataset pairing each image with the BERT encoding of its species description.

  The annotation JSON is read once in ``__init__``; per-image lists of ids,
  file names, species ids, species names and descriptions are kept in memory.
  """

  def __init__(self, root: str, json_path: str, image_transforms: "Compose" = None):
    """
    :param root: the path to the root directory of the dataset
    :param json_path: the path to the json file containing the annotations;
      it is joined onto ``root`` unless it already points inside ``root``
    :param image_transforms: a composed set of transforms applied to every
      image, or None to return the image as opened by PIL
    """
    json_path = self._resolve_json_path(root, json_path)
    self.root = root
    self.transforms = image_transforms
    with open(json_path) as annotation_file:
      annotations = json.load(annotation_file)

    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    self.bert_model = BertModel.from_pretrained("bert-base-uncased")

    images = annotations['images']
    categories = annotations['categories']
    # NOTE(review): `annotations['annotations']` is indexed by image id and
    # `categories` by category id, i.e. both are assumed to be lists whose
    # position equals the id — confirm against the annotation file.
    self.ids = [image['id'] for image in images]
    self.image_files = [image['file_name'] for image in images]
    self.species_id = [annotations['annotations'][image['id']]['category_id'] for image in images]
    self.species_name = [categories[category]['name'] for category in self.species_id]
    self.descriptions = [categories[category]['description'] for category in self.species_id]

  @staticmethod
  def _resolve_json_path(root, json_path):
    """Return ``json_path`` unchanged if it lies inside ``root``, else join it onto ``root``.

    The comparison is done on whole path components, so a root of ``data``
    does not match ``data2/train.json`` the way a substring test would.
    """
    root_norm = os.path.normpath(root)
    json_norm = os.path.normpath(json_path)
    if json_norm == root_norm or json_norm.startswith(root_norm + os.sep):
      return json_path
    return os.path.join(root, json_path)

  def __len__(self):
    """Number of images listed in the annotation file."""
    return len(self.ids)

  def __getitem__(self, index):
    """
    :param index: position of the sample in the per-image lists
    :return: ``((image, embedding), species_id, info)`` where ``embedding`` is
      the output of the BERT model for the description and ``info`` holds the
      raw description and the species name
    """
    # Imported locally: the file-level import only binds `torch.utils.data`.
    import torch

    image = Image.open(os.path.join(self.root, self.image_files[index]))
    if self.transforms:
      image = self.transforms(image)

    description = self.descriptions[index]
    species_id = self.species_id[index]
    # Descriptions are free-form paragraphs; truncate so that a long one
    # cannot exceed the maximum sequence length the model accepts.
    tokens = self.tokenizer(description, return_tensors="pt", truncation=True)
    # The encoder is used purely as a feature extractor here, so skip building
    # an autograd graph for every item.
    with torch.no_grad():
      embedding = self.bert_model(**tokens)
    info = {'description': description, 'species': self.species_name[index]}
    return (image, embedding), species_id, info


In [5]:
# Build the dataset from the training annotations stored on Google Drive.
# `json_path` is given relative to `root`; no image transforms are applied,
# so images come back exactly as PIL opens them.
z = Zanim(
    root="/content/drive/My Drive/NLP project/Dataset",
    json_path="train.json",
    image_transforms=None,
)

In [None]:
# Stand-alone tokenizer / model handles for experimenting outside the dataset.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")
# There is one (long) description per image, so show only a short preview
# instead of dumping the whole list into the cell output.
z.descriptions[:5]

In [29]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)
[K     |████████████████████████████████| 2.0MB 5.2MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 17.1MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 35.4MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=6ddbc184e8b

In [24]:
# Preview the first ten entries of each per-image list held by the dataset.
first_ten = slice(10)

print("Species names", "\n".join(z.species_name[first_ten]), sep="\n")
print("\nSpecies ID", z.species_id[first_ten], sep="\n")
print("\nDescriptions", "\n".join(z.descriptions[first_ten]), sep="\n")
print("\nImage files examples", "\n".join(z.image_files[first_ten]), sep="\n")

Species names
Marmota flaviventris
Dipsosaurus dorsalis
Gopherus agassizii
Grampus griseus
Sterna paradisaea
Fratercula arctica
Cistothorus palustris
Falco mexicanus
Crocodylus acutus
Lanius ludovicianus

Species ID
[524, 607, 665, 388, 100, 84, 242, 138, 545, 205]

Descriptions
Yellow-bellied marmots are rodents similar to squirrels, but bigger. They have grizzled brownish fur and a yellow belly, with a whitish spot between their eyes. Their ears are small and round and they have a short white muzzle with a black nose. Their bodies are heavy-set and they have short legs and a reddish-brown furry tail.
The Desert iguana is one of the most common lizards of the Sonoran and Mojave deserts of the southwestern United States and northwestern Mexico. It is pale gray-tan to cream in color with a light brown reticulated pattern on its back and sides. Down the center of the back is a row of slightly-enlarged, keeled dorsal scales that become slightly larger as you move down the back. The reticu

# Generating an SSH key for using Google Drive and GitHub

In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the dataset and the SSH key
# used elsewhere in this notebook are addressed under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**PUT YOUR EMAIL BELOW**

In [None]:
!ssh-keygen -t ed25519 -C "youmed.tech@gmail.com"

Generating public/private ed25519 key pair.
Enter file in which to save the key (/root/.ssh/id_ed25519): /content/drive/My Drive/ssh/id_ed25519
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /content/drive/My Drive/ssh/id_ed25519.
Your public key has been saved in /content/drive/My Drive/ssh/id_ed25519.pub.
The key fingerprint is:
SHA256:gj/J3Z3Uwyr2H/bwCbhphn+OCtJ3aXon4hoj9MskOUI youmed.tech@gmail.com
The key's randomart image is:
+--[ED25519 256]--+
|                 |
|                 |
|                 |
|     .       o   |
|   E... S   . +  |
|  . .o++ . ooo . |
|   . *=B..==+.+  |
|    . B.*++B++.=.|
|       +o+B=*o..o|
+----[SHA256]-----+


**CREATE A FOLDER NAMED `ssh` IN YOUR GOOGLE DRIVE** (the key above is saved to `My Drive/ssh/`)

In [None]:
# Rebuild the system-wide SSH client config from scratch so that every host
# (`Host *`) is offered the key pair stored on Google Drive.
!rm -rf /etc/ssh/ssh_config
!touch /etc/ssh/ssh_config
!echo "Host *" >> /etc/ssh/ssh_config
!echo " AddKeysToAgent yes" >> /etc/ssh/ssh_config
# NOTE(review): presumably carried over from a macOS config so that a
# `UseKeychain` option would be skipped rather than rejected; no `UseKeychain`
# line is written here — confirm this line is needed.
!echo " IgnoreUnknown UseKeychain" >> /etc/ssh/ssh_config
# The inner quotes are escaped because the key path contains a space.
!echo " IdentityFile \"/content/drive/My Drive/ssh/id_ed25519\"" >> /etc/ssh/ssh_config

In [None]:
# Start an ssh-agent and load the Drive-hosted private key into it.
# NOTE(review): each `!` line runs in its own subshell, so the agent variables
# exported by `eval` are presumably not visible to later cells; the later
# clone would then rely on the IdentityFile entry in ssh_config — confirm.
!eval "$(ssh-agent -s)" && ssh-add -k /content/drive/My\ Drive/ssh/id_ed25519

Agent pid 686
Identity added: /content/drive/My Drive/ssh/id_ed25519 (youmed.tech@gmail.com)


**COPY THE PUBLIC KEY (`id_ed25519.pub`) AND ADD IT TO YOUR GITHUB ACCOUNT**

In [None]:
# Record GitHub's RSA host key so the clone is not blocked by the interactive
# host-authenticity prompt, then clone the repository over SSH.
# NOTE(review): `>>` appends, so re-running this cell adds duplicate entries,
# and the scanned key is trusted without checking its fingerprint — consider
# verifying it against the fingerprints GitHub publishes.
!ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
!git clone git@github.com:s-a-malik/zero-shot.git

# github.com:22 SSH-2.0-babeld-8514a139
Cloning into 'zero-shot'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 26 (delta 7), reused 14 (delta 2), pack-reused 0[K
Receiving objects: 100% (26/26), 679.75 KiB | 2.57 MiB/s, done.
Resolving deltas: 100% (7/7), done.
