<a href="https://colab.research.google.com/github/jprashant21/language-translation/blob/main/transfomer%20few%20shot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install datasets transformers

Collecting datasets
  Downloading datasets-1.14.0-py3-none-any.whl (290 kB)
[K     |████████████████████████████████| 290 kB 5.0 MB/s 
[?25hCollecting transformers
  Downloading transformers-4.12.2-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 35.5 MB/s 
Collecting huggingface-hub<0.1.0,>=0.0.19
  Downloading huggingface_hub-0.0.19-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 3.8 MB/s 
Collecting fsspec[http]>=2021.05.0
  Downloading fsspec-2021.10.1-py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 54.9 MB/s 
[?25hCollecting xxhash
  Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)
[K     |████████████████████████████████| 243 kB 63.0 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 40.0 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)


In [6]:
import transformers
import torch.nn as nn
print(transformers.__version__)

4.12.2


In [3]:
## self attention dot product
# dimension query = [1,5,768]

def self_attn_dot_product(query,key,value):

  emb_dim = query.size(-1)
  score = torch.bmm(query,key.transpose(1,2)) / sqrt(emb_dim)
  weights = F.softmax(score, dim=-1)
  return torch.bmm(weights,value)


In [8]:
## attention head

class AttentionHead(nn.Module):

  def __init__(self,emd_dim,head_dim):
    super().__init__()
    self.q = nn.Linear(emb_dim,head_dim)
    self.k = nn.Linear(emb_dim,head_dim)
    self.v = nn.Linear(emb_dim,head_dim)

  def forward(self,hidden_state):
    return self_attn_dot_product(self.q(hidden_state), 
                                 self.k(hidden_state),
                                 self.v(hidden_state))


In [9]:
## multi headed attention

class MultiHeadedAttention(nn.Module):

  def __init__(self,config):
    super().__init__()
    self.emb_dim = config.emb_dim
    self.num_heads = config.num_heads
    self.head_dim = self.emb_dim // self.num_heads

    self.heads = nn.ModuleList(
        [AttentionHead(self.emb_dim,self.head_dim) for _ in self.num_heads]
    )
    self.output_linear = nn.Linear(self.emb_dim,self.emb_dim)

  def forward(hidden_state):
    x = torch.cat( [h(hidden_state) for h in self.heads], dim=-1)
    x = self.output_linear(x)
    return x

In [10]:
## feed forward

class FeedForward(nn.Module):

  def __init__(self,config):
    super().__init__()
    self.hidden_size = config.hidden_size
    self.intermediate_size = config.intermediate_size

    self.l1 = nn.Linear(self.hidden_size,config.intermediate_size)
    self.l2 = nn.Linear(config.intermediate_size,self.hidden_size)
    self.gleu = nn.GLEU()
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

  def forward(x):
    x = self.l1(x)
    x = self.gleu(x)
    x = self.l2(x)
    x = self.dropout(x)
    return x



In [11]:
## layer norm and skip connection

class TransformerEncoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.layer_norm_1 = nn.LayerNorm(config.hidden_size)
        self.layer_norm_2 = nn.LayerNorm(config.hidden_size)
        self.attention = MultiHeadAttention(config)
        self.feed_forward = FeedForward(config)

    def forward(x):
      x = self.layer_norm_1(x)
      x = x + self.attention(x)
      x = self.layer_norm_2(x)
      x = x + self.feed_forward(x)
      return x


In [12]:
## positional embedding

class PostionalEmbeddings(nn.Module):

  def __init__(self,config):
    super().__init__()
    self.token_embedding = nn.Embedding(config.vocab_size, config.hidden_size)
    self.position_embedding = nn.Embedding(config.max_position_size, config.hidden_size)
    self.layer_norm = nn.LayerNorm(config.hidden_size)
    self.dropout = nn.DropOut(config.hidden_dropout_prob)

  def forward(self,input_ids):

    seq_len = input_ids.size(1)
    position_ids = torch.arange(seq_len, dtype=torch.long).unsqueeze(0)
    token_embeddings = self.token_embedding(input_ids)
    position_embeddings = self.position_embeddings(position_ids)

    embeddings = token_embeddings + position_embeddings
    x = self.layer_norm(embeddings)
    x = self.dropout(x)
    return x



In [13]:
class TransformerEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embeddings = PostionalEmbeddings(config)
        self.encoderstack = nn.ModuleList([TransformerEncoderLayer(config)
                                     for _ in range(config.num_hidden_layers)])

    def forward(self, x):
        x = self.embeddings(x)
        for layer in self.encoderstack:
            x = layer(x)
        return x

encoder = TransformerEncoder(config)
encoder(inputs.input_ids).size()


NameError: ignored

In [14]:
from datasets import load_dataset, load_metric

In [15]:
datasets = load_dataset("swag", "regular")

Downloading:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Downloading and preparing dataset swag/regular (download: 41.92 MiB, generated: 44.96 MiB, post-processed: Unknown size, total: 86.88 MiB) to /root/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c...


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/6.71M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.24M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.21M [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset swag downloaded and prepared to /root/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [16]:
datasets

DatasetDict({
    train: Dataset({
        features: ['video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2', 'gold-source', 'ending0', 'ending1', 'ending2', 'ending3', 'label'],
        num_rows: 73546
    })
    validation: Dataset({
        features: ['video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2', 'gold-source', 'ending0', 'ending1', 'ending2', 'ending3', 'label'],
        num_rows: 20006
    })
    test: Dataset({
        features: ['video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2', 'gold-source', 'ending0', 'ending1', 'ending2', 'ending3', 'label'],
        num_rows: 20005
    })
})

In [17]:
datasets["train"][0]

{'ending0': 'passes by walking down the street playing their instruments.',
 'ending1': 'has heard approaching them.',
 'ending2': "arrives and they're outside dancing and asleep.",
 'ending3': 'turns the lead singer watches the performance.',
 'fold-ind': '3416',
 'gold-source': 'gold',
 'label': 0,
 'sent1': 'Members of the procession walk down the street holding small horn brass instruments.',
 'sent2': 'A drum line',
 'startphrase': 'Members of the procession walk down the street holding small horn brass instruments. A drum line',
 'video-id': 'anetv_jkn6uvmqwh4'}

In [21]:

from datasets import ClassLabel
import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
    
    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

show_random_elements(datasets["train"])

Unnamed: 0,video-id,fold-ind,startphrase,sent1,sent2,gold-source,ending0,ending1,ending2,ending3,label
0,lsmdc0049_Hannah_and_her_sisters-69040,4945,"While someone answers him offscreen, someone turns and walks toward him. The camera","While someone answers him offscreen, someone turns and walks toward him.",The camera,gold,turns to reveal lying in bed.,scans over the washing.,moves past a table and down a hallway where the cozy - like - living room and switches her attention to someone.,"follows the doctor's movements from the other side of the wall, obscuring him briefly.",3
1,lsmdc3034_IDES_OF_MARCH-3102,7691,"Indoors, we peer down through a narrow gap at a stairwell banister and glimpsed someone climbing up. Arriving in a hallway, he","Indoors, we peer down through a narrow gap at a stairwell banister and glimpsed someone climbing up.","Arriving in a hallway, he",gold,gingerly walks a barefoot man through a door.,flips the axe shut and leans back against the wall.,spots a man using a walkie - talkie as he hurries from a room.,"picks up a remote face, which is on a lantern.",2
2,lsmdc1038_The_Great_Gatsby-87195,2880,A guy plays trumpet on a fire escape. Someone,A guy plays trumpet on a fire escape.,Someone,gold,returns the restaurant to an empty field and talks about how to play the game.,knocks above the ground.,reacts for the phone.,picks someone up and twirls him around.,3
3,anetv_snG89ed-64M,11400,A small group of people are seen playing lacrosse on a field running up and down the field and pushing one another. Several shots of players hitting one another,A small group of people are seen playing lacrosse on a field running up and down the field and pushing one another.,Several shots of players hitting one another,gold,are shown shooting back as well as others showing off and throwing frisbees and others walking around.,is shown as well as players pushing other people.,throughout as well as people run near and after a ball.,with the other athletes and others running on the field.,1
4,anetv_WYjtYDyHUxY,3277,Several shots are shown of paintball are shown followed by many pictures of guns. More pictures of guns,Several shots are shown of paintball are shown followed by many pictures of guns.,More pictures of guns,gold,being shown as well as shots of people riding the horses.,are shown and ends with text across the screen.,are shown as well as working on bikes and talking and people doing weird things.,are shown as well as the pictures showing more more people while others walk opposite the arena.,1
5,anetv_GCtrfXIBbwA,9336,Young man introduces his video lesson and lays out the lesson plan. Musical notes,Young man introduces his video lesson and lays out the lesson plan.,Musical notes,gold,are displayed onscreen while harmonica plays.,are shown on the screen floor showing regular music including the iron pair.,are in progress in the game.,appear standing in a pool.,0
6,anetv_AIxxMCcmIgM,8838,We see the water spraying on the left. The man,We see the water spraying on the left.,The man,gold,returns and spots the man again.,jumps over to the right.,throws the fish around the ceiling.,drops some eggs from his bowl.,1
7,anetv_ANuV_fDgI54,2663,A mans's face on the left of the screen moves back and out of the shot. The camera angle changes and we,A mans's face on the left of the screen moves back and out of the shot.,The camera angle changes and we,gen,see this's half of fun that we've caught.,see his face in his hands.,see the yellow figurine on the nightstand.,see a person black next to a camera helmeted.,1
8,anetv_xUHXFXela-0,5724,Man is climbong a rock wall atached to harness. a group of friends are standing on the field and above them the man,Man is climbong a rock wall atached to harness.,a group of friends are standing on the field and above them the man,gold,watches using some athletic pole.,is climbing the rock wall.,is standing next to them.,seems to be being hit.,1
9,lsmdc0032_The_Princess_Bride-66266,6553,"Someone drives a fist into the beast's face, rolling it off. He","Someone drives a fist into the beast's face, rolling it off.",He,gen,raises his head and looks at the rat as he is led out of the jungle.,"slides quickly to his right, pressing his lips out of a hard knit, which tightens against his.",waves at the ammunition spilling his coffee.,takes a deep breath through his nose.,3


In [23]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-uncased"
batch_size = 16

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

In [24]:
tokenizer("Hello, this one sentence!", "And this sentence goes with it.")

{'input_ids': [101, 7592, 1010, 2023, 2028, 6251, 999, 102, 1998, 2023, 6251, 3632, 2007, 2009, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [25]:
ending_names = ["ending0", "ending1", "ending2", "ending3"]

def preprocess_function(examples):
    # Repeat each first sentence four times to go with the four possibilities of second sentences.
    first_sentences = [[context] * 4 for context in examples["sent1"]]
    # Grab all second sentences possible for each context.
    question_headers = examples["sent2"]
    second_sentences = [[f"{header} {examples[end][i]}" for end in ending_names] for i, header in enumerate(question_headers)]
    
    # Flatten everything
    first_sentences = sum(first_sentences, [])
    second_sentences = sum(second_sentences, [])
    
    # Tokenize
    tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
    # Un-flatten
    return {k: [v[i:i+4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()}

In [26]:
encoded_datasets = datasets.map(preprocess_function, batched=True)

  0%|          | 0/74 [00:00<?, ?ba/s]

  0%|          | 0/21 [00:00<?, ?ba/s]

  0%|          | 0/21 [00:00<?, ?ba/s]

In [28]:
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer

model = AutoModelForMultipleChoice.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMultipleChoice: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultipleChoice were not initialized from the model checkpoint at bert-base-uncased and are newly

In [29]:
model_name = model_checkpoint.split("/")[-1]
args = TrainingArguments(
    f"{model_name}-finetuned-swag",
    evaluation_strategy = "epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)

In [30]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch

@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that will dynamically pad the inputs for multiple choice received.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        label_name = "label" if "label" in features[0].keys() else "labels"
        labels = [feature.pop(label_name) for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])
        flattened_features = [[{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features]
        flattened_features = sum(flattened_features, [])
        
        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        
        # Un-flatten
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        # Add back labels
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch