In [15]:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
import numpy as np
from utils import loss

In [None]:
# Few-Shot Intent Detection via Contrastive Pre-Training and Fine-Tuning
# https://github.com/jianguoz/Few-Shot-Intent-Detection

## Load dataset 

In [None]:
# List the contents of the Few-Shot-Intent-Detection Datasets directory
# (the path is relative to the notebook's working directory).
!ls ../../dataset/Few-Shot-Intent-Detection/Datasets/

ATIS  BANKING77  BANKING77-OOS	CLINC150  CLINC-Single-Domain-OOS  HWU64  SNIPS


## Parameters
- N - number of sentences in the batch 
- t - temperature parameter -> controls the penalty for negative samples
- u - sentence or utterance
- hi - representation of ui
- hi - BERT(ui) -> we use bert-base-uncased
- i-th -> order of sentence
- hi_bar -> the representation of ui_bar 
- ui_bar -> the same sentence as ui but with 10% of its tokens randomly masked (Devlin et al., 2019)
- M - the number of masked tokens in each batch

## Preprocessing data
  Todo 
  1. don't forget to remove utterances that have fewer than five tokens
  2. we will also exclude CLINC-Single-Domain-OOS

## Training process
 1. during batch training don't forget to mask randomly: a sentence has
    different masked positions across different training epochs
 2. (ui, ui_bar) -> single encoder during batch training (Gao et al., 2021)
 3. add masked language modelling loss (Devlin et al., 2019; Wu et al., 2020a)
 4. P(xm) -> predicted probability of masked token xm over the total vocabulary

## Experiment Setting
- Contrastive pre-training
 1. Pre-train on the combined intent datasets -> combine every dataset (guess)
 2. 15 epochs
 3. batch size = 64
 4. t = 0.1, lambda = 1.0

- Fine-tuning
 1. 5-shot -> five training examples per intent
 2. 10-shot -> ten training examples per intent
 3. batch size = 16
 4. t ∈ {0.1, 0.3, 0.5}
 5. λ ∈ {0.01, 0.03, 0.05}
 6. 30 epochs
 7. apply label smoothing to the intent classification loss (Zhang et al., 2020a)

## Load the pretrained model's vocabulary (tokenizer)

In [5]:
# Load the tokenizer (vocabulary) that matches the bert-base-uncased checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [6]:
# Tokenize the input: a question ending in '?' followed by its answer sentence.
# NOTE(review): no [CLS]/[SEP] markers are present in the text; BERT inputs
# normally carry them — confirm that omitting them here is intended.
text = "who was Elon musk ? Elon musk was an entrepreneur and business magnate"
tokenized_text = tokenizer.tokenize(text)

In [7]:
# Mask a token that we will try to predict with MLM.
# Index 7 is 'el', the first word piece of the second "Elon".
masked_index = 7
tokenized_text[masked_index] = '[MASK]'
# Sanity check on the word-piece sequence after masking (re-running this cell
# is safe: the assignment above is idempotent).
assert tokenized_text == [
    'who', 'was', 'el', '##on', 'mu', '##sk', '?',
    '[MASK]', '##on', 'mu', '##sk', 'was', 'an',
    'entrepreneur', 'and', 'business', 'magnate',
]

In [8]:
# Print the number of word pieces, then display the list itself
# (position masked_index now holds '[MASK]').
print(len(tokenized_text))
tokenized_text

17


['who',
 'was',
 'el',
 '##on',
 'mu',
 '##sk',
 '?',
 '[MASK]',
 '##on',
 'mu',
 '##sk',
 'was',
 'an',
 'entrepreneur',
 'and',
 'business',
 'magnate']

In [9]:
# Convert tokens to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
# Segment ids mark which sentence each token belongs to: 0 for sentence A
# (everything up to and including the first '?'), 1 for sentence B.
# Deriving the boundary from the tokens keeps the ids aligned with the text;
# a ValueError here means the '?' separator is missing (e.g. it was masked).
sentence_a_len = tokenized_text.index('?') + 1
segments_ids = [0] * sentence_a_len + [1] * (len(tokenized_text) - sentence_a_len)
# Convert inputs to PyTorch tensors with a leading batch dimension of 1
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

## Load pretrained model weights

In [10]:
# Instantiate the pretrained encoder and put it in evaluation mode in a single
# expression (Module.eval() hands back the module itself).
model = BertModel.from_pretrained('bert-base-uncased').eval()
print("eval done")

eval done


In [11]:
# Predict hidden states features for each layer.
# This is inference only, so disable autograd to avoid building a graph
# and holding on to intermediate activations.
with torch.no_grad():
    encoded_layers, _ = model(tokens_tensor, segments_tensors)

In [12]:
# bert-base-uncased returns one hidden-state entry per encoder layer, 12 in total.
assert len(encoded_layers) == 12

In [13]:
# Load pre-trained model (weights) with the masked-language-modelling head.
# NOTE: this rebinds `model`, replacing the BertModel encoder loaded earlier.
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Predict all tokens; inference only, so skip autograd bookkeeping.
with torch.no_grad():
    predictions = model(tokens_tensor, segments_tensors)

# Pick the highest-scoring vocabulary entry at the masked position.
# The masked word piece was 'el' (first piece of the second "Elon").
predicted_index = torch.argmax(predictions[0, masked_index]).item()
# convert_ids_to_tokens returns a list, so predicted_token is a one-element
# list; the recorded run of this notebook shows ['el'].
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])

In [14]:
# Display the MLM prediction for the masked position (a one-element list).
predicted_token

['el']

## Config

In [44]:
# Contrastive-loss settings (names follow the "Parameters" notes above):
#   N           - number of sentences in a batch
#   temperature - controls the penalty applied to negative samples
N, temperature = 10, 0.5

In [74]:
# Two random (3, 2) float matrices passed to self_supervised_cl below.
# Drawn from a locally seeded Generator so that Restart & Run All reproduces
# the same values without touching NumPy's global random state.
rng = np.random.default_rng(0)
a = rng.random((3, 2))
b = rng.random((3, 2))

In [75]:
# Display the shapes of both inputs side by side.
a.shape , b.shape

((3, 2), (3, 2))

In [76]:
# NOTE(review): self_supervised_cl is neither defined nor imported in the
# visible cells (only `loss` is imported from utils) — presumably it came from
# an earlier kernel session or lives in utils.loss; confirm before relying on
# Restart & Run All.
self_supervised_cl(a,b)

[[1.92643996 1.70838819]
 [3.35620428 2.74840212]]
9.7394345458278


-0.1

In [None]:
# NOTE(review): unexecuted cell that repeats the earlier predicted_token display.
predicted_token