# Few-shot learning for NLP

In [1]:
%%capture
!wget https://github.com/martin-fabbri/colab-notebooks/raw/master/nlp/few-shot-learning/datasets/final_fewshot_test.csv
!wget https://github.com/martin-fabbri/colab-notebooks/raw/master/nlp/few-shot-learning/datasets/final_fewshot_train.csv

In [2]:
import keras.backend as K
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import (
    BatchNormalization,
    Dense,
    Dropout,
    Input,
    Lambda,
    Layer,
)
from tensorflow.keras.regularizers import l2

# Log the library versions this notebook was executed with.
print("tensorflow", tf.__version__)
print("tensorflow_hub", hub.__version__)

tensorflow 2.4.1
tensorflow_hub 0.11.0


In [3]:
# Read the two CSV files downloaded in the first cell into DataFrames.
train = pd.read_csv("final_fewshot_train.csv")
test = pd.read_csv("final_fewshot_test.csv")

# Display (rows, columns) for both splits.
(train.shape, test.shape)

((100, 3), (3277, 3))

In [4]:
# Preview the first training rows (each has a `text` string and a `class` label).
train.head()

Unnamed: 0.1,Unnamed: 0,text,class
0,34,[ALLUXIO-2743] Fix failing unit tests,1
1,2935,#2 Refactored structure of Argument,3
2,84,Remove some features from JwtTokenStore,4
3,42,Remove duplicated 1.613 section from changelog,2
4,2948,* webapp structure refactoring,3


In [5]:
# TF-Hub handle of the Universal Sentence Encoder (large, version 5) module.
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
# Load the module once; `embed` is called later on lists/arrays of strings and its
# output is fed to the network below, which expects 512-wide inputs.
embed = hub.load(module_url)

In [6]:
# Embedding head applied to the 512-wide sentence vectors: two regularised hidden
# layers, a 128-unit linear projection, and a final L2 normalisation along axis 1
# so every output row has unit length.
model = Sequential()
model.add(Input(shape=(512,)))
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.4))
model.add(BatchNormalization())
model.add(Dense(64, activation="relu", kernel_regularizer=l2(0.001)))
model.add(Dropout(0.4))
model.add(Dense(128, name="dense_layer"))
model.add(Lambda(lambda t: K.l2_normalize(t, axis=1), name="norm_layer"))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               131328    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 256)               1024      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_layer (Dense)          (None, 128)               8320      
_________________________________________________________________
norm_layer (Lambda)          (None, 128)               0

In [7]:
class TripletLossLayer(Layer):
    """Layer that computes a triplet margin loss and registers it with ``add_loss``.

    The layer has no weights. It receives the anchor, positive and negative
    embeddings, computes ``max(d(a, p) - d(a, n) + alpha, 0)`` per triplet using
    squared Euclidean distances, sums over the batch, and adds the result to the
    model's losses so the model can be compiled with ``loss=None``.

    Args:
        alpha: Margin added to the distance difference before clamping at zero.
        **kwargs: Forwarded to the base ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, alpha, **kwargs):
        # Initialise the base layer first so Keras attribute tracking is set up
        # before any attribute is assigned on the instance.
        super().__init__(**kwargs)
        self.alpha = alpha

    def get_config(self):
        """Return the layer config including ``alpha``.

        Layers with extra constructor arguments must expose them here so the
        layer can be re-created when a model is saved, loaded or cloned.
        """
        config = super().get_config()
        config.update({"alpha": self.alpha})
        return config

    def triplet_loss(self, inputs):
        """Compute the summed triplet margin loss.

        Args:
            inputs: Sequence of three tensors ``(anchor, positive, negative)``.
                Assumed to be ``(batch, dim)`` embeddings -- confirm against the caller.

        Returns:
            The per-triplet hinge values summed over axis 0.
        """
        a, p, n = inputs
        # Squared Euclidean distance anchor<->positive and anchor<->negative,
        # reduced over the last (feature) axis.
        p_dist = K.sum(K.square(a - p), axis=-1)
        n_dist = K.sum(K.square(a - n), axis=-1)
        # Hinge: only triplets that violate the margin contribute to the loss.
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        """Register the triplet loss on the layer and return it as the layer output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss


In [8]:
# Three 512-wide inputs: anchor, positive (same class as the anchor) and negative.
in_a = Input(shape=(512,))
in_p = Input(shape=(512,))
in_n = Input(shape=(512,))

# The same `model` instance embeds all three inputs, so its weights are shared.
emb_a = model(in_a)
emb_p = model(in_p)
emb_n = model(in_n)

# Calling the layer returns its output tensor (the loss value); the layer also
# registers that value through add_loss, which is what training minimises.
triplet_loss_layer = TripletLossLayer(alpha=0.4, name="triplet_loss_layer")([emb_a, emb_p, emb_n])
# Training-only wrapper: takes the three inputs and outputs the triplet loss.
nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer)

In [9]:
# Print the layer/parameter summary of the three-input training model.
nn4_small2_train.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 128)          157120      input_2[0][0]                    
                                                                 input_3[0][0]                

In [10]:
# Class label of every training row, as a NumPy array aligned with row position.
labels_train = np.asarray(train["class"].tolist())
# Distinct class labels, in order of first appearance in the training set.
unique_train_label = np.asarray(train["class"].unique().tolist())

# For each label, the positions of the training rows carrying that label.
map_train_label_indices = dict(
    (label, np.flatnonzero(labels_train == label)) for label in unique_train_label
)
map_train_label_indices

{1: array([ 0, 13, 19, 24, 29, 34, 38, 40, 41, 45, 46, 54, 56, 68, 82, 84, 87,
        91, 92, 94]),
 2: array([ 3, 10, 11, 17, 23, 30, 57, 62, 67, 74, 76, 77, 79, 80, 81, 85, 86,
        93, 96, 99]),
 3: array([ 1,  4,  7, 16, 20, 22, 31, 43, 47, 49, 50, 51, 52, 53, 64, 65, 70,
        72, 88, 98]),
 4: array([ 2,  5,  9, 12, 15, 18, 21, 25, 26, 32, 35, 37, 42, 48, 59, 61, 66,
        69, 75, 90]),
 5: array([ 6,  8, 14, 27, 28, 33, 36, 39, 44, 55, 58, 60, 63, 71, 73, 78, 83,
        89, 95, 97])}

In [11]:
def get_triplets(unique_train_label, map_train_label_indices, rng=None):
    """Sample one (anchor, positive, negative) triplet of row positions.

    Two distinct labels are drawn; the anchor and positive are two different
    rows of the first label and the negative is one row of the second label.

    Args:
        unique_train_label: Array-like of the distinct class labels.
        map_train_label_indices: Mapping from label to an array of the row
            positions that carry that label.
        rng: Optional random source exposing ``choice`` (for example
            ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``).
            Pass a seeded one for reproducible sampling. Defaults to the global
            ``np.random`` state, which matches the previous behaviour.

    Returns:
        Tuple ``(a, p, n)`` of row positions.

    Raises:
        ValueError: Raised by NumPy when fewer than two labels are available or
            the anchor label has fewer than two rows (sampling without replacement).
    """
    sampler = np.random if rng is None else rng
    # Two different labels: the first supplies anchor/positive, the second the negative.
    label_l, label_r = sampler.choice(unique_train_label, 2, replace=False)
    # replace=False guarantees the anchor and the positive are different rows.
    a, p = sampler.choice(map_train_label_indices[label_l], 2, replace=False)
    n = sampler.choice(map_train_label_indices[label_r])
    return a, p, n

In [12]:
# Draw one example triplet of row positions: anchor and positive from one class,
# negative from a different class.
a, p, n = get_triplets(unique_train_label, map_train_label_indices)
a, p, n

(53, 49, 36)

In [13]:
# Show the sampled rows in anchor, positive, negative order.
train.iloc[[a, p, n]]

Unnamed: 0.1,Unnamed: 0,text,class
53,2940,#935 - Refactor cache settings for structured ...,3
49,2934,#143 Package Structure Refactoring,3
36,51,performance improvement based on the input fro...,5


In [14]:
def get_triplets_batch(k, train_set, unique_train_label, map_train_label_indices, embed):
    """Endlessly generate embedded triplet batches.

    Args:
        k: Number of triplets per batch.
        train_set: pandas object indexed positionally with ``.iloc``; the
            selected values are passed to ``embed`` as a plain list.
        unique_train_label: Distinct class labels, forwarded to ``get_triplets``.
        map_train_label_indices: Label -> row positions, forwarded to ``get_triplets``.
        embed: Callable applied to each list of selected values.

    Yields:
        ``([anchors, positives, negatives], [])`` -- the three embedded batches
        and an empty list in the target position.
    """
    while True:
        # Sample k independent triplets of row positions.
        sampled = [
            get_triplets(unique_train_label, map_train_label_indices)
            for _ in range(k)
        ]
        anchor_rows = [triplet[0] for triplet in sampled]
        positive_rows = [triplet[1] for triplet in sampled]
        negative_rows = [triplet[2] for triplet in sampled]

        # Look up the values for each role first, then embed them in a/p/n order.
        anchor_items, positive_items, negative_items = (
            train_set.iloc[rows].values.tolist()
            for rows in (anchor_rows, positive_rows, negative_rows)
        )

        yield [embed(anchor_items), embed(positive_items), embed(negative_items)], []

In [15]:
# Pull a single batch of 128 triplets from a fresh generator as a quick check that
# sampling and embedding work end to end; `batch` is ([anchors, positives, negatives], []).
batch = next(get_triplets_batch(128, train["text"], unique_train_label, map_train_label_indices, embed))

In [16]:
# loss=None: no per-output loss function is supplied here; the objective being
# minimised is the value TripletLossLayer registers through add_loss.
nn4_small2_train.compile(loss=None, optimizer="adam")

In [17]:
# Train on freshly sampled triplets: 100 epochs of 10 steps, 128 triplets per step.
nn4_small2_train.fit(
    get_triplets_batch(
        k=128,
        train_set=train["text"],
        unique_train_label=unique_train_label,
        map_train_label_indices=map_train_label_indices,
        embed=embed,
    ),
    steps_per_epoch=10,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f5b760c0e80>

In [18]:
# Class labels of both splits as NumPy arrays.
y_train = np.array(train["class"].values.tolist())
y_test = np.array(test["class"].values.tolist())

# Features: sentence-encode each text, then project it with the trained
# embedding network `model` (the part shared by the three triplet branches).
X_train = model.predict(embed(np.array(train["text"].values.tolist())))
X_test = model.predict(embed(np.array(test["text"].values.tolist())))