<a href="https://colab.research.google.com/github/sahug/ds-bert/blob/main/BERT%20NLP%20-%20Session%206%20-%20Intent%20Recognition%20Using%20BERT%20and%20Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**BERT NLP - Session 6 - Intent Recognition Using BERT and TensorFlow**

In [47]:
# Quietly (-qq) install the Hugging Face `transformers` package into the kernel's environment.
# NOTE(review): none of the visible cells import `transformers` — confirm it is still needed.
%pip install -qq transformers

**Download Dataset**

In [48]:
!gdown --id 1OlcvGWReJMuyYQuOZm149vHWwPtlboR6 --output train.csv
!gdown --id 1Oi5cRlTybuIF2Fl5Bfsr-KkqrXrdt77w --output valid.csv
!gdown --id 1ep9H6-HvhB4utJRLVcLzieWNUSG3P_uF --output test.csv

Downloading...
From: https://drive.google.com/uc?id=1OlcvGWReJMuyYQuOZm149vHWwPtlboR6
To: /content/train.csv
100% 799k/799k [00:00<00:00, 115MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Oi5cRlTybuIF2Fl5Bfsr-KkqrXrdt77w
To: /content/valid.csv
100% 43.3k/43.3k [00:00<00:00, 32.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1ep9H6-HvhB4utJRLVcLzieWNUSG3P_uF
To: /content/test.csv
100% 43.1k/43.1k [00:00<00:00, 33.4MB/s]


**Import Libraries**

In [49]:
import pandas as pd

**Load Dataset into DataFrames**

In [50]:
# Read the three downloaded splits in one pass; the file order matches the
# unpacking targets on the left.
train, valid, test = map(pd.read_csv, ("train.csv", "valid.csv", "test.csv"))

In [51]:
# Fold the validation split into the training data and renumber the rows.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
# `pd.concat(..., ignore_index=True)` is the supported equivalent of
# `append(...).reset_index(drop=True)`.
# NOTE: this cell rebinds `train`, so re-running it without re-reading the CSVs
# would add the validation rows a second time.
train = pd.concat([train, valid], ignore_index=True)

In [52]:
# Sanity check: (rows, columns) of the combined training frame and of the test frame.
train.shape, test.shape

((13784, 2), (700, 2))

In [53]:
# Preview the first rows of the training data (`text` and `intent` columns).
train.head()

Unnamed: 0,text,intent
0,listen to westbam alumb allergic on google music,PlayMusic
1,add step to me to the 50 clásicos playlist,AddToPlaylist
2,i give this current textbook a rating value of...,RateBook
3,play the song little robin redbreast,PlayMusic
4,please add iris dement to my playlist this is ...,AddToPlaylist


In [54]:
# Preview the first rows of the test data (`text` and `intent` columns).
test.head()

Unnamed: 0,text,intent
0,add sabrina salerno to the grime instrumentals...,AddToPlaylist
1,i want to bring four people to a place that s ...,BookRestaurant
2,put lindsey cardinale into my hillary clinton ...,AddToPlaylist
3,will it snow in mt on june 13 2038,GetWeather
4,play signe anderson chant music that is newest,PlayMusic


In [55]:
# Distinct intent labels present in the training data, and how many there are.
train["intent"].unique(), len(train["intent"].unique())

(array(['PlayMusic', 'AddToPlaylist', 'RateBook', 'SearchScreeningEvent',
        'BookRestaurant', 'GetWeather', 'SearchCreativeWork'], dtype=object),
 7)

In [56]:
# Distinct intent labels present in the test data, and how many there are.
test["intent"].unique(), len(test["intent"].unique())

(array(['AddToPlaylist', 'BookRestaurant', 'GetWeather', 'PlayMusic',
        'SearchScreeningEvent', 'SearchCreativeWork', 'RateBook'],
       dtype=object), 7)

**Label Encoding**

In [57]:
from sklearn import preprocessing

# Map intent names to integer ids. The encoder is fitted ONCE on the training
# labels and that same mapping is reused for the test labels. Re-fitting on the
# test set (`fit_transform`) only yields matching ids when both splits happen
# to contain exactly the same label set; `transform` guarantees consistency
# and raises a ValueError if the test set contains an unseen label.
label_encoder = preprocessing.LabelEncoder()
train["intent"] = label_encoder.fit_transform(train["intent"])
test["intent"] = label_encoder.transform(test["intent"])

In [58]:
# Inspect the integer-encoded intent columns of both splits.
train["intent"], test["intent"]

(0        3
 1        0
 2        4
 3        3
 4        0
         ..
 13779    6
 13780    6
 13781    6
 13782    6
 13783    6
 Name: intent, Length: 13784, dtype: int64, 0      0
 1      1
 2      0
 3      2
 4      3
       ..
 695    5
 696    5
 697    4
 698    0
 699    4
 Name: intent, Length: 700, dtype: int64)

**Split Data - Features and Labels**

In [59]:
# Features: every column except the encoded target.
x_train = train.drop(columns=["intent"])
x_test = test.drop(columns=["intent"])

# Labels: the integer-encoded intent column.
y_train = train["intent"]
y_test = test["intent"]

In [60]:
# Confirm the shapes of the feature frames and label series for both splits.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((13784, 1), (13784,), (700, 1), (700,))

In [61]:
# Preview the training features (only the `text` column remains).
x_train.head()

Unnamed: 0,text
0,listen to westbam alumb allergic on google music
1,add step to me to the 50 clásicos playlist
2,i give this current textbook a rating value of...
3,play the song little robin redbreast
4,please add iris dement to my playlist this is ...


In [62]:
# Preview the integer-encoded training labels.
y_train.head()

0    3
1    0
2    4
3    3
4    0
Name: intent, dtype: int64

**One-Hot Encoding (OHE)**

In [63]:
import tensorflow as tf

# Both label sets use the number of distinct training intents as the one-hot
# depth, so the train and test label tensors have the same width.
num_intents = len(train["intent"].unique())
y_train = tf.one_hot(y_train, depth=num_intents)
y_test = tf.one_hot(y_test, depth=num_intents)

In [64]:
# Display the one-hot encoded label tensors for both splits.
y_train, y_test

(<tf.Tensor: shape=(13784, 7), dtype=float32, numpy=
 array([[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)>,
 <tf.Tensor: shape=(700, 7), dtype=float32, numpy=
 array([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)>)

**TensorFlow BERT Preprocessing**

In [65]:
# Install TensorFlow Hub and upgrade (-U) TensorFlow Text, quietly (-qq).
# NOTE(review): `tensorflow` is already imported by an earlier cell; if this upgrade
# pulls in a different TensorFlow version, the runtime presumably needs a restart
# before the new version is used — confirm, or move these installs to the top.
%pip install -qq tensorflow_hub 
%pip install -U -qq tensorflow_text

In [66]:
from tensorflow import keras
import tensorflow_text as text
import tensorflow_hub as hub

In [67]:
# TF Hub handle of the preprocessing model that pairs with the uncased English BERT encoder.
BERT_PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

# Keras layer wrapping the preprocessing model; it is called on raw strings.
preprocess = hub.KerasLayer(BERT_PREPROCESS_HANDLE)

In [68]:
def pre_processing(example):
  """Run `example` through the module-level `preprocess` layer.

  Args:
    example: Input passed unchanged to `preprocess` (the model feeds it a
      string tensor).

  Returns:
    Whatever `preprocess` returns for `example`.
  """
  preprocessed = preprocess(example)
  return preprocessed

**TensorFlow BERT Encoder**

In [69]:
# TF Hub handle of the BERT encoder (uncased English, L-12 / H-768 / A-12 per the handle).
BERT_ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Keras layer wrapping the encoder; with no extra arguments it uses KerasLayer's defaults.
encoder = hub.KerasLayer(BERT_ENCODER_HANDLE)

In [70]:
def encode_inputs(preprocess_text):
  """Run preprocessed inputs through the module-level `encoder` layer.

  Args:
    preprocess_text: Output of the preprocessing step, passed unchanged to
      `encoder`.

  Returns:
    Whatever `encoder` returns; the model-building cell reads its
    "pooled_output" entry.
  """
  encoded = encoder(preprocess_text)
  return encoded

**Build Model**

In [71]:
# Raw strings go in; the TF Hub layers handle tokenization and encoding.
# Fresh names are used for the intermediate tensors on purpose:
#   * rebinding `preprocess` here would replace the hub layer that
#     `pre_processing` calls, so this cell could not be re-run;
#   * `input` would shadow the Python builtin of the same name.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="input")
preprocessed_text = pre_processing(text_input)
encoder_outputs = encode_inputs(preprocessed_text)

# Classification head on top of BERT's pooled output:
# light dropout, then a softmax over the 7 intent classes.
nnl = tf.keras.layers.Dropout(0.1, name="dropout")(encoder_outputs["pooled_output"])
nnl = tf.keras.layers.Dense(7, activation="softmax", name="output")(nnl)

# Construct Final Model
model = tf.keras.Model(inputs=[text_input], outputs=[nnl])

In [72]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None,)]            0           []                               
                                                                                                  
 keras_layer_2 (KerasLayer)     {'input_word_ids':   0           ['input[0][0]']                  
                                (None, 128),                                                      
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128)}                                                    

In [73]:
#Compile Model
METRICS = [
           tf.keras.metrics.BinaryAccuracy(name="accuracy"),
           tf.keras.metrics.Precision(name="precision"),
           tf.keras.metrics.Recall(name="recall"),
]

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=METRICS)

In [None]:
# Train for two epochs; the test split is passed as the validation data, so its
# metrics are reported at the end of each epoch.
model.fit(
    x=x_train,
    y=y_train,
    epochs=2,
    validation_data=(x_test, y_test),
)

Epoch 1/2
 17/431 [>.............................] - ETA: 1:29:22 - loss: 0.4575 - accuracy: 0.8569 - precision: 0.0000e+00 - recall: 0.0000e+00