# Joint Intent Classification and Slot filling with BERT
This notebook is based on the paper __BERT for Joint Intent Classification and Slot Filling__ by Chen et al. (2019), https://arxiv.org/abs/1902.10909 but on a different dataset made for a class project.

Ideas were also taken from https://github.com/monologg/JointBERT, which is a PyTorch implementation of the paper with the original dataset.


## Install transformers

In [None]:
!pip install transformers
#rohit was here

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 7.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 45.0 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 11.7 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 13.1 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Un

## Download data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

df = pd.read_json('/content/drive/MyDrive/op.json')

# Factorize the combined intent label once: `intent_codes` holds one integer per
# row, `intent_uniques` holds the distinct labels in order of first appearance.
intent_codes, intent_uniques = df.intent.factorize()
intent_map = dict(enumerate(intent_uniques))  # integer code -> combined intent label
df['intents'] = intent_codes

print(df.shape)
df.head()

(19570, 4)


Unnamed: 0,code_mixed_slots,code_mixed_sentence,intent,intents
0,"B-fromloc.city_name,B-fromloc.state_code,O,B-t...",washington dc se boston ke lie udaanon kee soo...,flight#ground_service,0
1,"B-airline_name,I-airline_name,O,B-meal_descrip...",tower air par snacks diye jaate hain kya also ...,meal#airfare#flight,1
2,"B-fromloc.city_name,O,B-toloc.city_name,I-tolo...",baltimore se san francisco tak udaanen,flight,2
3,"B-fromloc.city_name,I-fromloc.city_name,O,B-to...",kansas city se chicago ke lie kaun see udaanen...,flight#abbreviation,3
4,"B-depart_time.start_time,O,B-depart_time.end_t...",8 se 9 pm ke beech denver aane ya jaane vaalee...,flight,2


In [None]:
intent_names = list(intent_map.values())
intent_names

['flight#ground_service',
 'meal#airfare#flight',
 'flight',
 'flight#abbreviation',
 'airfare#flight',
 'airline',
 'flight#airport',
 'ground_service',
 'abbreviation',
 'airline#flight',
 'flight#airfare',
 'abbreviation#flight',
 'flight#quantity#airfare',
 'flight#aircraft',
 'airline#flight_no#flight',
 'airfare',
 'airfare#abbreviation',
 'airfare#flight#ground_service',
 'flight#airline#ground_service',
 'ground_service#flight',
 'airfare#ground_service',
 'aircraft#ground_service#flight',
 'flight#airfare#flight',
 'airline#airport',
 'aircraft',
 'quantity#flight',
 'flight#airline',
 'flight#meal',
 'airfare#restriction#flight',
 'flight#capacity',
 'flight#quantity',
 'flight#aircraft#flight#flight_no',
 'flight_time',
 'flight_time#flight',
 'flight#flight_time',
 'distance#quantity',
 'aircraft#flight#airfare#flight',
 'airfare#flight#flight',
 'aircraft#flight',
 'airline#flight#airfare',
 'flight#airfare#abbreviation#flight',
 'airline#ground_service',
 'ground_service#

In [None]:
# Split every combined label (e.g. 'flight#airfare') on '#' and keep each atomic
# intent exactly once, in order of first appearance. dict.fromkeys preserves
# insertion order, so the resulting list matches the original append order.
intents = list(dict.fromkeys(
    part
    for combined in intent_names
    for part in combined.split("#")
))
intents

['flight',
 'ground_service',
 'meal',
 'airfare',
 'abbreviation',
 'airline',
 'airport',
 'quantity',
 'aircraft',
 'flight_no',
 'restriction',
 'capacity',
 'flight_time',
 'distance',
 'ground_fare',
 'city',
 'day_name',
 'cheapest']

In [None]:
len(intents)

18

## Read data from json files

Data is of the following format
````json5
{
  "text": "",
  "positions": [{}],
  "slots": [{}],
  "intent": ""
}
````

We will be using `text` as the input and `slots` and `intent` as labels.

In [None]:
# First 80% of the rows (in file order) form the training set, the rest the test set.
split_at = int(df.shape[0] * 0.8)
df_train = df.iloc[:split_at]
df_test = df.iloc[split_at:]

print(df_test.shape)
print(df_train.shape)

(3914, 4)
(15656, 4)


In [None]:
### UTILITY FUNCTIONS FOR TOKENIZATIONS, MASKS AND SEGMENTS CREATION ###
### from: https://www.kaggle.com/akensert/bert-base-tf2-0-now-huggingface-transformer

def set_seed(seed):
    """Seed TensorFlow, NumPy, Python's `random` and PYTHONHASHSEED with `seed`.

    Args:
        seed: integer seed applied to every random number source below.
    """
    # Imported locally because no visible notebook cell imports them.
    import os
    import random

    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    

def convert_to_transformer_inputs(str1, tokenizer, max_sequence_length, double=True):
    """Encode one string into padded ids, attention mask and segment ids.

    Args:
        str1: text to encode.
        tokenizer: object exposing `encode_plus(...)` and `pad_token_id`.
        max_sequence_length: length every returned list is padded up to.
        double: when true, return the three lists only; otherwise the same
            three lists followed by three `None` placeholders.

    Returns:
        `[input_ids, input_masks, input_segments]`, optionally followed by
        `None, None, None` when `double` is false.
    """

    def return_id(str1, truncation_strategy, length):
        encoded = tokenizer.encode_plus(
            str1,
            add_special_tokens=True,
            max_length=length,
            truncation_strategy=truncation_strategy,
        )
        token_ids = encoded["input_ids"]
        segment_ids = encoded["token_type_ids"]

        # Right-pad all three lists to `length`; mask is 1 for real tokens, 0 for padding.
        pad_count = length - len(token_ids)
        pad_id = tokenizer.pad_token_id

        return [
            token_ids + [pad_id] * pad_count,
            [1] * len(token_ids) + [0] * pad_count,
            segment_ids + [0] * pad_count,
        ]

    ids, masks, segments = return_id(str1, 'longest_first', max_sequence_length)

    if double:
        return [ids, masks, segments]
    return [ids, masks, segments, None, None, None]

def compute_input_arrays(df, columns, tokenizer, max_sequence_length, double=True):
    """Encode the text column of `df` into three int32 arrays.

    Args:
        df: DataFrame containing the columns listed in `columns`.
        columns: list of column names; only `columns[0]` is read as the text.
        tokenizer: tokenizer forwarded to `convert_to_transformer_inputs`.
        max_sequence_length: padded length of every encoded row.
        double: forwarded to `convert_to_transformer_inputs`.

    Returns:
        `[input_ids, input_masks, input_segments]` as `np.int32` arrays, one
        row per row of `df`.
    """
    input_ids_1, input_masks_1, input_segments_1 = [], [], []
    for _, instance in tqdm(df[columns].iterrows(), total=len(df)):
        str1 = instance[columns[0]]

        # With double=False the helper appends three `None` placeholders;
        # keep only the first three entries so the unpacking works in both modes.
        ids_1, masks_1, segments_1 = convert_to_transformer_inputs(
            str1, tokenizer, max_sequence_length, double=double)[:3]

        input_ids_1.append(ids_1)
        input_masks_1.append(masks_1)
        input_segments_1.append(segments_1)

    # Both modes return the same three arrays.
    return [np.asarray(input_ids_1, dtype=np.int32),
            np.asarray(input_masks_1, dtype=np.int32),
            np.asarray(input_segments_1, dtype=np.int32)]

## Load Tokenizer from transformers

We will use the pretrained `bert-base-cased` checkpoint for the tokenizer. The classifier defined below combines `bert-base-cased` with `bert-base-multilingual-cased`.

In [None]:
import tensorflow as tf
from transformers import AutoTokenizer

model_name_en = "bert-base-cased"
model_name_hi = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name_en)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [None]:
df_train

Unnamed: 0,code_mixed_slots,code_mixed_sentence,intent,intents
0,"B-fromloc.city_name,B-fromloc.state_code,O,B-t...",washington dc se boston ke lie udaanon kee soo...,flight#ground_service,0
1,"B-airline_name,I-airline_name,O,B-meal_descrip...",tower air par snacks diye jaate hain kya also ...,meal#airfare#flight,1
2,"B-fromloc.city_name,O,B-toloc.city_name,I-tolo...",baltimore se san francisco tak udaanen,flight,2
3,"B-fromloc.city_name,I-fromloc.city_name,O,B-to...",kansas city se chicago ke lie kaun see udaanen...,flight#abbreviation,3
4,"B-depart_time.start_time,O,B-depart_time.end_t...",8 se 9 pm ke beech denver aane ya jaane vaalee...,flight,2
...,...,...,...,...
15651,"O,B-fromloc.city_name,I-fromloc.city_name,O,B-...",mujhe westchester county se cincinnati ke lie ...,flight,2
15652,"O,B-fromloc.city_name,I-fromloc.city_name,I-fr...",main dallas fort worth se philadelphia ke lie ...,flight#flight#airfare,47
15653,"O,O,O,O,O,O,O,O,B-fromloc.city_name,I-fromloc....",these flights kee keemat kya hai and mujhe san...,airfare#flight,4
15654,"B-fromloc.city_name,O,B-toloc.city_name,O,O,O,...",boston se philadelphia jaane ke lie aapake paa...,flight,2


# Encode texts from the dataset

We have to encode the texts using the tokenizer to create tensors for training the classifier.

In [None]:
# https://huggingface.co/transformers/preprocessing.html

def encode_texts(tokenizer, texts):
    """Call `tokenizer` on `texts` with padding, truncation and TensorFlow tensors enabled.

    Args:
        tokenizer: callable tokenizer accepting `padding`, `truncation` and
            `return_tensors` keyword arguments.
        texts: the texts to encode.

    Returns:
        Whatever `tokenizer` returns for these options.
    """
    tokenizer_options = dict(padding=True, truncation=True, return_tensors="tf")
    return tokenizer(texts, **tokenizer_options)

# Materialize the training sentences as a plain list before tokenizing them.
texts = list(df_train['code_mixed_sentence'])
tds = encode_texts(tokenizer, texts)
tds.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [None]:
encoded_texts = tds

In [None]:
print(encoded_texts)

{'input_ids': <tf.Tensor: shape=(15656, 164), dtype=int32, numpy=
array([[  101, 13445,  1633, ...,     0,     0,     0],
       [  101,  3590,  1586, ...,     0,     0,     0],
       [  101,   171,  1348, ...,     0,     0,     0],
       ...,
       [  101,  1292,  7306, ...,     0,     0,     0],
       [  101,   171, 15540, ...,     0,     0,     0],
       [  101,   180, 15615, ...,     0,     0,     0]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(15656, 164), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(15656, 164), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]], dtype

## Encode labels
### Intents

### Slots

To pad all the texts to the same length, the tokenizer uses a special padding token. To handle those positions we need to add `<PAD>` to `slot_names`. It can be some other symbol as well.

In [None]:
# Multi-hot encode the training intents: one row per example, one column per
# atomic intent in `intents`, with 1 where the '#'-joined label mentions it.
intent_to_idx = {name: pos for pos, name in enumerate(intents)}
index = [0] * len(intents)  # all-zero template row

full = []
for combined in df_train["intent"]:
  row = index.copy()
  for name in combined.split("#"):
    row[intent_to_idx[name]] = 1
  full.append(row)
len(full[0])
    
  


18

In [None]:
len(intents)

18

In [None]:
index = [0]*len(intents)
index

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [None]:
encoded_intents = tf.convert_to_tensor(full, dtype="int32")
encoded_intents

<tf.Tensor: shape=(15656, 18), dtype=int32, numpy=
array([[1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=int32)>

In [None]:
#len(slot_map)

In [None]:
# Collect every distinct comma-separated slot tag that occurs in the full dataset.
slot_names = set()
for td in df.loc[:,"code_mixed_slots"]:
    slot_names.update(td.split(','))
# Sort before assigning ids: iteration order of a set of strings is not stable
# across Python processes, which would give each kernel a different
# tag -> id mapping and make saved predictions/models inconsistent.
slot_names = sorted(slot_names)
# Reserve id 0 for padding and id 1 for unknown tags.
slot_names.insert(0, "<PAD>")
slot_names.insert(1, "UNK")
slot_names

['<PAD>',
 'UNK',
 '',
 'B-today_relative',
 'B-depart_date.date_relative',
 ' O',
 'B-round_trip',
 'B-fromloc.state_code',
 'what is hp',
 'B-stoploc.city_name',
 'I-flight_stop',
 'o',
 'B-arrive_date.month_name',
 '-arrive_time.time_relative',
 'B-toloc.state_code',
 ' B-toloc.city_name',
 'I-state_name',
 'B-airport_name',
 'B-toloc.airport_code',
 'I-flight_number',
 'I-arrive_time.start_time',
 ' B-arrive_time.time',
 'OO',
 'B-return_date.date_relative',
 'B-period_of_day',
 'B-arrive_time.time_relative',
 'I-return_date.day_number',
 'I-arrive_time.time',
 'B-depart_date.today_relative',
 'I-arrive_time.time_relative',
 'I-fromloc.city_name',
 'B-connect',
 'I-city_name',
 'B-depart_date.month_name',
 'I-depart_time.time',
 'I-class_type',
 '-fromloc.airport_name',
 'I-cost_relative',
 'B-airport_code',
 'B-arrive_date.day_name',
 ' B-fromloc.city_name',
 'I-depart_date.day_number',
 'B-economy',
 'B-flight_mod',
 'I-airport_name',
 'B-fromloc.airport_code',
 'B-arrive_time.ti

In [None]:
# slot tag -> integer id, where the id is the tag's position in `slot_names`
slot_map = {tag: position for position, tag in enumerate(slot_names)}
slot_map

{'': 2,
 ' B-arrive_time.time': 21,
 ' B-depart_time.time': 122,
 ' B-fromloc.city_name': 40,
 ' B-toloc.city_name': 15,
 ' O': 5,
 '-arrive_time.time_relative': 13,
 '-class_type': 128,
 '-cost_relative': 118,
 '-depart_date.day_name': 131,
 '-depart_date.month_name': 141,
 '-depart_time.period_of_day': 101,
 '-depart_time.time_relative': 56,
 '-fromloc.airport_name': 36,
 '-fromloc.city_name': 71,
 '<PAD>': 0,
 'B-aircraft_code': 150,
 'B-airline_code': 50,
 'B-airline_name': 51,
 'B-airport_code': 38,
 'B-airport_name': 17,
 'B-arrive_date.date_relative': 62,
 'B-arrive_date.day_name': 39,
 'B-arrive_date.day_number': 48,
 'B-arrive_date.month_name': 12,
 'B-arrive_date.today_relative': 83,
 'B-arrive_time.end_time': 73,
 'B-arrive_time.period_mod': 65,
 'B-arrive_time.period_of_day': 52,
 'B-arrive_time.start_time': 145,
 'B-arrive_time.time': 46,
 'B-arrive_time.timeB-toloc.city_name': 112,
 'B-arrive_time.time_relative': 25,
 'B-booking_class': 105,
 'B-city_name': 115,
 'B-class

In [None]:
def compute_slot_labels(df, column):
    """Convert comma-separated slot tags into lists of integer ids.

    Args:
        df: DataFrame holding the tag strings.
        column: name of the column whose values are comma-separated tags.

    Returns:
        One list of ids per row; tags missing from the module-level
        `slot_map` are mapped to the id of "UNK".
    """
    return [
        [slot_map[tag] if tag in slot_map else slot_map["UNK"]
         for tag in tags.split(',')]
        for tags in df[column]
    ]

In [None]:
import numpy as np
encoded_slots = compute_slot_labels(df_train, "code_mixed_slots")


In [None]:
# Width of the padded token matrix produced by the tokenizer (164 in the recorded
# run). Derived instead of hard-coded so the slot label array always has the same
# number of columns as `input_ids`, whatever the longest sentence is.
MAX_SEQUENCE_LENGTH = int(encoded_texts["input_ids"].shape[1])

In [None]:
def encoded_slot_array(df):
  """Build a zero-padded (rows, MAX_SEQUENCE_LENGTH) int32 matrix of slot ids for `df`.

  The slot ids are computed from `df["code_mixed_slots"]` itself, so the
  result always corresponds to the frame that was passed in. Rows longer than
  MAX_SEQUENCE_LENGTH are truncated; shorter rows are right-padded with 0,
  the id of "<PAD>".

  NOTE(review): ids are written starting at column 0, one per comma-separated
  tag; confirm that this lines up with the tokenizer's word-piece positions.
  """
  slot_rows = compute_slot_labels(df, "code_mixed_slots")
  encoded_slots_array = np.zeros(shape=(len(df), MAX_SEQUENCE_LENGTH), dtype=np.int32)
  for i, slot in enumerate(slot_rows):
    slot = slot[:MAX_SEQUENCE_LENGTH]
    encoded_slots_array[i, 0: len(slot)] = slot
  return encoded_slots_array

In [None]:










encoded_slots_array = encoded_slot_array(df_train)

In [None]:
encoded_slots[0]
encoded_slots_array[0]

array([133,   7,  79,  66,  79,  79,  79,  79,  79,  79,  79, 115,  32,
        79,  79,  79,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0], dtype=int32)

## Classifier Model

### Definition

In [None]:
from transformers import TFBertModel
from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling1D, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

class JointIntentAndSlotFillingModel(tf.keras.Model):
    """Joint slot-filling and intent model on top of two BERT encoders.

    The same inputs are fed to both encoders. Their per-token outputs are
    concatenated and projected to slot logits; their pooled outputs are
    concatenated and projected to intent logits.

    NOTE(review): both encoders receive identical `inputs`. The notebook
    builds its tokenizer from `model_name_en` only, so the token ids may not
    match the vocabulary of `model_name_hi` - confirm this is intended.
    """

    def __init__(self, intent_num_labels=None, slot_num_labels=None,
                 model_name_en=model_name_en,model_name_hi=model_name_hi, dropout_prob=0.1):
        """Create both encoders, the shared dropout and the two linear heads.

        Args:
            intent_num_labels: number of units of the intent head.
            slot_num_labels: number of units of the slot head.
            model_name_en: checkpoint name for the first encoder.
            model_name_hi: checkpoint name for the second encoder.
            dropout_prob: dropout rate applied to every encoder output.
        """
        super().__init__(name="joint_intent_slot")
        self.bert_en = TFBertModel.from_pretrained(model_name_en)
        self.bert_hi = TFBertModel.from_pretrained(model_name_hi)
        self.dropout = Dropout(dropout_prob)
        self.intent_classifier = Dense(intent_num_labels,
                                       name="intent_classifier")
        self.slot_classifier = Dense(slot_num_labels,
                                     name="slot_classifier")

    def call(self, inputs, **kwargs):
        """Run both encoders and return `(slot_logits, intent_logits)`.

        Args:
            inputs: model inputs, forwarded unchanged to both encoders.
            **kwargs: forwarded to both encoders; `training` (default False)
                also controls the dropout layer.
        """
        training = kwargs.get("training", False)

        # two outputs from each BERT encoder
        outputs_en = self.bert_en(inputs, **kwargs)
        outputs_hi = self.bert_hi(inputs, **kwargs)

        # per-token outputs are used for slot filling; tf.concat on the last
        # axis avoids instantiating a new Concatenate layer on every call
        sequence_output_en = self.dropout(outputs_en.last_hidden_state,
                                          training=training)
        sequence_output_hi = self.dropout(outputs_hi.last_hidden_state,
                                          training=training)
        sequence_output = tf.concat([sequence_output_en, sequence_output_hi], axis=-1)
        slot_logits = self.slot_classifier(sequence_output)

        # pooled outputs are used for intent classification
        pooled_output_en = self.dropout(outputs_en.pooler_output,
                                        training=training)
        pooled_output_hi = self.dropout(outputs_hi.pooler_output,
                                        training=training)
        pooled_output = tf.concat([pooled_output_en, pooled_output_hi], axis=-1)
        intent_logits = self.intent_classifier(pooled_output)

        return slot_logits, intent_logits

In [None]:

from tensorflow.keras.losses  import BinaryCrossentropy

In [None]:
joint_model = JointIntentAndSlotFillingModel(
    intent_num_labels=len(intents), slot_num_labels=len(slot_map))

Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.01G [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


### Hyperparams, Optimizer and Loss function

In [None]:
# The model returns (slot_logits, intent_logits), so the losses are listed in
# that order: integer slot ids per token, multi-hot intent vectors per sentence.
slot_loss = SparseCategoricalCrossentropy(from_logits=True)
intent_loss = BinaryCrossentropy(from_logits=True)
losses = [slot_loss, intent_loss]

metrics = ["accuracy"]
opt = Adam(learning_rate=3e-5, epsilon=1e-08)

# compile model: both heads are fine-tuned together
joint_model.compile(optimizer=opt, loss=losses, metrics=metrics)

### Train

In [None]:
# Model inputs: the three tensors produced by the tokenizer, as a plain dict.
x = {
    key: encoded_texts[key]
    for key in ("input_ids", "token_type_ids", "attention_mask")
}

In [None]:
history = joint_model.fit(
    x, (encoded_slots_array, encoded_intents), epochs=1, batch_size=16, shuffle=True, verbose = 2)

## Inference

In [None]:
def nlu(text, tokenizer, model, intent_names, slot_names):
    """Run the joint model on one utterance and decode its intent and slot values.

    Args:
        text: the utterance to analyse.
        tokenizer: tokenizer exposing `encode`, `tokenize` and
            `convert_tokens_to_string`.
        model: callable returning `(slot_logits, intent_logits)` for a batch
            of token ids.
        intent_names: sequence indexed by the argmax of the intent logits; it
            must list the labels in the order of the model's intent outputs.
        slot_names: sequence indexed by slot class id; id 0 / "<PAD>" is
            skipped.

    Returns:
        `{"intent": <name>, "slots": {<slot name>: <text>}}`.
    """
    inputs = tf.constant(tokenizer.encode(text))[None, :]  # batch_size = 1
    slot_logits, intent_logits = model(inputs)

    slot_ids = slot_logits.numpy().argmax(axis=-1)[0, :]
    intent_id = intent_logits.numpy().argmax(axis=-1)[0]

    info = {"intent": intent_names[intent_id], "slots": {}}

    # one (initially empty) token list per predicted, non-padding slot name
    out_dict = {slot_names[s]: [] for s in slot_ids if s != 0}

    # check if the text starts with a small letter (empty text takes the else branch)
    # NOTE(review): `slot_ids` come from `tokenizer.encode(text)`; confirm that
    # the token list built in each branch lines up with those positions.
    if text and text[0].islower():
      tokens = tokenizer.tokenize(text, add_special_tokens=True)
    else:
      tokens = tokenizer.tokenize(text)

    # enumerate gives the real position of each token; looking it up with
    # tokens.index(token) would return the first occurrence of a repeated token
    for idx, (token, slot_id) in enumerate(zip(tokens, slot_ids)):
        slot_name = slot_names[slot_id]

        if slot_name == "<PAD>":
            continue

        # collect tokens
        collected_tokens = [token]

        # a token starting with ## continues the previous token, so pull that
        # one in as well unless it was already collected for this slot
        if token.startswith("##") and idx > 0:
          if tokens[idx - 1] not in out_dict[slot_name]:
            collected_tokens.insert(0, tokens[idx - 1])

        # add collected tokens to slots
        out_dict[slot_name].extend(collected_tokens)

    # join the collected word pieces of every slot back into a string
    for slot_name, slot_tokens in out_dict.items():
        slot_value = tokenizer.convert_tokens_to_string(slot_tokens)
        info["slots"][slot_name] = slot_value.strip()

    return info


In [None]:
df_test.head()

In [None]:
# The intent head was built with len(intents) outputs, so its argmax must be
# decoded with the atomic `intents` list, not the combined `intent_names`.
nlu("i want a return flight from washington to dallas on american airlines", tokenizer, joint_model, 
    intents, slot_names)

In [None]:
# The intent head was built with len(intents) outputs, so its argmax must be
# decoded with the atomic `intents` list, not the combined `intent_names`.
nlu("add Brian May to my Reggae Infusions list", tokenizer, joint_model, 
    intents, slot_names)

In [None]:
import calendar
import time

# timestamp helper for naming prediction files
def get_time_stamp():
    """Return the current UTC time as whole seconds since the Unix epoch."""
    return calendar.timegm(time.gmtime())

get_time_stamp()

## Generate prediction.json

This section creates a file containing all the prediction results for inputs from dev.json

In [None]:
def read_dev_data(file="dev.json"):
    """Read the `text` field of every entry in a JSON file.

    Args:
        file: path of a UTF-8 JSON file whose top level is an object and
            whose values each contain a "text" key.

    Returns:
        List of the texts, in the order the entries appear in the file.
    """
    # Imported locally because no visible notebook cell imports json.
    import json

    with open(file, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    return [entry["text"] for entry in data.values()]
dev_texts = read_dev_data()

In [None]:
from tqdm import tqdm

# Predict every dev text. The intent head has len(intents) outputs, so the
# atomic `intents` list (not the combined `intent_names`) decodes its argmax.
results = [
    nlu(dev_text, tokenizer, joint_model, intents, slot_names)
    for dev_text in tqdm(dev_texts)
]

In [None]:
# process results
# key every prediction by its position in `results`, as a string
results_dict = {str(position): prediction for position, prediction in enumerate(results)}

In [None]:
import json  # no visible notebook cell imports it before this point

# Write all predictions to prediction.json in the working directory.
with open("prediction.json", "w") as f:
    json.dump(results_dict, f, indent=2)

In [None]:
!head prediction.json