# Install and import necessary libraries and packages

Instructions: connect to a GPU and click on run all. Provided that paths to CSV files are correct, everything should execute on its own. Scroll down to the bottom of the notebook for the results.

In [None]:
!pip install transformers

import ast
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

import transformers
from transformers import AutoTokenizer
from transformers import  DistilBertForTokenClassification, BertModel

import torch
import torch.nn as nn
from torch.optim import SGD
import torch.nn.functional as F
from torch.utils.data import DataLoader

from sklearn.metrics import accuracy_score,f1_score, precision_score, recall_score, classification_report

In [None]:
import warnings
# Silence every Python warning for the rest of the session (no category filter is given).
# NOTE(review): this also hides deprecation and metric warnings that may point at real
# problems -- consider narrowing the filter to the specific category that floods the output.
warnings.filterwarnings('ignore')

# Reading and preparing the data
Read data in CSV format. The data file should contain the columns 'sentences' (or 'sentence') and 'tags'. The former should contain plain-text sentences (non-tokenized), and the latter should contain a list of IOB tags.

In [None]:
# Load the IOB-annotated training and test splits.
train_df = pd.read_csv('../../data/bert_train_iob.csv')
test_df = pd.read_csv('../../data/bert_test_iob.csv')

# Drop the leftover index column if the CSV was written with its index;
# errors='ignore' makes this a no-op when the column is absent.
train_df = train_df.drop(columns=['Unnamed: 0'], errors='ignore')
test_df = test_df.drop(columns=['Unnamed: 0'], errors='ignore')

# Pandas reads lists as strings. Parse them back into lists with ast.literal_eval,
# which only accepts Python literals and therefore cannot execute code embedded in the CSV
# (the previous eval() call could).
train_df['tags'] = train_df['tags'].apply(ast.literal_eval)
test_df['tags'] = test_df['tags'].apply(ast.literal_eval)

In [None]:
# combine labels from training and testing set into a list (one list of tags per sentence)
unified_labels = []
unified_labels.extend(train_df.tags.to_list())
unified_labels.extend(test_df.tags.to_list())

# flatten the per-sentence tag lists into a single list of tags
labels = [label for row in unified_labels for label in row]

# unique labels, sorted so the printed list is identical from run to run
# (iteration order of a set of strings changes between interpreter sessions)
label_list = sorted(set(labels))
print(f"List of unique labels: {label_list}")

# create dictionaries that map labels to numeric IDs and vice-versa
labels_to_ids = {lbl: idx for idx, lbl in enumerate(label_list)}
ids_to_labels = {idx: lbl for lbl, idx in labels_to_ids.items()}

# join tags back into a string after performing all operations that require a list format
# NOTE: this overwrites the list column, so re-running this cell without re-running the
# loading cell would treat every character of the joined string as a tag.
train_df['tags'] = train_df['tags'].apply(lambda x: ' '.join(x))
test_df['tags'] = test_df['tags'].apply(lambda x: ' '.join(x))

# normalise the text column name; a no-op when the column is already called 'sentence'
train_df = train_df.rename(columns={'sentences': 'sentence'})
test_df = test_df.rename(columns={'sentences': 'sentence'})

# Generate validation data from 20% of the training data set.
# Plain positional slicing replaces np.split, which relies on the deprecated
# DataFrame.swapaxes; .copy() gives independent frames that can safely receive new columns.
shuffled_train = train_df.sample(frac=1, random_state=42)
split_at = int(.8 * len(train_df))
df_train = shuffled_train.iloc[:split_at].copy()
df_val = shuffled_train.iloc[split_at:].copy()
df_test = test_df

List of unique labels: ['I-ORG', 'B-OTHER_PERSON', 'I-WITNESS', 'I-DATE', 'B-PROVISION', 'B-ORG', 'B-CASE_NUMBER', 'I-GPE', 'B-RESPONDENT', 'I-STATUTE', 'I-PRECEDENT', 'B-LAWYER', 'I-PETITIONER', 'B-WITNESS', 'I-LAWYER', 'B-JUDGE', 'O', 'I-JUDGE', 'B-COURT', 'B-PRECEDENT', 'I-CASE_NUMBER', 'B-PETITIONER', 'B-STATUTE', 'I-OTHER_PERSON', 'I-PROVISION', 'I-COURT', 'I-RESPONDENT', 'B-GPE', 'B-DATE']


In [None]:
# CustomDataset allows us to retrieve batches of variable lengths of the data for training and validating
class CustomDataset(torch.utils.data.Dataset):
  """Tokenized sentences paired with per-token label ids.

  Args:
    df: DataFrame with a 'sentence' column (plain text) and a 'tags' column
      (space-separated IOB tags).
    tokenizer_name: name of the pretrained tokenizer to load.

  Each item is a tuple ``(encoding, labels)``: the tokenizer output for one
  sentence (padded/truncated to the model maximum length) and a LongTensor of
  label ids, one per token. Tokens that carry no label get -100, the value the
  training and evaluation loops filter out.

  Relies on the module-level ``labels_to_ids`` mapping.
  """

  def __init__(self, df, tokenizer_name = 'distilbert-base-uncased'):
    self.texts = []
    self.labels = []

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    list_of_tags = [value.split() for value in df['tags'].values.tolist()]
    texts = df["sentence"].values.tolist()

    for t in texts:
      self.texts.append(tokenizer(t, padding="max_length", truncation=True, return_tensors="pt"))

    for text, tags in zip(self.texts, list_of_tags):
      label_ids = []

      # NOTE(review): word_ids() indexes the words produced by the tokenizer's own
      # pre-tokenization of the raw string; this assumes those indices line up with the
      # positions in `tags` -- confirm against how the tags were produced.
      for word_id in text.word_ids():
        if word_id is None:
          # special / padding token
          label_ids.append(-100)
          continue

        try:
          label_ids.append(labels_to_ids[tags[word_id]])
        except (IndexError, KeyError):
          # no tag at this word index, or a tag missing from the mapping:
          # mark the token as ignored. Any other error is allowed to surface.
          label_ids.append(-100)

      self.labels.append(label_ids)

  # required by the Dataset protocol: DataLoader and len() use it to size the data set
  def __len__(self):
    return len(self.labels)

  def __getitem__(self, id):
    txt = self.texts[id]
    lbl = self.labels[id]

    return txt, torch.LongTensor(lbl)

# Defining the DistilBERT model

In [None]:
class Bert(nn.Module):
  """DistilBERT token classifier bundled with training, evaluation and prediction loops.

  After ``fit`` the per-epoch metrics are stored in ``self.results``; after
  ``evaluate`` the test metrics are stored in ``self.evaluation_score`` and
  ``self.classification_report``.

  NOTE(review): accuracy / F1 / precision / recall are computed per batch and then
  averaged over batches, and the reported loss is the sum of batch losses divided by
  the number of samples. Both therefore depend on the batch size; this is kept as-is
  so previously reported numbers stay comparable.
  """

  # metric names accumulated per batch
  _METRICS = ('acc', 'f1', 'precision', 'recall')

  def __init__(self, label_count):
    """Load pretrained DistilBERT with a token-classification head of `label_count` outputs."""
    super(Bert, self).__init__()
    self.bert = DistilBertForTokenClassification.from_pretrained("distilbert-base-uncased", num_labels = label_count)

  def forward(self, input_ids, attention_mask, labels = None):
    """Run the wrapped model; `labels` is passed through only when provided."""

    # we need this because we don't provide the labels when we predict a sentence
    if labels is None:
      return self.bert(input_ids = input_ids, attention_mask = attention_mask)

    # this is executed during training and testing, since labels are provided
    return self.bert(input_ids = input_ids, attention_mask = attention_mask , labels = labels)

  @staticmethod
  def _forward_batch(network, batch, batch_labels, device):
    """Run `network` on one DataLoader batch.

    Returns (loss, true_ids, predicted_ids); the two id arrays are flat NumPy arrays
    with every position labelled -100 removed.
    """
    batch_labels = batch_labels.to(device)

    # extract input IDs and attention masks to feed to the model;
    # squeeze(1) drops the size-1 dimension at position 1 of the collated tensors
    mask = batch['attention_mask'].squeeze(1).to(device)
    input_id = batch['input_ids'].squeeze(1).to(device)

    output = network(input_id, mask, batch_labels)

    # flatten to 1D and keep only the positions that carry a real label
    predictions = output.logits.argmax(dim = -1).flatten()
    flat_labels = batch_labels.flatten()
    keep = flat_labels != -100

    # NumPy doesn't utilize the GPU. Convert everything to CPU to use sklearn.metrics methods
    true_ids = flat_labels[keep].detach().cpu().numpy()
    predicted_ids = predictions[keep].detach().cpu().numpy()
    return output.loss, true_ids, predicted_ids

  @staticmethod
  def _score_batch(true_ids, predicted_ids):
    """Accuracy and macro-averaged F1 / precision / recall for one batch."""
    return {
      'acc': accuracy_score(true_ids, predicted_ids),
      'f1': f1_score(true_ids, predicted_ids, average="macro"),
      'precision': precision_score(true_ids, predicted_ids, average="macro"),
      'recall': recall_score(true_ids, predicted_ids, average="macro"),
    }

  def _summarise(self, totals, total_loss, n_batches, n_samples, digits):
    """Average the accumulated batch metrics and print the LOSS / Acc summary lines."""
    summary = {name: round(totals[name] / n_batches, digits) for name in self._METRICS}
    summary['loss'] = total_loss / n_samples

    print(f"LOSS: {summary['loss']}")
    print(f"Acc: {summary['acc']} | F1: {summary['f1']} | Precision: {summary['precision']} | Recall: {summary['recall']}")
    return summary

  def fit(self, train_dataset, dev_dataset, optimizer,  batch_size, epochs):
    """Train for up to `epochs` epochs, validating after each one.

    Args:
      train_dataset, dev_dataset: datasets yielding (encoding, labels) items.
      optimizer: optimizer already bound to this model's parameters.
      batch_size: batch size for both data loaders.
      epochs: maximum number of epochs (early stopping may end training sooner).

    Per-epoch metrics are appended to ``self.results['train']`` and ``self.results['val']``.
    """

    self.results = {
      'train': {'acc': [], 'loss': [], 'f1': [], 'precision': [], 'recall': []},
      'val': {'acc': [], 'loss': [], 'f1': [], 'precision': [], 'recall': []},
      'test': {'acc': [], 'loss': [], 'f1': [], 'precision': [], 'recall': []}
    }

    # NOTE(review): the stopper is fed the *summed* validation loss, so min_delta=5 is
    # measured on that scale -- confirm this threshold is intended.
    early_stopper = EarlyStopper(patience=3, min_delta=5)

    # use the device the weights already live on instead of depending on a global `device`
    device = next(self.parameters()).device

    train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
    # no shuffling for validation: the per-batch averaged metrics then do not vary between runs
    dev_dataloader = DataLoader(dev_dataset, batch_size = batch_size, shuffle = False)

    for epoch in range(epochs):

      print('########### EPOCH: ' + str(epoch))

      # track metrics. reset with every new epoch
      totals = dict.fromkeys(self._METRICS, 0)
      total_loss_train = 0

      # Enter training mode
      self.train()

      for train_data, train_label in tqdm(train_dataloader):

        # set the gradients of all params to zero before using stochastic gradient descent.
        # prevents gradients from previous batches from accumulating
        optimizer.zero_grad()

        loss, true_ids, predicted_ids = self._forward_batch(self, train_data, train_label, device)

        # perform backpropagation to compute loss gradients
        loss.backward()

        # update parameters based on what was calculated during backpropagation
        optimizer.step()

        total_loss_train += loss.item()
        for name, value in self._score_batch(true_ids, predicted_ids).items():
          totals[name] += value

      print("TRAIN")
      print('loss: ' + str(total_loss_train))
      summary = self._summarise(totals, total_loss_train, len(train_dataloader), len(train_dataset), 2)
      for name, value in summary.items():
        self.results['train'][name].append(value)

      # enter evaluation mode: switches layers such as dropout to their inference behaviour
      self.eval()

      totals = dict.fromkeys(self._METRICS, 0)
      total_loss_dev = 0

      with torch.no_grad():
        for dev_data, dev_label in dev_dataloader:
          loss, true_ids, predicted_ids = self._forward_batch(self, dev_data, dev_label, device)

          total_loss_dev += loss.item()
          for name, value in self._score_batch(true_ids, predicted_ids).items():
            totals[name] += value

      print("VALIDATION")
      summary = self._summarise(totals, total_loss_dev, len(dev_dataloader), len(dev_dataset), 2)
      for name, value in summary.items():
        self.results['val'][name].append(value)

      if early_stopper.early_stop(total_loss_dev):
          break

  # when evaluating a model, you can also use a pre-saved model instead of training from scratch.
  # just provide the model as the second argument when calling the evaluate method
  def evaluate(self, test_df, model = None):
    """Evaluate on test data and store the scores on this instance.

    Args:
      test_df: DataFrame with 'sentence' and 'tags' columns, or an already built
        torch Dataset yielding (encoding, labels) items.
      model: optional model to evaluate instead of `self`.

    Sets ``self.evaluation_score`` (f1, acc, precision, recall, loss) and
    ``self.classification_report`` (dict keyed by label id, from sklearn).
    """
    self.classification_report = None
    self.evaluation_score = {
        'f1': None,
        'acc': None,
        'precision': None,
        'recall': None,
        'loss': None
    }

    all_true = []
    all_predicted = []

    totals = dict.fromkeys(self._METRICS, 0)
    total_loss_test = 0

    # use the data that was passed in (previously the argument was ignored in favour of
    # the global df_test); a ready-made Dataset is used as-is to avoid tokenizing twice
    if isinstance(test_df, torch.utils.data.Dataset):
      test_dataset = test_df
    else:
      test_dataset = CustomDataset(test_df)
    test_dataloader = DataLoader(test_dataset, batch_size = 2, shuffle = False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # decide whether we're evaluating a model we just trained or one that was provided as an arg
    network = (self if model is None else model).to(device)

    # inference only: evaluation mode and no gradient bookkeeping
    network.eval()
    with torch.no_grad():
      for test_data, test_label in tqdm(test_dataloader):
        loss, true_ids, predicted_ids = self._forward_batch(network, test_data, test_label, device)

        all_true.extend(true_ids)
        all_predicted.extend(predicted_ids)

        total_loss_test += loss.item()
        for name, value in self._score_batch(true_ids, predicted_ids).items():
          totals[name] += value

    print("TEST")
    summary = self._summarise(totals, total_loss_test, len(test_dataloader), len(test_dataset), 3)

    self.evaluation_score['f1'] = summary['f1']
    self.evaluation_score['precision'] = summary['precision']
    self.evaluation_score['recall'] = summary['recall']
    self.evaluation_score['acc']  = summary['acc']
    self.evaluation_score['loss'] = summary['loss']

    self.classification_report = classification_report(all_true, all_predicted, output_dict=True)

  def predict_single(self, sentence, model = None):
    """Predict one IOB label per word of a plain-text sentence.

    Args:
      sentence: non-tokenized input text.
      model: optional model to use instead of `self`.

    Returns:
      List of label strings, one for the first sub-token of every word the tokenizer
      finds in `sentence`. Also prints the sub-token list for reference.

    Relies on the module-level ``ids_to_labels`` mapping.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # fall back to this instance when no model is given (previously model.cuda() was
    # called unconditionally and failed on GPU machines when model was None)
    network = (self if model is None else model).to(device)
    network.eval()

    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    text = tokenizer(sentence , padding = "max_length" , truncation = True, return_tensors = "pt" )

    # keep exactly one position per word: its first sub-token.
    # special / padding tokens have word id None and are dropped.
    keep = []
    previous_word_id = None
    for word_id in text.word_ids():
      keep.append(word_id is not None and word_id != previous_word_id)
      previous_word_id = word_id

    mask = text['attention_mask'].to(device)
    input_id = text['input_ids'].to(device)
    keep = torch.tensor(keep, dtype = torch.bool, device = device)

    with torch.no_grad():
      output = network(input_id, mask, None)

    # logits of the single sentence in the batch, restricted to the kept positions
    logits_clean = output.logits[0][keep]

    predictions = logits_clean.argmax(dim=1).tolist()
    prediction_label = [ids_to_labels[i] for i in predictions]
    print(tokenizer.tokenize(sentence))
    return prediction_label

In [None]:
# class to stop training early if it is showing overfitting tendency
class EarlyStopper:
    """Signal when validation loss has stopped improving.

    `patience` is how many non-improving checks (loss above the best value by more
    than `min_delta`) are tolerated since the last improvement before stopping.
    """

    def __init__(self, patience=1, min_delta=0):
        self.counter = 0
        self.min_validation_loss = np.inf
        self.patience = patience
        self.min_delta = min_delta

    def early_stop(self, validation_loss):
        """Record `validation_loss`; return True once patience is exhausted."""
        best_so_far = self.min_validation_loss

        # new best: remember it and start counting from scratch
        if validation_loss < best_so_far:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # not better, but still within the tolerated margin: nothing to count
        if not (validation_loss > best_so_far + self.min_delta):
            return False

        self.counter += 1
        return self.counter >= self.patience

In [None]:
# convert tags to labels

def tags_to_labels(tags, labels_to_ids):
  """Convert a space-separated tag string into a list of numeric label ids.

  Args:
    tags: string of whitespace-separated tags, e.g. "B-ORG I-ORG O".
    labels_to_ids: mapping from tag string to numeric id; must contain "O" if
      `tags` can include a tag that is not in the mapping.

  Returns:
    List of ids, one per tag. Tags missing from the mapping fall back to the id of "O".

  Raises:
    KeyError: if an unknown tag is met and the mapping has no "O" entry.
  """
  # the fallback is looked up in the mapping that was passed in, so the function no
  # longer depends on a global `o_label` being defined before it is called
  return [
      labels_to_ids[tag] if tag in labels_to_ids else labels_to_ids["O"]
      for tag in tags.split()
  ]

In [None]:
# id of the 'O' tag, i.e. a token that is not one of the named entities in our list
o_label = labels_to_ids["O"]

# add a numeric 'labels' column next to the textual 'tags' column of every split
for df in (df_train, df_val, df_test):
  df["labels"] = df["tags"].map(lambda tag_string: tags_to_labels(tag_string, labels_to_ids))


# Training & evaluating the model


*   model.fit(train_dataset, val_dataset, optimizer, batch_size, epochs) -- train the model (both datasets are CustomDataset instances)
*   model.results -- get training metrics
*   model.evaluate(test_df, model) -- evaluate the model using test data. 'model' arg is optional
*   model.evaluation_score -- get F1, precision, recall, accuracy, and loss
*   model.classification_report -- get all metrics for individual named entities
*   model.predict_single(sentence, model) -- provide a non-tokenized sentence and get back labels for each word



In [None]:
# fix the random seed so the freshly initialised classification head and the
# shuffling of training batches are repeatable between runs
torch.manual_seed(42)

# make sure the output folder exists *before* training, so a missing directory
# cannot discard the trained model at the very end
model_path = '../../saved_models/BERT/model_final.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# instantiate the class
model = Bert(len(label_list))

# use GPU if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

train_dataset = CustomDataset(df_train)
val_dataset = CustomDataset(df_val)

# define parameters for training
lr = 1e-2
momentum = 0.9
batch_size = 32
epochs = 10
optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

# train the model
model.fit(train_dataset, val_dataset, optimizer, batch_size, epochs)

# save the model for later use (the whole module object is pickled, not only its weights)
torch.save(model, model_path)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN t

########### EPOCH: 0


100%|██████████| 275/275 [06:18<00:00,  1.38s/it]


TRAIN
loss: 235.30771017074585
LOSS: 0.026751672370480428
Acc: 0.8 | F1: 0.13 | Precision: 0.16 | Recall: 0.13
VALIDATION
LOSS: 0.018247363362978025
Acc: 0.83 | F1: 0.22 | Precision: 0.29 | Recall: 0.21
########### EPOCH: 1


100%|██████████| 275/275 [06:18<00:00,  1.38s/it]


TRAIN
loss: 146.48887345194817
LOSS: 0.016654032907224666
Acc: 0.84 | F1: 0.3 | Precision: 0.39 | Recall: 0.29
VALIDATION
LOSS: 0.015470505804297815
Acc: 0.84 | F1: 0.39 | Precision: 0.47 | Recall: 0.4
########### EPOCH: 2


100%|██████████| 275/275 [06:19<00:00,  1.38s/it]


TRAIN
loss: 118.5966997295618
LOSS: 0.013483026344879696
Acc: 0.86 | F1: 0.42 | Precision: 0.53 | Recall: 0.4
VALIDATION
LOSS: 0.013106445241050322
Acc: 0.86 | F1: 0.47 | Precision: 0.54 | Recall: 0.47
########### EPOCH: 3


100%|██████████| 275/275 [06:19<00:00,  1.38s/it]


TRAIN
loss: 100.99392771720886
LOSS: 0.011481801695908239
Acc: 0.88 | F1: 0.49 | Precision: 0.6 | Recall: 0.47
VALIDATION
LOSS: 0.012595540979007202
Acc: 0.87 | F1: 0.52 | Precision: 0.61 | Recall: 0.5
########### EPOCH: 4


100%|██████████| 275/275 [06:18<00:00,  1.38s/it]


TRAIN
loss: 88.65078289806843
LOSS: 0.010078533753759484
Acc: 0.89 | F1: 0.55 | Precision: 0.65 | Recall: 0.53
VALIDATION
LOSS: 0.011454672332837833
Acc: 0.88 | F1: 0.55 | Precision: 0.63 | Recall: 0.53
########### EPOCH: 5


100%|██████████| 275/275 [06:18<00:00,  1.38s/it]


TRAIN
loss: 79.55221700668335
LOSS: 0.009044135630591558
Acc: 0.9 | F1: 0.6 | Precision: 0.69 | Recall: 0.58
VALIDATION
LOSS: 0.010640192907362865
Acc: 0.88 | F1: 0.6 | Precision: 0.66 | Recall: 0.6
########### EPOCH: 6


100%|██████████| 275/275 [06:18<00:00,  1.38s/it]


TRAIN
loss: 72.44942620396614
LOSS: 0.008236633265571412
Acc: 0.91 | F1: 0.63 | Precision: 0.71 | Recall: 0.61
VALIDATION
LOSS: 0.011035778065495407
Acc: 0.89 | F1: 0.6 | Precision: 0.67 | Recall: 0.58
########### EPOCH: 7


100%|██████████| 275/275 [06:19<00:00,  1.38s/it]


TRAIN
loss: 67.31039176136255
LOSS: 0.007652386512205838
Acc: 0.91 | F1: 0.66 | Precision: 0.73 | Recall: 0.64
VALIDATION
LOSS: 0.010711795195984591
Acc: 0.88 | F1: 0.61 | Precision: 0.64 | Recall: 0.64
########### EPOCH: 8


100%|██████████| 275/275 [06:19<00:00,  1.38s/it]


TRAIN
loss: 63.99589063227177
LOSS: 0.007275567375201429
Acc: 0.92 | F1: 0.68 | Precision: 0.75 | Recall: 0.66
VALIDATION
LOSS: 0.010295667174378543
Acc: 0.89 | F1: 0.62 | Precision: 0.68 | Recall: 0.62
########### EPOCH: 9


100%|██████████| 275/275 [06:19<00:00,  1.38s/it]


TRAIN
loss: 58.37360428273678
LOSS: 0.006636380659701771
Acc: 0.92 | F1: 0.7 | Precision: 0.76 | Recall: 0.68
VALIDATION
LOSS: 0.010967901358554558
Acc: 0.89 | F1: 0.62 | Precision: 0.69 | Recall: 0.61


In [None]:
# Reload the saved model. map_location places the weights on whatever device is
# available, so a checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# refuses fully pickled modules like this one -- pass weights_only=False there (only
# for checkpoints you trust). Confirm against the installed version.
model = torch.load(
    '../../saved_models/BERT/model_final.pth',
    map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

### Training scores (Accuracy, Loss, F1, Precision and Recall)

In [None]:
# one row per metric, one column per epoch
print('Training results per epoch')
pd.DataFrame(model.results['train']).T

Training results per epoch


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
acc,0.8,0.84,0.86,0.88,0.89,0.9,0.91,0.91,0.92,0.92
loss,0.026752,0.016654,0.013483,0.011482,0.010079,0.009044,0.008237,0.007652,0.007276,0.006636
f1,0.13,0.3,0.42,0.49,0.55,0.6,0.63,0.66,0.68,0.7
precision,0.16,0.39,0.53,0.6,0.65,0.69,0.71,0.73,0.75,0.76
recall,0.13,0.29,0.4,0.47,0.53,0.58,0.61,0.64,0.66,0.68


### Validation scores (Accuracy, Loss, F1, Precision and Recall)

In [None]:
# one row per metric, one column per epoch
print('Validation results per epoch')
pd.DataFrame(model.results['val']).T

Validation results per epoch


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
acc,0.83,0.84,0.86,0.87,0.88,0.88,0.89,0.88,0.89,0.89
loss,0.018247,0.015471,0.013106,0.012596,0.011455,0.01064,0.011036,0.010712,0.010296,0.010968
f1,0.22,0.39,0.47,0.52,0.55,0.6,0.6,0.61,0.62,0.62
precision,0.29,0.47,0.54,0.61,0.63,0.66,0.67,0.64,0.68,0.69
recall,0.21,0.4,0.47,0.5,0.53,0.6,0.58,0.64,0.62,0.61


### Testing scores

In [None]:
# evaluate() builds (and tokenizes) the test data set itself, so pass the DataFrame
# directly instead of tokenizing the test set a second time here
model.evaluate(df_test, model)

In [None]:
# single-row table holding the test-set scores
pd.DataFrame([model.evaluation_score], index=['score'])

Unnamed: 0,f1,acc,precision,recall,loss
score,0.669,0.91,0.686,0.677,0.152539


#### Classification report

In [None]:
creport = pd.DataFrame(model.classification_report).transpose().reset_index()

# Replace numeric class ids with their label names. Rows are picked by content
# (all-digit keys) rather than by a hard-coded count of 29, so the summary rows
# ('accuracy', 'macro avg', ...) are left alone even if a class is absent, and the
# column is assigned as a whole to avoid chained assignment on a slice.
creport['index'] = creport['index'].map(
    lambda key: ids_to_labels[int(key)] if str(key).isdigit() else key
)
creport

Unnamed: 0,class,precision,recall,f1-score,support
0,B-CASE_NUMBER,0.55,0.5,0.53,145
1,B-COURT,0.79,0.66,0.72,314
2,B-DATE,0.78,0.67,0.72,212
3,B-GPE,0.49,0.27,0.35,189
4,B-JUDGE,0.53,0.37,0.43,133
5,B-LAWYER,0.0,0.0,0.0,512
6,B-ORG,0.62,0.48,0.54,189
7,B-OTHER_PERSON,0.8,0.63,0.7,410
8,B-PETITIONER,0.46,0.21,0.29,237
9,B-PRECEDENT,0.78,0.63,0.7,265


In [None]:
# Raw report as stored by Bert.evaluate. It is built with output_dict=True, so this
# prints a nested dict keyed by numeric class id, not a formatted text table.
print(model.classification_report)

              precision    recall  f1-score   support

           0       0.55      0.50      0.53       145
           1       0.79      0.66      0.72       314
           2       0.78      0.67      0.72       212
           3       0.49      0.27      0.35       189
           4       0.53      0.37      0.43       133
           5       0.00      0.00      0.00       512
           6       0.62      0.48      0.54       189
           7       0.80      0.63      0.70       410
           8       0.46      0.21      0.29       237
           9       0.78      0.63      0.70       265
          10       0.79      0.72      0.75       260
          11       0.48      0.05      0.08       333
          12       0.79      0.55      0.65       214
          13       0.67      0.72      0.69        81
          14       0.74      0.88      0.81      1138
          15       0.91      0.77      0.83      1479
          16       0.89      0.89      0.89       781
          17       0.45    

# Predicting a sentence

In [None]:
sentence = 'High Court of Dhaka in Mumbai, witness Ichcaki Uttaran'
# pass the model explicitly, as described in the usage notes: predict_single moves the
# given model to the GPU and would otherwise be handed None on CUDA machines
model.predict_single(sentence, model)

['high', 'court', 'of', 'dhaka', 'in', 'mumbai', ',', 'witness', 'ich', '##ca', '##ki', 'uttar', '##an']


['B-COURT',
 'I-COURT',
 'I-COURT',
 'I-COURT',
 'I-COURT',
 'B-GPE',
 'O',
 'O',
 'B-WITNESS',
 'I-WITNESS']