# Objective: To detect hate speech using RNN (GRU and LSTM)

1. Loading and Preprocessing the data

2. Training classifier using RNN

3. Validating and quantifying the model performance

## Set Up

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Import required libraries
from google.colab import drive

import numpy as np
import pandas as pd
import random
import re
import sklearn

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

import nltk
from nltk.corpus import stopwords

In [None]:
#set seed

def set_seeds(seed=1234):
    """Set seeds for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


In [None]:
SEED = 1234
# Set seeds for reproducibility
set_seeds(seed=SEED)

In [None]:
# Set device
cuda = True
device = torch.device("cuda" if (
    torch.cuda.is_available() and cuda) else "cpu")
torch.set_default_tensor_type("torch.FloatTensor")
if device.type == "cuda":
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
print (device)

cuda


## Load and Preprocess data

In [None]:
# Read the data
hspeech_df = pd.read_csv("./hate_speech_data.csv")
hspeech_df.head()

Unnamed: 0.1,Unnamed: 0,tweet,class
0,0,!!! RT @mayasolovely: As a woman you shouldn't...,0
1,1,""" momma said no pussy cats inside my doghouse """,0
2,2,"""@Addicted2Guys: -SimplyAddictedToGuys http://...",0
3,3,"""@AllAboutManFeet: http://t.co/3gzUpfuMev"" woo...",0
4,4,"""@Allyhaaaaa: Lemmie eat a Oreo &amp; do these...",0


In [None]:
nltk.download("stopwords")
stopwords = stopwords.words("english")
print (stopwords[:5])

['i', 'me', 'my', 'myself', 'we']


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
def clean_tweet(text):

    #lowercase the tweets and remove trailing & ending space
    text = text.lower().strip()                

    # Removes words followed by @
    text = re.sub("(@[A-Za-z0-9]+)", "", text)

    # Removes words at start of string 
    text = re.sub("([^0-9A-Za-z \t])", "", text)

    # remove non alphanumeric chars 
    text = re.sub("[^A-Za-z0-9]+", " ", text)

    #remove stopwords
    words = [word for word in text.split() if word not in stopwords]
    text = " ".join(words)

    # remove multiple spaces
    text = re.sub(" +", " ", text)

    return text

In [None]:
hspeech_df["tweet"] = hspeech_df["tweet"].apply(clean_tweet)

In [None]:
hspeech_df.head()

Unnamed: 0.1,Unnamed: 0,tweet,class
0,0,rt woman shouldnt complain cleaning house amp ...,0
1,1,momma said pussy cats inside doghouse,0
2,2,simplyaddictedtoguys httptco1jl4hi8zmf woof wo...,0
3,3,httptco3gzupfumev woof woof hot soles,0
4,4,lemmie eat oreo amp dishes one oreo lol,0


## Split Data

In [None]:
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15

In [None]:
def train_val_test_split(X, y, train_size):
    """Split dataset into data splits."""
    X_train, X_, y_train, y_ = train_test_split(X, y, train_size=TRAIN_SIZE, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_, y_, train_size=0.5, stratify=y_)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [None]:
# Data
X = hspeech_df["tweet"].values
y = hspeech_df["class"].values

In [None]:
# Create data splits
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(X=X, y=y, train_size=TRAIN_SIZE)

print (f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print (f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print (f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print (f"Sample point: {X_train[0]} --> {y_train[0]}")

X_train: (3915,), y_train: (3915,)
X_val: (839,), y_val: (839,)
X_test: (839,), y_test: (839,)
Sample point: perhaps ezra miller first crack toward changing regardless whether identifies queer gay --> 0


## Tokenizer

In [None]:
import json
from collections import Counter
from more_itertools import take

In [None]:
class Tokenizer(object):
    def __init__(self, char_level, num_tokens=None, 
                 pad_token='<PAD>', oov_token='<UNK>',
                 token_to_index=None):
        self.char_level = char_level
        self.separator = '' if self.char_level else ' '
        if num_tokens: num_tokens -= 2 # pad + unk tokens
        self.num_tokens = num_tokens
        self.pad_token = pad_token
        self.oov_token = oov_token
        if not token_to_index:
            token_to_index = {pad_token: 0, oov_token: 1}
        self.token_to_index = token_to_index
        self.index_to_token = {v: k for k, v in self.token_to_index.items()}

    def __len__(self):
        return len(self.token_to_index)

    def __str__(self):
        return f"<Tokenizer(num_tokens={len(self)})>"

    def fit_on_texts(self, texts):
        if not self.char_level:
            texts = [text.split(" ") for text in texts]
        all_tokens = [token for text in texts for token in text]
        counts = Counter(all_tokens).most_common(self.num_tokens)
        self.min_token_freq = counts[-1][1]
        for token, count in counts:
            index = len(self)
            self.token_to_index[token] = index
            self.index_to_token[index] = token
        return self

    def texts_to_sequences(self, texts):
        sequences = []
        for text in texts:
            if not self.char_level:
                text = text.split(' ')
            sequence = []
            for token in text:
                sequence.append(self.token_to_index.get(
                    token, self.token_to_index[self.oov_token]))
            sequences.append(np.asarray(sequence))
        return sequences

    def sequences_to_texts(self, sequences):
        texts = []
        for sequence in sequences:
            text = []
            for index in sequence:
                text.append(self.index_to_token.get(index, self.oov_token))
            texts.append(self.separator.join([token for token in text]))
        return texts

    def save(self, fp):
        with open(fp, "w") as fp:
            contents = {
                "char_level": self.char_level,
                "oov_token": self.oov_token,
                "token_to_index": self.token_to_index
            }
            json.dump(contents, fp, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        with open(fp, "r") as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [None]:
# Tokenize
tokenizer = Tokenizer(char_level=False, num_tokens=5000)
tokenizer.fit_on_texts(texts=X_train)
VOCAB_SIZE = len(tokenizer)
print(tokenizer)

<Tokenizer(num_tokens=5000)>


In [None]:
# Convert texts to sequences of indices
X_train = tokenizer.texts_to_sequences(X_train)
X_val = tokenizer.texts_to_sequences(X_val)
X_test = tokenizer.texts_to_sequences(X_test)
preprocessed_text = tokenizer.sequences_to_texts([X_train[0]])[0]

print ("Text to indices:\n"
    f"  (preprocessed) → {preprocessed_text}\n"
    f"  (tokenized) → {X_train[0]}")

Text to indices:
  (preprocessed) → perhaps ezra miller first crack toward changing regardless whether identifies queer gay
  (tokenized) → [2201 3627 3628  102 1237 3629 1238 3630 3631 2202  133  164]


## Padding

We'll need to do 2D padding to our tokenized text.

In [None]:
def pad_sequences(sequences, max_seq_len=0):
    """Pad sequences to max length in sequence."""
    
    max_seq_len = max(max_seq_len, max(len(sequence) for sequence in sequences))
    padded_sequences = np.zeros((len(sequences), max_seq_len))
    for i, sequence in enumerate(sequences):
        padded_sequences[i][:len(sequence)] = sequence
    return padded_sequences

In [None]:
# 2D sequences
padded = pad_sequences(X_train[0:3])
print(padded.shape)
print(padded)

(3, 13)
[[2.201e+03 3.627e+03 3.628e+03 1.020e+02 1.237e+03 3.629e+03 1.238e+03
  3.630e+03 3.631e+03 2.202e+03 1.330e+02 1.640e+02 0.000e+00]
 [3.632e+03 1.900e+02 1.239e+03 3.633e+03 8.580e+02 1.140e+02 1.200e+01
  1.584e+03 3.634e+03 3.635e+03 2.203e+03 8.590e+02 3.636e+03]
 [5.780e+02 4.100e+02 3.637e+03 2.650e+02 1.430e+02 2.204e+03 2.205e+03
  3.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00]]


## Datasets and Dataloaders

Create Datasets and DataLoaders to be able to efficiently create batches with the data splits

In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, X, y,):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __str__(self):
        return f"<Dataset(N={len(self)})>"

    def __getitem__(self, index):
        X = self.X[index]
        y = self.y[index]
        return [X, len(X), y]

    def collate_fn(self, batch):
        """Processing on a batch."""
        # Get inputs
        batch = np.array(batch, dtype=object)
        X = batch[:, 0]
        seq_lens = batch[:, 1]
        y = np.stack(batch[:, 2], axis=0)

        # Pad inputs
        X = pad_sequences(sequences=X)

        # Cast
        X = torch.LongTensor(X.astype(np.int32))
        seq_lens = torch.LongTensor(seq_lens.astype(np.int32))
        y = torch.LongTensor(y.astype(np.int32))

        return X, seq_lens, y

    def create_dataloader(self, batch_size, shuffle=False, drop_last=False):
        return torch.utils.data.DataLoader(
            dataset=self, batch_size=batch_size, collate_fn=self.collate_fn,
            shuffle=shuffle, drop_last=drop_last, pin_memory=True)

In [None]:
# Create datasets
train_dataset = Dataset(X=X_train, y=y_train)
val_dataset = Dataset(X=X_val, y=y_val)
test_dataset = Dataset(X=X_test, y=y_test)

In [None]:
# Create dataloaders
batch_size = 64
train_dataloader = train_dataset.create_dataloader(batch_size=batch_size)
val_dataloader = val_dataset.create_dataloader(batch_size=batch_size)
test_dataloader = test_dataset.create_dataloader(batch_size=batch_size)

batch_X, batch_seq_lens, batch_y = next(iter(train_dataloader))

## Trainer

In [None]:
class Trainer(object):
    def __init__(self, model, device, loss_fn=None, optimizer=None, scheduler=None):

        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler

    def train_step(self, dataloader):
        """Train step."""
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):

            # Step
            batch = [item.to(self.device) for item in batch]  # Set device
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Cumulative Metrics
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Validation or test step."""
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Step
                batch = [item.to(self.device) for item in batch]  # Set device
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Cumulative Metrics
                loss += (J - loss) / (i + 1)

                # Store outputs
                y_prob = F.softmax(z,dim=1).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Prediction step."""
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs
                inputs, targets = batch[:-1], batch[-1]
                z = self.model(inputs)

                # Store outputs
                y_prob = F.softmax(z,dim=1).cpu().numpy()
                y_probs.extend(y_prob)

        return np.vstack(y_probs)
    
    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        best_val_loss = np.inf
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if not _patience:  # 0
                print("Stopping early!")
                break

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )
        return best_model

# Gated RNNs: LSTMs & GRUs

In [None]:
BATCH_SIZE = 64
EMBEDDING_DIM = 100
RNN_HIDDEN_DIM = 128
HIDDEN_DIM = 100
DROPOUT_P = 0.1

In [None]:
NUM_LAYERS = 1
LEARNING_RATE = 1e-4
PATIENCE = 10
NUM_EPOCHS = 50

In [None]:
def gather_last_relevant_hidden(hiddens, seq_lens):
    """Extract and collect the last relevant
    hidden state based on the sequence length."""
    seq_lens = seq_lens.long().detach().cpu().numpy() - 1
    out = []
    for batch_index, column_index in enumerate(seq_lens):
        out.append(hiddens[batch_index, column_index])
    return torch.stack(out)

## Bidirectional GRU

### Model

In [None]:
class GRU(nn.Module):
    def __init__(self, embedding_dim, vocab_size, rnn_hidden_dim,
                 hidden_dim, dropout_p, num_classes, padding_idx=0):
        super(GRU, self).__init__()
        
        # Initialize embeddings
        self.embeddings = nn.Embedding(embedding_dim=embedding_dim,
                                       num_embeddings=vocab_size,
                                       padding_idx=padding_idx)
        
        # RNN
        self.rnn = nn.GRU(embedding_dim, rnn_hidden_dim, 
                          batch_first=True, bidirectional=True)
     
        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(rnn_hidden_dim*2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs):
        # Embed
        x_in, seq_lens = inputs
        x_in = self.embeddings(x_in)
            
        # Rnn outputs
        out, h_n = self.rnn(x_in)
        z = gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens)

        # FC layers
        z = self.fc1(z)
        z = self.dropout(z)
        z = self.fc2(z)
        return z

In [None]:
# Simple gru cell
NUM_CLASSES = 2

model = GRU(
    embedding_dim=EMBEDDING_DIM, vocab_size=VOCAB_SIZE, 
    rnn_hidden_dim=RNN_HIDDEN_DIM, hidden_dim=HIDDEN_DIM, 
    dropout_p=DROPOUT_P, num_classes=NUM_CLASSES)
model = model.to(device) # set device
print (model.named_parameters)

<bound method Module.named_parameters of GRU(
  (embeddings): Embedding(5000, 100, padding_idx=0)
  (rnn): GRU(100, 128, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=256, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)>


### Training

In [None]:
# Class weights
counts = np.bincount(y_train)
class_weights = {i: 1.0/count for i, count in enumerate(counts)}
print (f"counts: {counts}\nweights: {class_weights}")

counts: [2914 1001]
weights: {0: 0.00034317089910775565, 1: 0.000999000999000999}


In [None]:
# Define Loss
class_weights_tensor = torch.Tensor(list(class_weights.values())).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

In [None]:
# Define optimizer & scheduler
optimizer = Adam(model.parameters(), lr=LEARNING_RATE) 
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=3)

In [None]:
# Trainer module
trainer = Trainer(
    model=model, device=device, loss_fn=loss_fn, 
    optimizer=optimizer, scheduler=scheduler)

In [None]:
# Train
best_model = trainer.train(
    NUM_EPOCHS, PATIENCE, train_dataloader, val_dataloader)

Epoch: 1 | train_loss: 0.67524, val_loss: 0.66007, lr: 1.00E-04, _patience: 10
Epoch: 2 | train_loss: 0.64170, val_loss: 0.63138, lr: 1.00E-04, _patience: 10
Epoch: 3 | train_loss: 0.60291, val_loss: 0.59580, lr: 1.00E-04, _patience: 10
Epoch: 4 | train_loss: 0.54848, val_loss: 0.53940, lr: 1.00E-04, _patience: 10
Epoch: 5 | train_loss: 0.48081, val_loss: 0.47151, lr: 1.00E-04, _patience: 10
Epoch: 6 | train_loss: 0.41836, val_loss: 0.43924, lr: 1.00E-04, _patience: 10
Epoch: 7 | train_loss: 0.37255, val_loss: 0.41655, lr: 1.00E-04, _patience: 10
Epoch: 8 | train_loss: 0.33459, val_loss: 0.39768, lr: 1.00E-04, _patience: 10
Epoch: 9 | train_loss: 0.30029, val_loss: 0.38080, lr: 1.00E-04, _patience: 10
Epoch: 10 | train_loss: 0.26783, val_loss: 0.36692, lr: 1.00E-04, _patience: 10
Epoch: 11 | train_loss: 0.23767, val_loss: 0.35575, lr: 1.00E-04, _patience: 10
Epoch: 12 | train_loss: 0.20875, val_loss: 0.34802, lr: 1.00E-04, _patience: 10
Epoch: 13 | train_loss: 0.18269, val_loss: 0.3438

### Evaluation on test set

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Get predictions on test data
test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
y_pred = np.argmax(y_prob, axis=1)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.91      0.94      0.93       625
           1       0.81      0.73      0.77       214

    accuracy                           0.89       839
   macro avg       0.86      0.84      0.85       839
weighted avg       0.89      0.89      0.89       839




**Precision : 0.86**

**Recall : 0.84**

## Bidirectional LSTM


### Model

In [None]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, vocab_size, rnn_hidden_dim,
                 hidden_dim, dropout_p, num_classes, padding_idx=0):
        super(LSTM, self).__init__()
        
        # Initialize embeddings
        self.embeddings = nn.Embedding(embedding_dim=embedding_dim,
                                       num_embeddings=vocab_size,
                                       padding_idx=padding_idx)
        
        # RNN
        self.rnn = nn.LSTM(embedding_dim, rnn_hidden_dim, 
                          batch_first=True, bidirectional=True)
     
        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(rnn_hidden_dim*2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs):
        # Embed
        x_in, seq_lens = inputs
        x_in = self.embeddings(x_in)
            
        # Rnn outputs
        out, h_n = self.rnn(x_in)
        z = gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens)

        # FC layers
        z = self.fc1(z)
        z = self.dropout(z)
        z = self.fc2(z)
        return z

In [None]:
# Simple LSTM cell
NUM_CLASSES = 2

model = LSTM(
    embedding_dim=EMBEDDING_DIM, vocab_size=VOCAB_SIZE, 
    rnn_hidden_dim=RNN_HIDDEN_DIM, hidden_dim=HIDDEN_DIM, 
    dropout_p=DROPOUT_P, num_classes=NUM_CLASSES)
model = model.to(device) # set device
print (model.named_parameters)

<bound method Module.named_parameters of LSTM(
  (embeddings): Embedding(5000, 100, padding_idx=0)
  (rnn): LSTM(100, 128, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=256, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)>


### Training

In [None]:
# Define Loss
class_weights_tensor = torch.Tensor(list(class_weights.values())).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

In [None]:
# Define optimizer & scheduler
optimizer = Adam(model.parameters(), lr=LEARNING_RATE) 
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=3)

In [None]:
# Trainer module
trainer = Trainer(
    model=model, device=device, loss_fn=loss_fn, 
    optimizer=optimizer, scheduler=scheduler)

In [None]:
# Train
best_model = trainer.train(
    NUM_EPOCHS, PATIENCE, train_dataloader, val_dataloader)

Epoch: 1 | train_loss: 0.67853, val_loss: 0.66349, lr: 1.00E-04, _patience: 10
Epoch: 2 | train_loss: 0.65063, val_loss: 0.63482, lr: 1.00E-04, _patience: 10
Epoch: 3 | train_loss: 0.60741, val_loss: 0.58675, lr: 1.00E-04, _patience: 10
Epoch: 4 | train_loss: 0.53465, val_loss: 0.49130, lr: 1.00E-04, _patience: 10
Epoch: 5 | train_loss: 0.45489, val_loss: 0.44527, lr: 1.00E-04, _patience: 10
Epoch: 6 | train_loss: 0.40032, val_loss: 0.41737, lr: 1.00E-04, _patience: 10
Epoch: 7 | train_loss: 0.35960, val_loss: 0.39743, lr: 1.00E-04, _patience: 10
Epoch: 8 | train_loss: 0.32363, val_loss: 0.38365, lr: 1.00E-04, _patience: 10
Epoch: 9 | train_loss: 0.29154, val_loss: 0.37433, lr: 1.00E-04, _patience: 10
Epoch: 10 | train_loss: 0.26039, val_loss: 0.37054, lr: 1.00E-04, _patience: 10
Epoch: 11 | train_loss: 0.23189, val_loss: 0.36754, lr: 1.00E-04, _patience: 10
Epoch: 12 | train_loss: 0.20483, val_loss: 0.36616, lr: 1.00E-04, _patience: 10
Epoch: 13 | train_loss: 0.17866, val_loss: 0.3685

### Evaluation on test set

In [None]:
# Get predictions on test data
test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
y_pred = np.argmax(y_prob, axis=1)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92       625
           1       0.78      0.77      0.77       214

    accuracy                           0.89       839
   macro avg       0.85      0.85      0.85       839
weighted avg       0.89      0.89      0.89       839



**Precision : 0.85**

**Recall : 0.85**