<a href="https://colab.research.google.com/github/SaiSakethAluru/DE-LIMIT/blob/master/Example/N_Class_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import tensorflow as tf
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,f1_score

Using TensorFlow backend.


In [5]:
# Load the tokenizer and the sequence-classification model from the same pretrained checkpoint,
# so that the vocabulary and the weights are guaranteed to match.
MODEL_NAME = "Hate-speech-CNERG/dehatebert-mono-english"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)


In [3]:
# Choose the compute device: CUDA when PyTorch reports a usable GPU, otherwise the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

if use_gpu:
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')



There are 1 GPU(s) available.
We will use the GPU: Quadro RTX 8000


In [None]:
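# Optional: to adapt the model to a task with a different number of classes,
# uncomment the two lines below to replace the classification head.
#NUM_CLASSES = 2
#model.classifier = torch.nn.Linear(in_features=model.classifier.in_features, out_features=NUM_CLASSES)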

In [None]:
# The weights of a newly created classifier layer are randomly initialized, so the model must be trained again before its predictions are accurate.
# To train the whole model on your dataset, use the 'model' variable constructed above directly for training.
# To instead freeze the rest of the model (the BERT layers) and train only the final linear layer, disable gradients on the other parameters as shown in the next cell.

In [None]:
# Freeze every parameter whose name does not contain 'classifier', so that only the
# classification head would receive gradient updates during training.
for param_name, param in model.named_parameters():
    if 'classifier' in param_name:
        continue
    param.requires_grad = False

In [6]:
# Two-row toy dataset: each sentence is paired with an integer class label
# (1 on the abusive example, 0 on the benign one).
df = pd.DataFrame(
    data={
        'label': [1, 0],
        'sentence': ['I fucking hate white people', 'I love you so much'],
    }
)

In [7]:
# Display the toy dataset to confirm its columns and rows.
df

Unnamed: 0,label,sentence
0,1,I fucking hate white people
1,0,I love you so much


In [9]:
# Ground-truth class ids as a NumPy array, in the same row order as the sentences.
labels = df["label"].to_numpy()

In [10]:
# Wrap each sentence in the [CLS] / [SEP] markers, split it into tokens,
# and map every token to its vocabulary id.
sentences = ["[CLS] " + text + " [SEP]" for text in df.sentence.values]
tokenized_texts = list(map(tokenizer.tokenize, sentences))
input_ids = list(map(tokenizer.convert_tokens_to_ids, tokenized_texts))

In [12]:
# Bring every id sequence to exactly MAX_LEN entries, padding or cutting at the end.
# NOTE: because truncation is also applied at the end, a sentence longer than MAX_LEN
# tokens would lose its trailing [SEP] marker.
MAX_LEN = 20
input_ids = pad_sequences(
    input_ids,
    maxlen=MAX_LEN,
    dtype="long",
    padding="post",
    truncating="post",
)

In [13]:
# Put the model in evaluation mode before predicting, so dropout is not applied.
# eval() returns the model itself, which is why this cell prints the module tree.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.3)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
    

In [14]:
# Build one attention mask per padded sequence: 1.0 where the token id is positive
# (a real token) and 0.0 where it is not (the padding positions).
attention_masks = [
    [float(token_id > 0) for token_id in id_row]
    for id_row in input_ids
]

In [16]:
# Convert the padded ids, the attention masks and the labels into torch tensors.
prediction_inputs, prediction_masks, prediction_labels = (
    torch.tensor(values) for values in (input_ids, attention_masks, labels)
)

In [17]:
# Bundle ids, masks and labels so that index i yields the i-th
# (input_ids, attention_mask, label) triple; the prediction loop unpacks batches in this order.
prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)

In [18]:
# Walk through the dataset in its stored order, BATCH_SIZE examples at a time.
BATCH_SIZE = 2
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(
    prediction_data,
    batch_size=BATCH_SIZE,
    sampler=prediction_sampler,
)

In [19]:
# Select the device used by the prediction loop and report how many GPUs are visible.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# Cell output: the name of GPU 0 when CUDA is usable. Querying the device name
# without CUDA raises, so fall back to a plain "cpu" marker in that case.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

'Quadro RTX 8000'

In [20]:
import numpy as np
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        preds: 2-D array-like of per-class scores; the predicted class of a row
            is the index of its maximum along axis 1.
        labels: NumPy array of true class ids; it is flattened before comparison.

    Returns:
        Number of matching predictions divided by the number of labels
        (a NumPy floating-point scalar).
    """
    predicted_classes = np.argmax(preds, axis=1).ravel()
    expected_classes = labels.ravel()
    hits = predicted_classes == expected_classes
    return np.sum(hits) / len(expected_classes)

In [21]:
# Run the model over every batch and collect raw logits, predicted class ids and true labels.
predictions, true_labels, pred_labels = [], [], []
# Running sum of the per-batch accuracies. It must start as a number: with a list
# accumulator, `+=` with a NumPy scalar never adds anything up (the name ends up
# bound to an empty array instead).
eval_accuracy = 0.0

# Move the model to the target device once, rather than on every iteration.
model = model.to(device)

for batch in prediction_dataloader:
    # Put the batch tensors on the same device as the model.
    batch = tuple(t.to(device) for t in batch)
    # Unpack in the order in which the TensorDataset was built.
    b_input_ids, b_input_mask, b_labels = batch
    # Gradients are not needed for prediction; skipping them saves memory and time.
    with torch.no_grad():
        # Forward pass; the first element of the model output holds the logits.
        logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]

    # Move logits and labels to the CPU as NumPy arrays.
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Keep the raw per-batch logits for later inspection.
    predictions.append(logits)

    # Accumulate the total accuracy.
    tmp_eval_accuracy = flat_accuracy(logits, label_ids)
    eval_accuracy += tmp_eval_accuracy

    pred_labels += list(np.argmax(logits, axis=1).flatten())
    true_labels += list(label_ids.flatten())

# eval_accuracy now holds the sum of per-batch accuracies; divide it by
# len(prediction_dataloader) to obtain the mean accuracy per batch.

In [22]:
# Raw class scores (logits) of the last batch processed by the prediction loop, one row per sentence.
logits

array([[-0.5380583,  0.6882307],
       [ 1.7614641, -1.9261045]], dtype=float32)

In [23]:
# Predicted class id for each sentence (argmax over its logits), in dataset order.
pred_labels

[1, 0]

In [24]:
# Ground-truth labels collected in the same order, for comparison with pred_labels.
true_labels

[1, 0]

In [25]:
from sklearn.metrics import accuracy_score,f1_score
# Score the predictions against the ground truth: plain accuracy and macro-averaged F1.
testacc = accuracy_score(y_true=true_labels, y_pred=pred_labels)
testf1 = f1_score(y_true=true_labels, y_pred=pred_labels, average='macro')

In [26]:
# Macro-averaged F1 score on the toy examples.
testf1

1.0

In [27]:
# Accuracy on the toy examples.
testacc

1.0