In [1]:
import tensorflow as tf

# Ask TensorFlow for the name of the GPU it can see (empty string if none).
device_name = tf.test.gpu_device_name()

# Only the canonical name of the first GPU is accepted; anything else aborts
# the cell.
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
    

import torch

# Pick the device every later cell moves its tensors to.
if not torch.cuda.is_available():
    # No CUDA device visible: fall back to the CPU.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # Tell PyTorch to use the GPU and report what was found.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))


Found GPU at: /device:GPU:0
There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [2]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/22/97/7db72a0beef1825f82188a4b923e62a146271ac2ced7928baa4d47ef2467/transformers-2.9.1-py3-none-any.whl (641kB)
[K     |▌                               | 10kB 24.3MB/s eta 0:00:01[K     |█                               | 20kB 29.0MB/s eta 0:00:01[K     |█▌                              | 30kB 19.2MB/s eta 0:00:01[K     |██                              | 40kB 16.2MB/s eta 0:00:01[K     |██▌                             | 51kB 13.3MB/s eta 0:00:01[K     |███                             | 61kB 12.4MB/s eta 0:00:01[K     |███▋                            | 71kB 11.7MB/s eta 0:00:01[K     |████                            | 81kB 10.5MB/s eta 0:00:01[K     |████▋                           | 92kB 10.7MB/s eta 0:00:01[K     |█████                           | 102kB 10.4MB/s eta 0:00:01[K     |█████▋                          | 112kB 10.4MB/s eta 0:00:01[K     |██████▏                         | 

In [3]:
import os
import json
import tensorflow as tf
from tensorflow import keras
import numpy as np
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
from tqdm import tqdm

from transformers import BertTokenizer

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Integer class id used for each SNLI gold label throughout the notebook.
SNLI_LABEL_IDS = {"contradiction": 0, "neutral": 1, "entailment": 2}


def _read_snli_split(archive, member):
    """Parse one SNLI ``.jsonl`` member of an open zip archive.

    Args:
        archive: An open ``ZipFile``.
        member: Path of the ``.jsonl`` file inside the archive.

    Returns:
        A tuple ``(first_sentences, second_sentences, labels)`` of three
        equally long lists. Sentences are lower-cased; labels are the ids
        from ``SNLI_LABEL_IDS``. Records whose ``gold_label`` is not one of
        those three classes are skipped.
    """
    first_sentences, second_sentences, labels = [], [], []
    with archive.open(member) as handle:
        # Stream the member line by line instead of materialising every
        # line with readlines().
        for raw_line in handle:
            record = json.loads(raw_line)
            label_id = SNLI_LABEL_IDS.get(record['gold_label'])
            if label_id is None:
                continue
            first_sentences.append(record['sentence1'].lower())
            second_sentences.append(record['sentence2'].lower())
            labels.append(label_id)
    return first_sentences, second_sentences, labels


# Download the archive into memory; the `with` block closes the HTTP
# response once its bytes have been read.
with urlopen("https://nlp.stanford.edu/projects/snli/snli_1.0.zip") as resp:
    zipfile = ZipFile(BytesIO(resp.read()))

sen1_train, sen2_train, res_train = _read_snli_split(
    zipfile, 'snli_1.0/snli_1.0_train.jsonl')
sen1_test, sen2_test, res_test = _read_snli_split(
    zipfile, 'snli_1.0/snli_1.0_test.jsonl')

Loading BERT tokenizer...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




In [0]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and save it to ``destination``.

    Requests the file from the Drive download endpoint. If the first
    response carries a ``download_warning`` cookie, the request is repeated
    with that cookie's value as the ``confirm`` parameter. The final
    response body is streamed to disk.

    Args:
        id: Google Drive file id. (The name shadows the builtin ``id``; it
            is kept so existing keyword callers continue to work.)
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status, so
            an error page is never saved as if it were the file.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager releases the session's connections when done.
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : id }, stream = True)
        token = get_confirm_token(response)

        if token:
            # Release the unread first (streamed) response before retrying.
            response.close()
            params = { 'id' : id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)

        response.raise_for_status()
        # Must run inside the `with`: the body is streamed lazily over the
        # session's connection.
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object whose ``cookies.items()`` yields ``(name, value)``
            pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no such cookie exists.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def save_response_content(response, destination):
    """Write the body of ``response`` to ``destination`` in binary mode.

    The body is pulled through ``response.iter_content`` in pieces of up to
    32768 bytes; empty pieces are not written.

    Args:
        response: Object exposing ``iter_content(chunk_size)``.
        destination: Path of the file to create or overwrite.
    """
    bytes_per_chunk = 32768

    with open(destination, "wb") as out_file:
        for piece in response.iter_content(bytes_per_chunk):
            # Empty pieces (keep-alive chunks) carry no data.
            if not piece:
                continue
            out_file.write(piece)

In [0]:
# Google Drive id of the file to fetch (an alternative id is kept below).
file_id = '1czYDQiKQJmMsIKmw4HBcuKxN6vkSDVV2'
#file_id = '14r9IfokNmd-0E8Tgkqt8BbTZ9MJblypj'

# Local path the downloaded file is written to.
destination = './BERT_test.pt'

download_file_from_google_drive(id=file_id, destination=destination)

In [61]:
# Load the checkpoint from the path the download cell wrote to, so a fresh
# top-to-bottom run finds the file.
# NOTE(review): assumes the downloaded file is the intended checkpoint — confirm.
# SECURITY: torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints from a source you trust.
# map_location lets a checkpoint saved on a GPU load on whatever `device` is.
model = torch.load(destination, map_location=device)

# The prediction cells move every batch to `device`; the model must live on
# the same device.
model.to(device)

# Switch to evaluation mode; as the cell's last expression this also displays
# the model summary.
model.eval()



BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [0]:
# Shuffle the test split. One permutation is applied to the labels and to
# both sentence lists so the three stay aligned. Its length is taken from the
# data, so the cell still works if the number of labelled examples changes.
per = np.random.permutation(len(res_test))
r = [res_test[i] for i in per]
s1 = [sen1_test[i] for i in per]
s2 = [sen2_test[i] for i in per]

In [62]:
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler
def _encode_sentence_pairs(first_sentences, second_sentences, max_length=512):
    """Tokenize aligned sentence pairs with the notebook-level ``tokenizer``.

    Each pair goes through ``tokenizer.encode_plus``, which tokenizes the
    text, adds the special tokens, maps tokens to ids, pads or truncates to
    ``max_length`` and builds the attention mask that separates padding from
    real tokens.

    Args:
        first_sentences: Sequence of first sentences.
        second_sentences: Sequence of second sentences, aligned with the first.
        max_length: Length every encoded pair is padded or truncated to.

    Returns:
        ``(input_ids, attention_masks)``: the per-pair tensors concatenated
        along dimension 0.
    """
    pair_ids, pair_masks = [], []
    for first, second in zip(first_sentences, second_sentences):
        encoded_dict = tokenizer.encode_plus(
            first,                          # Sentence to encode.
            second,
            max_length=max_length,          # Pad & truncate all sentences.
            pad_to_max_length=True,
            return_attention_mask=True,     # Construct attn. masks.
            return_tensors='pt',            # Return pytorch tensors.
        )
        pair_ids.append(encoded_dict['input_ids'])
        pair_masks.append(encoded_dict['attention_mask'])
    return torch.cat(pair_ids, dim=0), torch.cat(pair_masks, dim=0)


def _predict_logits(dataloader):
    """Run the notebook-level ``model`` over every batch of ``dataloader``.

    Args:
        dataloader: Yields ``(input_ids, attention_mask, labels)`` batches.

    Returns:
        ``(logits, labels)``: two lists holding one NumPy array per batch.
    """
    # Put model in evaluation mode.
    model.eval()

    batch_logits, batch_labels = [], []
    for batch in dataloader:
        # Move the whole batch to the selected device.
        b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

        # No gradients are needed for inference; skipping them saves memory
        # and speeds up prediction.
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask)

        # outputs[0] holds the logits; bring logits and labels back to the CPU.
        batch_logits.append(outputs[0].detach().cpu().numpy())
        batch_labels.append(b_labels.to('cpu').numpy())
    return batch_logits, batch_labels


# Encode every test pair.
input_ids, attention_masks = _encode_sentence_pairs(sen1_test, sen2_test)

# as_tensor returns an existing tensor unchanged, so re-running this cell does
# not re-wrap `res_test` (torch.tensor on a tensor copies and warns).
res_test = torch.as_tensor(res_test)

# Set the batch size.
batch_size = 128

# Create the DataLoader; the sequential sampler keeps predictions in the same
# order as the input lists.
prediction_data = TensorDataset(input_ids, attention_masks, res_test)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

# Prediction on test set
print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

predictions, true_labels = _predict_logits(prediction_dataloader)

print('    DONE.')



Predicting labels for 9,824 test sentences...
    DONE.


In [63]:





# Flatten the per-batch logit arrays into one predicted class id per example.
mypredict = [np.argmax(row) for batch_logits in predictions for row in batch_logits]

# Accuracy in percent against the gold labels in `res_test`.
sz = len(mypredict)
acc = sum(1 for idx in range(sz) if mypredict[idx] == res_test[idx])
print(acc*100/sz)

# Write one label name per line; class ids outside 0-2 produce no line.
label_lines = {0: "contradiction\n", 1: "neutral\n", 2: "entailment\n"}
with open('./dl.txt', "w") as file:
    file.writelines(
        label_lines[class_id] for class_id in mypredict if class_id in label_lines
    )

90.48249185667753


In [67]:
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler
# Re-run prediction on the shuffled copy of the test set (`s1`, `s2`, `r`).
input_ids = []
attention_masks = []

# For every shuffled sentence pair...
for first_sentence, second_sentence in zip(s1, s2):
    # `encode_plus` tokenizes the pair, adds the special tokens, maps tokens
    # to ids, pads or truncates to `max_length` and builds the attention mask.
    encoded_dict = tokenizer.encode_plus(
                        first_sentence,             # Sentence to encode.
                        second_sentence,
                        max_length = 512,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )

    # Add the encoded pair to the list.
    input_ids.append(encoded_dict['input_ids'])

    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)

# The labels must come from the shuffled list `r` so that each batch's labels
# line up with its shuffled inputs. int() accepts plain ints as well as 0-d
# tensors, whichever `r` holds.
shuffled_labels = torch.tensor([int(label) for label in r])

# Set the batch size.
batch_size = 128

# Create the DataLoader; the sequential sampler keeps the shuffled order.
prediction_data = TensorDataset(input_ids, attention_masks, shuffled_labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)


# Prediction on test set

print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

# Put model in evaluation mode
model.eval()

# Tracking variables
predictions , true_labels = [], []

# Predict
for batch in prediction_dataloader:
  # Move the batch to the selected device and unpack it.
  b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

  # No gradients are needed for inference; skipping them saves memory and
  # speeds up prediction.
  with torch.no_grad():
      # Forward pass, calculate logit predictions
      outputs = model(b_input_ids, token_type_ids=None,
                      attention_mask=b_input_mask)

  # outputs[0] holds the logits; store logits and labels as CPU arrays.
  predictions.append(outputs[0].detach().cpu().numpy())
  true_labels.append(b_labels.to('cpu').numpy())

print('    DONE.')



Predicting labels for 9,824 test sentences...
    DONE.


In [69]:





# Flatten the per-batch logit arrays into one predicted class id per example.
# These follow the shuffled order produced by `per`.
mypredict = [np.argmax(row) for batch_logits in predictions for row in batch_logits]

# Accuracy in percent against the shuffled gold labels `r`.
sz = len(mypredict)
acc = sum(1 for idx in range(sz) if mypredict[idx] == r[idx])
print(acc*100/sz)

# Undo the shuffle before writing: shuffled position j holds the example
# whose original index is per[j]. Without this, './dl.txt' would list the
# predictions in a random order that matches no file on disk.
ordered_predictions = [None] * len(per)
for original_index, predicted in zip(per, mypredict):
    ordered_predictions[original_index] = predicted

# Write one label name per line, in original test-set order; entries that are
# not a known class id produce no line.
label_lines = {0: "contradiction\n", 1: "neutral\n", 2: "entailment\n"}
with open('./dl.txt', "w") as file:
    for class_id in ordered_predictions:
        text = label_lines.get(class_id)
        if text is not None:
            file.write(text)

90.48249185667753
