In [1]:
from __future__ import absolute_import, division, unicode_literals
import sys
import io
import numpy as np
import logging
import argparse
import torch
import random
from transformers import *
import utils
import json
import pandas as pd
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# -----------------------------------------------
def set_seed(args):
    """Seed the Python, NumPy and PyTorch random generators from ``args.seed``."""
    seed = args.seed
    # Same order as before: stdlib `random`, then NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)


In [3]:
class Args:
    """Plain attribute container for the run settings used by this notebook.

    Every constructor argument is stored unchanged as an instance attribute of
    the same name, so ``vars(args)`` yields the settings as a dictionary.
    """

    def __init__(self, batch_size, max_seq_length, seed, model_type, embed_method,
                 context_window_size, layer_start, tasks, device):
        # Populate the instance dictionary in the same order as the parameters.
        vars(self).update(
            batch_size=batch_size,
            max_seq_length=max_seq_length,
            seed=seed,
            model_type=model_type,
            embed_method=embed_method,
            context_window_size=context_window_size,
            layer_start=layer_start,
            tasks=tasks,
            device=device,
        )

In [4]:
# Run settings. `device` is only a placeholder here: the next cell overwrites
# `args.device` with a torch.device.
args = Args(
    batch_size=64,
    max_seq_length=128,
    seed=42,
    model_type="binwang/bert-base-nli-stsb",
    embed_method="ave_last_hidden",
    context_window_size=2,
    layer_start=4,
    tasks='sts',
    device=2,
)

In [5]:
# -----------------------------------------------
# Set device
# A single GPU index drives both the current CUDA device and the device the
# model/tensors are moved to, so the two can never disagree.
GPU_INDEX = 2
if torch.cuda.is_available():
    # Fall back to GPU 0 when the requested index does not exist on this host.
    gpu_index = GPU_INDEX if GPU_INDEX < torch.cuda.device_count() else 0
    torch.cuda.set_device(gpu_index)
    device = torch.device("cuda", gpu_index)
else:
    # No CUDA at all: run on CPU instead of failing in set_device().
    device = torch.device("cpu")
args.device = device

# -----------------------------------------------
# Set seed
set_seed(args)
# Set up logger
logging.basicConfig(format="%(asctime)s : %(message)s", level=logging.DEBUG)
# Set Model
# vars() returns the live attribute dict of `args`, so `params` also sees the
# torch.device assigned to `args.device` above.
params = vars(args)

config = AutoConfig.from_pretrained(params["model_type"], cache_dir="./cache")
# Ask the model to return the hidden states; sbert() reads them from the output.
config.output_hidden_states = True
tokenizer = AutoTokenizer.from_pretrained(params["model_type"], cache_dir="./cache")
model = AutoModelWithLMHead.from_pretrained(
    params["model_type"], config=config, cache_dir="./cache"
)
model.to(params["device"])

2022-05-03 11:28:52,330 : Starting new HTTPS connection (1): s3.amazonaws.com:443
2022-05-03 11:28:53,332 : https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/binwang/bert-base-nli-stsb/config.json HTTP/1.1" 200 0
2022-05-03 11:28:53,335 : loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/binwang/bert-base-nli-stsb/config.json from cache at ./cache/da86f44c4219e194281dc8357b2f3b710998009231581665837f1dc21b08fb75.2f62e73f3306183a4b5f94267e4695f8afd4c3f2b29a4c4df18f8fecd046e609
2022-05-03 11:28:53,336 : Model config {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "out

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [6]:
def sbert(sentences):
    """Return the cosine similarity between the embeddings of two sentences.

    Uses the notebook-level ``tokenizer``, ``model``, ``params`` and ``device``
    objects, and ``utils.generate_embedding`` to pool the hidden states into
    one vector per sentence.

    Args:
        sentences: Iterable of at least two strings. Every sentence is encoded,
            but only the first two embeddings are compared.

    Returns:
        The cosine similarity of the first two sentence embeddings.

    Raises:
        ValueError: If fewer than two sentences are supplied.
    """
    sentences = list(sentences)
    if len(sentences) < 2:
        raise ValueError(
            "sbert() needs at least two sentences, got {}".format(len(sentences))
        )

    max_len = params["max_seq_length"]
    # Pad with the tokenizer's own pad id; 0 is only the fallback when the
    # tokenizer does not define one.
    pad_id = getattr(tokenizer, "pad_token_id", None)
    if pad_id is None:
        pad_id = 0

    # -----------------------------------------------
    features_input_ids = []
    features_mask = []
    for sentence in sentences:
        sent_ids = tokenizer.encode(sentence, add_special_tokens=True)
        # Truncate if too long, but keep the last token so the trailing special
        # token added by add_special_tokens=True is not cut off.
        # NOTE(review): assumes exactly one trailing special token (BERT-style).
        if len(sent_ids) > max_len:
            sent_ids = sent_ids[: max_len - 1] + sent_ids[-1:]
        # Padding
        padding_length = max_len - len(sent_ids)
        padded_ids = sent_ids + [pad_id] * padding_length
        sent_mask = [1] * len(sent_ids) + [0] * padding_length
        # Length Check
        assert len(padded_ids) == max_len
        assert len(sent_mask) == max_len

        features_input_ids.append(padded_ids)
        features_mask.append(sent_mask)

    # The pooling helper receives the mask as a NumPy array.
    features_mask = np.array(features_mask)

    batch_input_ids = torch.tensor(features_input_ids, dtype=torch.long).to(device)
    batch_input_mask = torch.tensor(features_mask, dtype=torch.long).to(device)

    # Inference only: no gradients are created, so nothing needs to be zeroed.
    with torch.no_grad():
        # Element 1 of the model output is used as the per-layer hidden states
        # (enabled via config.output_hidden_states) - verify this index if the
        # transformers version or model head changes.
        features = model(input_ids=batch_input_ids, attention_mask=batch_input_mask)[1]

    # Reshape features from list of (batch_size, seq_len, hidden_dim) for each hidden state to list
    # of (num_hidden_states, seq_len, hidden_dim) for each element in the batch.
    all_layer_embedding = torch.stack(features).permute(1, 0, 2, 3).cpu().numpy()

    embed_method = utils.generate_embedding(params["embed_method"], features_mask)
    embedding = embed_method.embed(params, all_layer_embedding)

    first, second = embedding[0], embedding[1]
    similarity = first.dot(second) / np.linalg.norm(first) / np.linalg.norm(second)
    return similarity

In [7]:
# Load the evaluation records (JSON Lines: one record per line).
df = pd.read_json(
    '/root/thesis/ViLT/cosmos/test_data.json',
    lines=True,
    orient="records",
)

In [11]:
# Register `progress_apply` on pandas objects, then score every caption pair
# row by row with a progress bar.
tqdm.pandas()
df['sbert_wk_score'] = df.progress_apply(
    lambda row: sbert([row.caption1, row.caption2]),
    axis=1,
)


100%|██████████| 1700/1700 [00:34<00:00, 49.43it/s]


In [12]:
# Inspect the per-pair similarity scores stored in the 'sbert_wk_score' column.
df['sbert_wk_score']

0       0.576994
1       0.541939
2       0.234810
3       0.460771
4       0.619695
          ...   
1695    0.477130
1696    0.489925
1697    0.185942
1698    0.547756
1699    0.529874
Name: sbert_wk_score, Length: 1700, dtype: float32

In [10]:
# Decision threshold: a pair is predicted True when its similarity is below it.
i = 0.5
print('threshold: ',i)
df['predict'] = df['sbert_wk_score'] < i
confusion_matrix = pd.crosstab(df['predict'], df['context_label'], rownames=['Predicted'], colnames=['Actual'])
print(confusion_matrix)
# Accuracy = share of rows whose boolean prediction matches the label.
# Computed directly from the columns rather than by indexing the crosstab, so
# it does not rely on positional lookup on a boolean index, still works when
# only one class is predicted, and uses the real row count.
# NOTE(review): assumes 'context_label' holds 0/1 labels - confirm.
result = (df['predict'] == df['context_label'].astype(bool)).sum() / len(df)
print('accuracy:', result)


threshold:  0.5
Actual       0    1
Predicted          
False      449  144
True       401  706
accuracy: 0.6794117647058824
threshold:  0.51
Actual       0    1
Predicted          
False      435  135
True       415  715
accuracy: 0.6764705882352942
threshold:  0.52
Actual       0    1
Predicted          
False      426  127
True       424  723
accuracy: 0.6758823529411765
threshold:  0.53
Actual       0    1
Predicted          
False      407  120
True       443  730
accuracy: 0.6688235294117647
threshold:  0.54
Actual       0    1
Predicted          
False      393  109
True       457  741
accuracy: 0.6670588235294118
threshold:  0.55
Actual       0    1
Predicted          
False      383   97
True       467  753
accuracy: 0.668235294117647
threshold:  0.56
Actual       0    1
Predicted          
False      369   88
True       481  762
accuracy: 0.6652941176470588
threshold:  0.5700000000000001
Actual       0    1
Predicted          
False      353   82
True       497  768
accuracy: