In [1]:
import json
import sys
import random
from tqdm import tqdm
from collections import defaultdict
import pyarrow as pa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import re
class SentenceNegator:
  IRREGULAR_ES_VERB_ENDINGS = ["ss", "x", "ch", "sh", "o"]

  def negate(self, sentence):
    # is
    if sentence.find("isn't") > -1:
      return sentence.replace("isn't", "is")

    if sentence.find("isn\\'t") > -1:
      return sentence.replace("isn\\'t", "is")

    if sentence.find("is not ") > -1:
      return sentence.replace("is not ", "is ")

    if sentence.find("is ") > -1:
      return sentence.replace("is ", "is not ")

    # has
    if sentence.find("does not have") > -1:
      return sentence.replace("does not have", "has")

    if sentence.find("doesn't have") > -1:
      return sentence.replace("doesn't have", "has")

    if sentence.find("doesn\\'t have") > -1:
      return sentence.replace("doesn\\'t have", "has")

    if sentence.find("has ") > -1:
      return sentence.replace("has ", "does not have ")

    # should
    if sentence.find("shouldn't") > -1:
      return sentence.replace("shouldn't", "should")

    if sentence.find("shouldn\\'t") > -1:
      return sentence.replace("shouldn\\'t", "should")

    if sentence.find("should not") > -1:
      return sentence.replace("should not", "should")

    if sentence.find("should") > -1:
      return sentence.replace("should", "should not")

    # must
    if sentence.find("mustn't") > -1:
      return sentence.replace("mustn't", "must")

    if sentence.find("mustn\\'t") > -1:
      return sentence.replace("mustn\\'t", "must")

    if sentence.find("must not") > -1:
      return sentence.replace("must not", "must")

    if sentence.find("must ") > -1:
      return sentence.replace("must ", "must not ")

    # can
    if sentence.find("can't") > -1:
      return sentence.replace("can't", "can")

    if sentence.find("can\\'t") > -1:
      return sentence.replace("can\\'t", "can")

    if sentence.find("cannot") > -1:
      return sentence.replace("cannot", "can")

    if sentence.find("can ") > -1:
      return sentence.replace("can ", "cannot ")
    # was
    if sentence.find(" was ") > -1:
      return sentence.replace(" was ", " was not ")
    if sentence.find("was not ") > -1:
      return sentence.replace("was not ", "was ")
    if sentence.find("wasn't") > -1:
      return sentence.replace("wasn't", "was")
    # doesn't work -> works
    doesnt_regex = r'(doesn\'t|doesn\\\'t|does not) (?P<verb>\w+)'

    if re.search(doesnt_regex, sentence):
      def replace_doesnt(matchobj):
        verb = matchobj.group(2)

        if verb.endswith("y") and self.__is_consonant(verb[-2]):
          return "{0}ies".format(verb[0:-1])

        for ending in self.IRREGULAR_ES_VERB_ENDINGS:
          if verb.endswith(ending):
            return "{0}es".format(verb)

        return "{0}s".format(verb)

      return re.sub(doesnt_regex, replace_doesnt, sentence, 1)

    verb_regex = r'(It |it |)(?P<verb>\w+)s( |$)'

    # works -> does not work
    def replace_verb(matchobj):
      subject = matchobj.group(1)
      verb = matchobj.group(2)
      whitespace = matchobj.group(3)

      # flies -> fly, but not die -> dy
      if verb.endswith("ie") and len(verb) > 3:
        verb = "{0}y".format(verb[0:-2])

      # stresses -> stress
      for ending in self.IRREGULAR_ES_VERB_ENDINGS:
        if verb.endswith("{0}e".format(ending)):
          verb = verb[0:-1]

      return "{0}does not {1}{2}".format(subject, verb, whitespace)

    if re.search(verb_regex, sentence):
      return re.sub(verb_regex, replace_verb, sentence, 1)

    return sentence

  def __is_consonant(self, letter):
    return letter not in ['a', 'e', 'i', 'o', 'u', 'y']


In [3]:
%%time
root = '.'

train_data = list(
    map(json.loads, open(f"{root}/cosmos/train_data.json").readlines())
)
test_data = list(
    map(json.loads, open(f"{root}/cosmos/test_data.json").readlines())
)
val_data = list(map(json.loads, open(f"{root}/cosmos/val_data.json").readlines()))

CPU times: user 11.6 s, sys: 596 ms, total: 12.2 s
Wall time: 12.2 s


In [4]:
np.random.seed(42)
train_data_sample = np.random.choice(train_data, size=int(len(train_data)*50/100))
# Not OOC cases
sn = SentenceNegator()
l = []
nice=0
ne=0
bad=0
for data in tqdm(train_data_sample):
    if len(data['articles']) > 1:
        # Pick 2 random caption
        caption_1_idx, caption_2_idx = random.sample(range(0,len(data['articles'])),2)
        cap1 = data['articles'][caption_1_idx]['caption_modified']
        cap2 = data['articles'][caption_2_idx]['caption_modified']
        if data['img_local_path'] == 'train/109750.jpg':
            print(cap1)
            print(cap2)
        if cap1 != cap2:
            l.append([data['img_local_path'],[cap1],[cap2],[False]])
            nice += 1
        else:
            cap2 = sn.negate(cap2)
            if cap1 != cap2:
                l.append([data['img_local_path'],[cap1],[cap2],[True]])
                ne += 1

    if len(data['articles']) == 1 and np.random.rand() > 0.5:
        if np.random.rand()>0.5:
            data_random1 = train_data[np.random.randint(len(train_data))]
            while data['img_local_path'] == data_random1['img_local_path']:
                data_random1 = train_data[np.random.randint(len(train_data))]
            cap1 = data_random1['articles'][np.random.randint(len(data_random1['articles']))]['caption_modified']
            data_random2 = train_data[np.random.randint(len(train_data))]
            while data['img_local_path'] == data_random2['img_local_path']:
                data_random2 = train_data[np.random.randint(len(train_data))]
            cap2 = data_random2['articles'][np.random.randint(len(data_random2['articles']))]['caption_modified']
        else:
             # Pick 1st correct caption
            if np.random.rand()>0.5:
                cap1 = data['articles'][np.random.randint(len(data['articles']))]['caption_modified']
                # Pick a random article then pick its first caption
                data_random = train_data[np.random.randint(len(train_data))]
                while data['img_local_path'] == data_random['img_local_path']:
                    data_random = train_data[np.random.randint(len(train_data))]
                cap2 = data_random['articles'][np.random.randint(len(data_random['articles']))]['caption_modified']
            else:
                cap2 = data['articles'][np.random.randint(len(data['articles']))]['caption_modified']
                # Pick a random article then pick its first caption
                data_random = train_data[np.random.randint(len(train_data))]
                if data['img_local_path'] == data_random['img_local_path']:
                    continue
                cap1 = data_random['articles'][np.random.randint(len(data_random['articles']))]['caption_modified']
        l.append([data['img_local_path'],[cap1],[cap2],[True]])
        bad+=1

 23%|██▎       | 18372/80877 [00:00<00:00, 91957.12it/s]

A pro-democracy activist covered in blue paint uses a megaphone during an anti-government protest, in GPE, GPE DATE.
A pro-democracy activist covered in blue paint uses a megaphone during an anti-government protest, in GPE, GPE DATE.


100%|██████████| 80877/80877 [00:00<00:00, 91667.89it/s]


In [5]:
dataframe = pd.DataFrame(
    l, columns=["image", "caption_1", "caption_2", "label"],
)

In [62]:
dataframe['label'].apply(lambda x:x[0]).value_counts()

False    30974
True     30784
Name: label, dtype: int64

In [6]:
def load_image(path):
   try:
      with open(path, "rb") as fp:
        return fp.read()
   except:
      return None

In [7]:
tqdm.pandas()

dataframe['image'] = dataframe['image'].progress_apply(lambda x: load_image(x))

  from pandas import Panel
100%|██████████| 61735/61735 [00:10<00:00, 5748.73it/s]


In [8]:
dataframe = dataframe[dataframe.image.notnull()]

In [16]:
fa = dataframe.rename(columns={'caption_1':'caption'}).drop(columns=['caption_2','label'])
fa['split'] = 'train'
fa = fa.rename_axis('image_id').reset_index()
fa.head()

Unnamed: 0,image_id,image,caption,split
0,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[Voters mark their ballots during early voting...,train
1,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[PERSON. But How Many Children He Has In Total...,train
2,2,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[GPE's premature baby was delivered by emergen...,train
3,3,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[The DATE on GPE Photo Gallery of the DATE on...,train
4,4,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[A Rohingya refugee man holds his child up as ...,train


In [21]:
table = pa.Table.from_pandas(fa)
split = 'train'
with pa.OSFile(f"fa/f30k_caption_karpathy_test.arrow", "wb") as sink:
    with pa.RecordBatchFileWriter(sink, table.schema) as writer:
        writer.write_table(table)

In [26]:
from vilt.datasets import F30KCaptionKarpathyDataset
import torch
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForWholeWordMask,
    BertTokenizer,
)

In [28]:
def get_pretrained_tokenizer(from_pretrained):
    if torch.distributed.is_initialized():
        if torch.distributed.get_rank() == 0:
            BertTokenizer.from_pretrained(
                from_pretrained, do_lower_case="uncased" in from_pretrained
            )
        torch.distributed.barrier()
    return BertTokenizer.from_pretrained(
        from_pretrained, do_lower_case="uncased" in from_pretrained
    )
tokenizer = get_pretrained_tokenizer("bert-base-uncased")
n = F30KCaptionKarpathyDataset(data_dir='fa', split='train',transform_keys=["pixelbert_randaug"],image_size=384)
n.tokenizer = tokenizer


In [33]:
n[10]

{'image': [tensor([[[ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           [ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           [ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           ...,
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],
  
          [[ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           [ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           [ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
           ...,
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
           [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],
  
          [[ 0.0039,  0.0039,  0.0039,  ...,  0.0039,  0.0039,  0.0039],
  

# PyArrow

In [69]:
table = pa.Table.from_pandas(dataframe)

In [70]:
split = 'train'
with pa.OSFile(f"dataset_neg/cosmos_{split}.arrow", "wb") as sink:
    with pa.RecordBatchFileWriter(sink, table.schema) as writer:
        writer.write_table(table)

# Val

In [71]:
# np.random.seed(42)
val_data_sample = np.random.choice(val_data, size=int(len(val_data)*50/100))
# train_data_sample=train_data[:1]
# Not OOC cases
nice = 0
ne = 0
bad = 0
l = []
for data in tqdm(val_data_sample):
    if len(data['articles']) > 1:
        # Pick 2 random caption
        caption_1_idx, caption_2_idx = random.sample(range(0,len(data['articles'])),2)
        cap1 = data['articles'][caption_1_idx]['caption_modified']
        cap2 = data['articles'][caption_2_idx]['caption_modified']
        if data['img_local_path'] == 'train/109750.jpg':
            print(cap1)
            print(cap2)
        if cap1 != cap2:
            l.append([data['img_local_path'],[cap1],[cap2],[False]])
            nice += 1
        else:
            cap2 = sn.negate(cap2)
            if cap1 != cap2:
                l.append([data['img_local_path'],[cap1],[cap2],[True]])
                ne += 1

    if len(data['articles']) == 1 and np.random.rand() > 0.5:
        if np.random.rand()>0.5:
            data_random1 = train_data[np.random.randint(len(train_data))]
            while data['img_local_path'] == data_random1['img_local_path']:
                data_random1 = train_data[np.random.randint(len(train_data))]
            cap1 = data_random1['articles'][np.random.randint(len(data_random1['articles']))]['caption_modified']
            data_random2 = train_data[np.random.randint(len(train_data))]
            while data['img_local_path'] == data_random2['img_local_path']:
                data_random2 = train_data[np.random.randint(len(train_data))]
            cap2 = data_random2['articles'][np.random.randint(len(data_random2['articles']))]['caption_modified']
        else:
             # Pick 1st correct caption
            if np.random.rand()>0.5:
                cap1 = data['articles'][np.random.randint(len(data['articles']))]['caption_modified']
                # Pick a random article then pick its first caption
                data_random = train_data[np.random.randint(len(train_data))]
                while data['img_local_path'] == data_random['img_local_path']:
                    data_random = train_data[np.random.randint(len(train_data))]
                cap2 = data_random['articles'][np.random.randint(len(data_random['articles']))]['caption_modified']
            else:
                cap2 = data['articles'][np.random.randint(len(data['articles']))]['caption_modified']
                # Pick a random article then pick its first caption
                data_random = train_data[np.random.randint(len(train_data))]
                if data['img_local_path'] == data_random['img_local_path']:
                    continue
                cap1 = data_random['articles'][np.random.randint(len(data_random['articles']))]['caption_modified']
        l.append([data['img_local_path'],[cap1],[cap2],[True]])
        bad+=1

100%|██████████| 20503/20503 [00:00<00:00, 89303.62it/s]


In [72]:
nice,ne,bad

(7738, 4076, 3774)

In [73]:
dataframe = pd.DataFrame(
    l, columns=["image", "caption_1", "caption_2", "label"],
)

In [74]:
tqdm.pandas()

dataframe['image'] = dataframe['image'].progress_apply(lambda x: load_image(x))
dataframe = dataframe[dataframe.image.notnull()]

  from pandas import Panel
100%|██████████| 15588/15588 [00:01<00:00, 14784.73it/s]


In [75]:
table = pa.Table.from_pandas(dataframe)
split = 'val'
with pa.OSFile(f"dataset_neg/cosmos_{split}.arrow", "wb") as sink:
    with pa.RecordBatchFileWriter(sink, table.schema) as writer:
        writer.write_table(table)

# Test

In [77]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [78]:
def modify_caption_replace_entities(caption_text):
    """
        Utility function to replace named entities in the caption with their corresponding hypernyms
        Args:
            caption_text (str): Original caption with named entities
        Returns:
            caption_modified (str): Modified caption after replacing named entities
    """
    doc = nlp(caption_text)
    caption_modified = caption_text
    caption_entity_list = []
    for ent in doc.ents:
        caption_entity_list.append((ent.text, ent.label_))
        caption_modified = caption_modified.replace(ent.text, ent.label_, 1)
    return caption_modified

In [79]:
l_test  = []
for data in tqdm(test_data):
    cap1 = modify_caption_replace_entities(data['caption1'])
    cap2 = modify_caption_replace_entities(data['caption2'])
    l_test.append([data['img_local_path'],[cap1],[cap2],[data['context_label']==True]])

100%|██████████| 1700/1700 [00:18<00:00, 91.35it/s]


In [80]:
dataframe_test = pd.DataFrame(
    l_test, columns=["image", "caption_1", "caption_2", "label"],
)

In [81]:
dataframe_test

Unnamed: 0,image,caption_1,caption_2,label
0,test/0.jpg,"[PERSON at his announcement in GPE, GPE, on DA...","[PERSON at his announcement in GPE, GPE, on DA...",[False]
1,test/1.jpg,[Supporters of GPE's ruling ORG party come out...,[A person sits on a truck as supporters of the...,[False]
2,test/2.jpg,[CARDINAL dead people turned up on the state’s...,[These social media posts did not link to a re...,[True]
3,test/3.jpg,"[Actor, musician, director and devoted followe...",[A shocking report about the former child acto...,[True]
4,test/4.jpg,[Men from the LOC tribe perform a traditional ...,"[And on DATE in GPE's Narok county, young PERS...",[False]
...,...,...,...,...
1695,test/1695.jpg,[President PERSON trademarked the name 'WORK_O...,[There was no truth that PERSON family MONEY w...,[True]
1696,test/1696.jpg,[A photograph shows a soldier carrying a donke...,[Coronavirus meme featuring “EVENT donkey” is ...,[True]
1697,test/1697.jpg,[Homeless people living on streets in GPE],[ORG in GPE],[False]
1698,test/1698.jpg,[The castle's esplanade was a perfect spot for...,[Picture shows an ORG skier],[False]


In [82]:
tqdm.pandas()
dataframe_test['image'] = dataframe_test['image'].progress_apply(lambda x: load_image(x))

  from pandas import Panel
100%|██████████| 1700/1700 [00:00<00:00, 8804.80it/s]


In [83]:
dataframe_test

Unnamed: 0,image,caption_1,caption_2,label
0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[PERSON at his announcement in GPE, GPE, on DA...","[PERSON at his announcement in GPE, GPE, on DA...",[False]
1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[Supporters of GPE's ruling ORG party come out...,[A person sits on a truck as supporters of the...,[False]
2,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[CARDINAL dead people turned up on the state’s...,[These social media posts did not link to a re...,[True]
3,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[Actor, musician, director and devoted followe...",[A shocking report about the former child acto...,[True]
4,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[Men from the LOC tribe perform a traditional ...,"[And on DATE in GPE's Narok county, young PERS...",[False]
...,...,...,...,...
1695,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[President PERSON trademarked the name 'WORK_O...,[There was no truth that PERSON family MONEY w...,[True]
1696,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[A photograph shows a soldier carrying a donke...,[Coronavirus meme featuring “EVENT donkey” is ...,[True]
1697,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[Homeless people living on streets in GPE],[ORG in GPE],[False]
1698,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,[The castle's esplanade was a perfect spot for...,[Picture shows an ORG skier],[False]


In [84]:
dataframe_test['label'].apply(lambda x: x[0]).value_counts()

True     850
False    850
Name: label, dtype: int64

In [19]:
np.random.seed(42)
shuffled = dataframe_test.sample(frac=1).reset_index(drop=True)

In [22]:
train = shuffled[:1000]
val = shuffled[1000:1350]
test = shuffled[1350:]

In [29]:
test['label'].apply(lambda x: x[0]).value_counts()

False    178
True     172
Name: label, dtype: int64

# PyArrow

In [33]:
for dataframe,split in zip([train,val,test],['train','val','test']):
    table = pa.Table.from_pandas(dataframe)
    # split = 'test'
    with pa.OSFile(f"dataset_test_only/cosmos_{split}.arrow", "wb") as sink:
        with pa.RecordBatchFileWriter(sink, table.schema) as writer:
            writer.write_table(table)

In [None]:
table = pa.Table.from_pandas(dataframe_test)
split = 'test'
with pa.OSFile(f"dataset_neg/cosmos_{split}.arrow", "wb") as sink:
    with pa.RecordBatchFileWriter(sink, table.schema) as writer:
        writer.write_table(table)

In [None]:
def get_pretrained_tokenizer(from_pretrained):
    if torch.distributed.is_initialized():
        if torch.distributed.get_rank() == 0:
            BertTokenizer.from_pretrained(
                from_pretrained, do_lower_case="uncased" in from_pretrained
            )
        torch.distributed.barrier()
    return BertTokenizer.from_pretrained(
        from_pretrained, do_lower_case="uncased" in from_pretrained
    )
tokenizer = get_pretrained_tokenizer("bert-base-uncased")

# Dataset

In [42]:
from vilt.datasets.base_dataset import BaseDataset
class COSMOSDataset(BaseDataset):
    def __init__(self, *args, split="", **kwargs):
        assert split in ["train", "val", "test"]
        self.split = split

        if split == "train":
            names = ["cosmos_train"]
        elif split == "val":
            names = ["cosmos_val", "cosmos_test"]
        elif split == "test":
            names = ["cosmos_val", "cosmos_test"]

        super().__init__(
            *args,
            **kwargs,
            names=names,
            text_column_name="caption_1",
            remove_duplicate=False,
        )
    
    def get_text_2(self, raw_index):
        index, caption_index = self.index_mapper[raw_index]
        text = self.table['caption_2'][index][0].as_py()

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_text_len,
            return_special_tokens_mask=True,
        )
        return {
            "text": (text, encoding),
            "img_index": index,
            "cap_index": caption_index,
            "raw_index": raw_index,
        }

    def __getitem__(self, index):
        result = None
        while result is None:
            try:
                image_tensor = self.get_image(index, image_key="image")["image"]
                text = self.get_text(index)["text"]
                text2 = self.get_text_2(index)["text"]
                result = True
            except:
                print(
                    f"error while read file idx {index} in {self.names[0]}",
                    file=sys.stderr,
                )
                z
                index = random.randint(0, len(self.index_mapper) - 1)

        return {
            "image": image_tensor,
            "text": text,
            "text2": text2,
            "answers": self.table["label"][index] == True,
            "table_name": self.table_names[index],
        }


In [25]:
# Get tokenizer from
import torch
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForWholeWordMask,
    BertTokenizer,
)

def get_pretrained_tokenizer(from_pretrained):
    if torch.distributed.is_initialized():
        if torch.distributed.get_rank() == 0:
            BertTokenizer.from_pretrained(
                from_pretrained, do_lower_case="uncased" in from_pretrained
            )
        torch.distributed.barrier()
    return BertTokenizer.from_pretrained(
        from_pretrained, do_lower_case="uncased" in from_pretrained
    )
tokenizer = get_pretrained_tokenizer("bert-base-uncased")

In [43]:
n = COSMOSDataset(data_dir='dataset', split='train', transform_keys=["pixelbert_randaug"],image_size=384)
n.tokenizer = tokenizer

In [46]:
n[1]['text2']

("Mr. Zuckerberg wants to increase the utility of the social network to keep Facebook's billions of users highly engaged, people involved in the effort said.",
 {'input_ids': [101, 2720, 1012, 16950, 9102, 4059, 4122, 2000, 3623, 1996, 9710, 1997, 1996, 2591, 2897, 2000, 2562, 9130, 1005, 1055, 25501, 1997, 5198, 3811, 5117, 1010, 2111, 2920, 1999, 1996, 3947, 2056, 1012, 102, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'special_tokens_mask': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]})