In [20]:
# !pip install transformers
# !pip install datasets

In [2]:
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-d3501a9d-44dc-0cfe-0c05-ca100ab4560d)


# Fine-tuning a model on a question-answering task
This notebook shows how to fine-tune one of the 🤗 Transformers models on a question-answering task, i.e. the task of extracting the answer to a question from a given context.
<br><br>
**Note** : This notebook fine-tunes models that answer questions by taking a substring of the context, not by generating new text.

In [3]:
# set main parameters
squad_v2_flag = False  # True -> load the 'squad_v2' dataset/metric below, False -> 'squad'
model_checkpoint = "distilbert-base-uncased"  # checkpoint name handed to AutoTokenizer.from_pretrained
batch_size = 16  # not used in the cells shown here; presumably the training batch size (TODO confirm)

# check execution time for whole code
import time
s_time = time.time()  # wall-clock start; presumably compared with a later time.time() (not visible here)

In [4]:
import datasets

import pandas as pd
import numpy as np
import random
import collections
import tqdm

from IPython.display import display, HTML

import transformers
from transformers import AutoTokenizer
from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer

import torch
# use the GPU when torch reports one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# library versions recorded for this notebook (the print below reports the versions of the current run):
# datasets : 1.6.2  |  pd : 1.1.5  |  np : 1.19.5  |  tqdm : 4.41.1  |  transformers : 4.5.1  |  torch : 1.8.1+cu101
print(f'datasets : {datasets.__version__}  |  pd : {pd.__version__}  |  np : {np.__version__}  |  tqdm : {tqdm.__version__}  |  transformers : {transformers.__version__}  |  torch : {torch.__version__}')
print('device :', device)

datasets : 1.6.2  |  pd : 1.1.5  |  np : 1.19.5  |  tqdm : 4.41.1  |  transformers : 4.5.1  |  torch : 1.8.1+cu101
device : cuda


## 1. Loading the dataset & metric
- We will use the 🤗 Datasets library to download the data and get the metric we need to use for evaluation (to compare our model to the benchmark). This can be easily done with the functions `load_dataset` and `load_metric`.

- The 🤗 Datasets library also provides a `list_datasets()` function that returns the list of all available datasets. About 21 of them are related to the QA task.
  - ref : https://huggingface.co/datasets/squad_kor_v1 (Korean squad_v1 by LG CNS)
  - ref : https://huggingface.co/datasets/squad_kor_v2 (Korean squad_v2 by LG CNS)

In [5]:
# List every dataset known to the 🤗 Datasets library, then keep the
# SQuAD-style ones, i.e. those whose name contains 'quad'.
dset_list = datasets.list_datasets()
qa_dset_list = [name for name in dset_list if 'quad' in name]

print('>>> Total No of provided datasets :', len(dset_list))
print('>>> No of QA datasets :', len(qa_dset_list))
# Shown as a numpy array so the names are printed compactly over a few lines.
print(np.array(qa_dset_list))

>>> Total No of provided datasets : 855
>>> No of QA datasets : 21
['fquad' 'iapp_wiki_qa_squad' 'lc_quad' 'squad' 'squad_adversarial'
 'squad_es' 'squad_it' 'squad_kor_v1' 'squad_kor_v2' 'squad_v1_pt'
 'squad_v2' 'squadshifts' 'thaiqa_squad' 'xquad' 'xquad_r'
 'lhoestq/custom_squad' 'lhoestq/squad' 'piEsposito/br-quad-2.0'
 'piEsposito/br_quad_20' 'piEsposito/squad_20_ptbr'
 'susumu2357/squad_v2_sv']


In [6]:
# load dataset & metric
# squad_v2_flag picks the 'squad_v2' or the 'squad' variant for both the data and the metric,
# so the two always match.
dset_dict = datasets.load_dataset('squad_v2' if squad_v2_flag else 'squad')
metric = datasets.load_metric("squad_v2" if squad_v2_flag else "squad")

# check dataset
# display() is used instead of print() to get the notebook's rich representation.
print('\n>>> dataset object :')
display(dset_dict)
print('\n>>> sample data :')
display(dset_dict['train'][0])  # first training example

Reusing dataset squad (/root/.cache/huggingface/datasets/squad/plain_text/1.0.0/4fffa6cf76083860f85fa83486ec3028e7e32c342c218ff2a620fc6b2868483a)



>>> dataset object :


DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})


>>> sample data :


{'answers': {'answer_start': [515], 'text': ['Saint Bernadette Soubirous']},
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'id': '5733be284776f41900661182',
 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
 'title': 'University_of_Notre_Dame'}

In [7]:
# show random sample of a dataset
def show_random_elements(dataset, num_examples=5):
    """Display a reproducible random selection of rows from ``dataset`` as an HTML table.

    The global ``random`` generator is re-seeded with 777 on every call, so the
    same rows are selected each time for a dataset of a given length.

    Args:
        dataset: object supporting ``len()``, indexing with a list of row
            indices, and a ``features`` mapping of column name to feature type.
        num_examples: number of rows to show; must not exceed ``len(dataset)``.

    Raises:
        AssertionError: if ``num_examples`` is larger than the dataset.
    """
    total_rows = len(dataset)
    assert num_examples <= total_rows, "Can't pick more elements than there are in the dataset."

    random.seed(777)
    row_ids = random.sample(range(total_rows), k=num_examples)
    table = pd.DataFrame(dataset[row_ids])

    # Columns typed as ClassLabel hold integer ids; show their label names instead.
    for col_name, feature in dataset.features.items():
        if not isinstance(feature, datasets.ClassLabel):
            continue
        table[col_name] = table[col_name].transform(lambda idx: feature.names[idx])

    display(HTML(table.to_html()))

show_random_elements(dset_dict["train"], 2)

Unnamed: 0,answers,context,id,question,title
0,"{'answer_start': [39], 'text': ['52%']}","In absolute terms, the planet has lost 52% of its biodiversity since 1970 according to a 2014 study by the World Wildlife Fund. The Living Planet Report 2014 claims that ""the number of mammals, birds, reptiles, amphibians and fish across the globe is, on average, about half the size it was 40 years ago"". Of that number, 39% accounts for the terrestrial wildlife gone, 39% for the marine wildlife gone, and 76% for the freshwater wildlife gone. Biodiversity took the biggest hit in Latin America, plummeting 83 percent. High-income countries showed a 10% increase in biodiversity, which was canceled out by a loss in low-income countries. This is despite the fact that high-income countries use five times the ecological resources of low-income countries, which was explained as a result of process whereby wealthy nations are outsourcing resource depletion to poorer nations, which are suffering the greatest ecosystem losses.",570bc6466b8089140040fa30,What percentage of biodiversity has the planet lost since 1970,Biodiversity
1,"{'answer_start': [252], 'text': ['more than 100']}","Later the emphasis was on classical studies, dominated by Latin and Ancient History, and, for boys with sufficient ability, Classical Greek. From the latter part of the 19th century this curriculum has changed and broadened: for example, there are now more than 100 students of Chinese, which is a non-curriculum course. In the 1970s, there was just one school computer, in a small room attached to the science buildings. It used paper tape to store programs. Today, all boys must have laptop computers, and the school fibre-optic network connects all classrooms and all boys' bedrooms to the internet.",5727bd3d4b864d1900163c00,How many current students take Chinese courses at Eaton?,Eton_College


## 2. Preprocessing the data


In [8]:
# set parameters for tokenizer
max_length = 384  # The maximum length of a feature (question and context)
doc_stride = 128  # The authorized overlap between two parts of the context when splitting it is needed.

# download and initialize pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# True when the tokenizer pads on the right; later cells use it to decide whether the
# question or the context is passed first and which of the two may be truncated.
pad_on_right = tokenizer.padding_side == 'right'

# check if fast tokenizer
print(tokenizer)
assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)  # raises error if False

PreTrainedTokenizerFast(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})


In [9]:
# check tokenizer output
# Encode a (first sentence, second sentence) pair, then decode the ids again to see
# where the special tokens were inserted.
tokenized = tokenizer("What is your name?", "My name is Sylvain.")
decoded = tokenizer.decode(tokenized['input_ids'])

# one line per field returned by the tokenizer; the name is left-aligned in 15 characters
for k, v in tokenized.items():
  print(f'>>> {k:<15} : {v}')
print(f'\n>>> decoded : "{decoded}"')

>>> input_ids       : [101, 2054, 2003, 2115, 2171, 1029, 102, 2026, 2171, 2003, 25353, 22144, 2378, 1012, 102]
>>> attention_mask  : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

>>> decoded : "[CLS] what is your name? [SEP] my name is sylvain. [SEP]"


### (hard to understand this part)

- Now one specific thing for the preprocessing in question answering is how to deal with very long documents. We usually truncate them in other tasks, when they are longer than the model maximum sentence length, but here, removing part of the context might result in losing the answer we are looking for.

- To deal with this, we will allow one (long) example in our dataset to give several input features, each with a length shorter than the maximum length of the model (or the one we set as a hyper-parameter). Also, just in case the answer lies at the point where we split a long context, we allow some overlap between the features we generate, controlled by the hyper-parameter `doc_stride`:

In [10]:
# get one example longer than max_length
# Scan the training set in order and stop at the first example whose tokenized
# (context, question) pair has more than max_length input ids.
# NOTE: the pair is passed here as (context, question) while the later cells pass
# (question, context); only the number of ids is used in this cell.
for i, example in enumerate(dset_dict['train']):
  input_len = len(tokenizer(example['context'], example['question'])['input_ids'])
  if input_len > max_length:
    print(f'>>> found {i+1}th example input with {input_len} tokens')
    break

# `example` keeps the value from the last loop iteration (the long example when one was
# found); as the last expression of the cell it is displayed, and later cells reuse it.
example

>>> found 250th example input with 396 tokens


{'answers': {'answer_start': [30], 'text': ['over 1,600']},
 'context': "The men's basketball team has over 1,600 wins, one of only 12 schools who have reached that mark, and have appeared in 28 NCAA tournaments. Former player Austin Carr holds the record for most points scored in a single game of the tournament with 61. Although the team has never won the NCAA Tournament, they were named by the Helms Athletic Foundation as national champions twice. The team has orchestrated a number of upsets of number one ranked teams, the most notable of which was ending UCLA's record 88-game winning streak in 1974. The team has beaten an additional eight number-one teams, and those nine wins rank second, to UCLA's 10, all-time in wins against the top team. The team plays in newly renovated Purcell Pavilion (within the Edmund P. Joyce Center), which reopened for the beginning of the 2009–2010 season. The team is coached by Mike Brey, who, as of the 2014–15 season, his fifteenth at Notre Dame, has ac

In [11]:
# check example after tokenization (with truncation, stride, return_overflowing_tokens)
# The question/context order and the truncation side both follow pad_on_right, so it is
# always the context that gets truncated.
tokenized_example = tokenizer(
    example["question" if pad_on_right else "context"],
    example["context" if pad_on_right else "question"],
    truncation='only_second' if pad_on_right else 'only_first',  # never truncate the question, only the context
    max_length=max_length,
    stride=doc_stride,  # No of tokens to overlap between truncated text & overflowing text
    return_overflowing_tokens=True,  # return all overflowing texts (nested list for input_ids when overflowing)
    return_offsets_mapping=True,  # return offset_mapping, the corresponding start and end character in the original text
    padding="max_length",
)

# Print every returned field. Fields holding one list per feature are abbreviated to the
# first and last 30 entries of each list; any other field is printed whole.
for k in tokenized_example:
  if isinstance(tokenized_example[k][0], list):
    print(f'>>> {k} :')
    for lst in tokenized_example[k]:
      print('\t', lst[:30] + ['.....'] + lst[-30:])
    print()
  else:
    print(f'>>> {k} :\n\t{tokenized_example[k]}\n')

>>> input_ids :
	 [101, 2129, 2116, 5222, 2515, 1996, 10289, 8214, 2273, 1005, 1055, 3455, 2136, 2031, 1029, 102, 1996, 2273, 1005, 1055, 3455, 2136, 2038, 2058, 1015, 1010, 5174, 5222, 1010, 2028, '.....', 6862, 3946, 1998, 6986, 9530, 2532, 18533, 2239, 1010, 1996, 3554, 3493, 3786, 1996, 9523, 2120, 3410, 3804, 2630, 13664, 3807, 2076, 1996, 2161, 1012, 1996, 3590, 5222, 2020, 102]
	 [101, 2129, 2116, 5222, 2515, 1996, 10289, 8214, 2273, 1005, 1055, 3455, 2136, 2031, 1029, 102, 2528, 1012, 1996, 2230, 1516, 2340, 2136, 5531, 2049, 3180, 2161, 4396, 2193, 2698, '.....', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

>>> attention_mask :
	 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, '.....', 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
	 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, '.....', 0, 0, 0, 0, 0, 

In [12]:
# check truncated & overflowing texts
# For each feature produced from the long example, print its number of input ids
# followed by the decoded text, to see how the context was split.
for lst in tokenized_example['input_ids']:
  print(len(lst))
  print(tokenizer.decode(lst), '\n')

384
[CLS] how many wins does the notre dame men's basketball team have? [SEP] the men's basketball team has over 1, 600 wins, one of only 12 schools who have reached that mark, and have appeared in 28 ncaa tournaments. former player austin carr holds the record for most points scored in a single game of the tournament with 61. although the team has never won the ncaa tournament, they were named by the helms athletic foundation as national champions twice. the team has orchestrated a number of upsets of number one ranked teams, the most notable of which was ending ucla's record 88 - game winning streak in 1974. the team has beaten an additional eight number - one teams, and those nine wins rank second, to ucla's 10, all - time in wins against the top team. the team plays in newly renovated purcell pavilion ( within the edmund p. joyce center ), which reopened for the beginning of the 2009 – 2010 season. the team is coached by mike brey, who, as of the 2014 – 15 season, his fifteenth at 

In [13]:
# check vocab & offset index
# Take the token at position 1 of the first feature (position 0 is the [CLS] token, as the
# decoded text above shows) together with its (start, end) character offsets.
first_token_id, first_token_offset = tokenized_example['input_ids'][0][1], tokenized_example['offset_mapping'][0][1]
print('>>> token index in vocab :', first_token_id)
print('>>> token index in string :', first_token_offset)

# Decode the id through the vocabulary and slice the same span out of the raw question;
# the two should name the same word.
# NOTE: slicing example['question'] assumes the question was passed first (pad_on_right is True).
decoded_tk = tokenizer.decode(first_token_id)
sliced_tk = example['question'][first_token_offset[0]:first_token_offset[1]]
print('\n>>> token from vocab :', decoded_tk)
print('>>> token from example string :', sliced_tk)

>>> token index in vocab : 2129
>>> token index in string : (0, 3)

>>> token from vocab : how
>>> token from example string : How


In [14]:
# check sequence_ids method (0 for first & 1 for second sequence of text, None for special tokens)
# No index is passed, so this presumably describes the first feature (TODO confirm
# against the BatchEncoding.sequence_ids documentation).
for i in tokenized_example.sequence_ids():
  if i is None: print()  # start a new output line at every special token
  print(i, end=', ')


None, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
None, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [15]:
# answers = example['answers']
# answer_start = answers['answer_start'][0]
# answer_end = answer_start + len(answers['text'][0])

In [16]:
%%time

# function to preprocess texts in train dataset
def prepare_train_features(examples):
  """Tokenize a batch of QA examples and label every feature with answer token positions.

  A long context is split into several overlapping features (see ``max_length`` and
  ``doc_stride``), so the returned batch can hold more rows than ``examples``.

  Uses the notebook-level ``tokenizer``, ``pad_on_right``, ``max_length`` and
  ``doc_stride``.

  Args:
    examples: batch as a dict of lists with the keys ``'question'``, ``'context'``
      and ``'answers'``; each answers entry is a dict with the lists
      ``'answer_start'`` (character offsets into the context) and ``'text'``.

  Returns:
    The tokenizer output with ``'overflow_to_sample_mapping'`` and
    ``'offset_mapping'`` removed and two added lists, ``'start_positions'`` and
    ``'end_positions'``, holding one token index per feature. Features whose
    example has no answer, or whose context span does not fully contain the
    answer, are labeled with the index of the CLS token for both positions.
    Only the first answer of each example is used.
  """
  tokenized_examples = tokenizer(
      examples["question" if pad_on_right else "context"],
      examples["context" if pad_on_right else "question"],
      truncation='only_second' if pad_on_right else 'only_first',  # never truncate the question, only the context
      max_length=max_length,
      stride=doc_stride,  # No of tokens to overlap between truncated text & overflowing text
      return_overflowing_tokens=True,  # return all overflowing texts (nested list for input_ids when overflowing)
      return_offsets_mapping=True,  # return offset_mapping, the corresponding start and end character in the original text
      padding="max_length",
  )

  # feature index -> index of the example it was cut from
  sample_mapping = tokenized_examples.pop('overflow_to_sample_mapping')
  # per feature: (start_char, end_char) of every token in its original text
  offset_mapping = tokenized_examples.pop('offset_mapping')

  # sequence id carried by the context tokens (second sequence when padding on the right)
  context_seq_id = 1 if pad_on_right else 0

  start_positions = []
  end_positions = []

  for i, offsets in enumerate(offset_mapping):
    input_ids = tokenized_examples['input_ids'][i]
    cls_id = input_ids.index(tokenizer.cls_token_id)
    sequence_ids = tokenized_examples.sequence_ids(i)

    answers = examples['answers'][sample_mapping[i]]

    if len(answers['answer_start']) == 0:
      # no answer given for this example: label the feature with the CLS index
      start_positions.append(cls_id)
      end_positions.append(cls_id)
      continue

    # start & end character index of the answer in the context
    answer_start = answers['answer_start'][0]
    answer_end = answer_start + len(answers['text'][0])

    # first and last token index of the context inside this feature
    context_start = 0
    while sequence_ids[context_start] != context_seq_id:
      context_start += 1
    context_end = len(input_ids) - 1
    while sequence_ids[context_end] != context_seq_id:
      context_end -= 1

    if answer_start < offsets[context_start][0] or offsets[context_end][1] < answer_end:
      # the answer is not fully inside this span: label the feature with the CLS index
      start_positions.append(cls_id)
      end_positions.append(cls_id)
      continue

    # Last context token that starts at or before the answer start.
    # The scan is limited to the context: special and padding tokens have offset (0, 0)
    # and would otherwise keep the loop running when the answer starts at the last
    # context token.
    token_start_id = context_start
    while token_start_id <= context_end and offsets[token_start_id][0] <= answer_start:
      token_start_id += 1
    start_positions.append(token_start_id - 1)

    # First context token (scanning backwards) that ends at or after the answer end.
    # The comparison is between character offsets, i.e. against answer_end.
    token_end_id = context_end
    while token_end_id >= context_start and offsets[token_end_id][1] >= answer_end:
      token_end_id -= 1
    end_positions.append(token_end_id + 1)

  tokenized_examples['start_positions'] = start_positions
  tokenized_examples['end_positions'] = end_positions
  return tokenized_examples


# apply function to preprocess texts in train dataset
## since the function changes the number of samples, we need to remove the old columns when applying it
## NOTE: map is called on the whole dset_dict, so every split it contains
## ('train' and 'validation') goes through prepare_train_features.
tokenized_datasets = dset_dict.map(
    prepare_train_features,  # function to apply
    batched=True,  # encode the texts by batches together (fast tokenizer's multi-threading to treat the texts in a batch concurrently)
    remove_columns=dset_dict['train'].column_names  # remove the old columns
)

HBox(children=(FloatProgress(value=0.0, max=88.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


CPU times: user 1min 47s, sys: 1.07 s, total: 1min 48s
Wall time: 46.9 s


In [17]:
# Print every field of the first original training example together with its
# item count: the list length for list values, 1 for anything else.
for k, v in dset_dict['train'][0].items():
  n_items = len(v) if isinstance(v, list) else 1
  print(f'>>> {k} ({n_items} items) :\n\t{v}\n')

>>> answers (1 items) :
	{'answer_start': [515], 'text': ['Saint Bernadette Soubirous']}

>>> context (1 items) :
	Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.

>>> id (1 items) :
	5733be284776f41900661182

>>> question (1 items) :
	To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?

>>> title (1 items) :
	University_of_Notre_

In [18]:
# Print every field of the first tokenized training feature together with its
# item count: the list length for list values, 1 for anything else.
for k, v in tokenized_datasets['train'][0].items():
  n_items = len(v) if isinstance(v, list) else 1
  print(f'>>> {k} ({n_items} items) :\n\t{v}\n')

>>> attention_mask (384 items) :
	[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [19]:
# check items in sample from tokenized dataset (5 samples)
# Slicing with [:5] yields a dict of columns, so each value printed below holds the
# entries of the first five features for that column.
for k, v in tokenized_datasets['train'][:5].items():
  print(f'{k} :\n\t{v}\n')

attention_mask :
	[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,