In [3]:
!pip install transformers



In [4]:
!pip install torch



In [None]:
import torch

In [None]:
from transformers import BertForQuestionAnswering

model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

In [None]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

In [None]:
question = "How many parameters does BERT-large have?"
answer_text = "BERT-large is really big... it has 24-layers and an embedding size of 1,024, for a total of 340M parameters! Altogether it is 1.34GB, so expect it to take a couple minutes to download to your Colab instance."

In [None]:
# Apply the tokenizer to the input text, treating them as a text-pair.
input_ids = tokenizer.encode(question, answer_text)

print('The input has a total of {:} tokens.'.format(len(input_ids)))

In [None]:
# BERT only needs the token IDs, but for the purpose of inspecting the 
# tokenizer's behavior, let's also get the token strings and display them.
tokens = tokenizer.convert_ids_to_tokens(input_ids)

# For each token and its id...
for token, id in zip(tokens, input_ids):
    
    # If this is the [SEP] token, add some space around it to make it stand out.
    if id == tokenizer.sep_token_id:
        print('')
    
    # Print the token string and its ID in two columns.
    print('{:<12} {:>6,}'.format(token, id))

    if id == tokenizer.sep_token_id:
        print('')

In [None]:
# Search the input_ids for the first instance of the `[SEP]` token.
sep_index = input_ids.index(tokenizer.sep_token_id)

# The number of segment A tokens includes the [SEP] token istelf.
num_seg_a = sep_index + 1

# The remainder are segment B.
num_seg_b = len(input_ids) - num_seg_a

# Construct the list of 0s and 1s.
segment_ids = [0]*num_seg_a + [1]*num_seg_b

# There should be a segment_id for every input token.
assert len(segment_ids) == len(input_ids)

In [None]:
# Run our example through the model.
outputs = model(torch.tensor([input_ids]), # The tokens representing our input text.
                             token_type_ids=torch.tensor([segment_ids]), # The segment IDs to differentiate question from answer_text
                             return_dict=True) 

start_scores = outputs.start_logits
end_scores = outputs.end_logits

In [None]:
# Find the tokens with the highest `start` and `end` scores.
answer_start = torch.argmax(start_scores)
answer_end = torch.argmax(end_scores)

# Combine the tokens in the answer and print it out.
answer = ' '.join(tokens[answer_start:answer_end+1])

print('Answer: "' + answer + '"')

In [None]:
# Start with the first token.
answer = tokens[answer_start]

# Select the remaining answer tokens and join them with whitespace.
for i in range(answer_start + 1, answer_end + 1):
    
    # If it's a subword token, then recombine it with the previous token.
    if tokens[i][0:2] == '##':
        answer += tokens[i][2:]
    
    # Otherwise, add a space then the token.
    else:
        answer += ' ' + tokens[i]

print('Answer: "' + answer + '"')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Use plot styling from seaborn.
sns.set(style='darkgrid')

# Increase the plot size and font size.
#sns.set(font_scale=1.5)
plt.rcParams["figure.figsize"] = (16,8)

In [None]:
# Pull the scores out of PyTorch Tensors and convert them to 1D numpy arrays.
s_scores = start_scores.detach().numpy().flatten()
e_scores = end_scores.detach().numpy().flatten()

# We'll use the tokens as the x-axis labels. In order to do that, they all need
# to be unique, so we'll add the token index to the end of each one.
token_labels = []
for (i, token) in enumerate(tokens):
    token_labels.append('{:} - {:>2}'.format(token, i))

In [None]:
def answer_question(question, answer_text):
    '''
    Takes a `question` string and an `answer_text` string (which contains the
    answer), and identifies the words within the `answer_text` that are the
    answer. Prints them out.
    '''
    # ======== Tokenize ========
    # Apply the tokenizer to the input text, treating them as a text-pair.
    input_ids = tokenizer.encode(question, answer_text)

    # Report how long the input sequence is.
    print('Query has {:,} tokens.\n'.format(len(input_ids)))

    # ======== Set Segment IDs ========
    # Search the input_ids for the first instance of the `[SEP]` token.
    sep_index = input_ids.index(tokenizer.sep_token_id)

    # The number of segment A tokens includes the [SEP] token istelf.
    num_seg_a = sep_index + 1

    # The remainder are segment B.
    num_seg_b = len(input_ids) - num_seg_a

    # Construct the list of 0s and 1s.
    segment_ids = [0]*num_seg_a + [1]*num_seg_b

    # There should be a segment_id for every input token.
    assert len(segment_ids) == len(input_ids)

    # ======== Evaluate ========
    # Run our example through the model.
    outputs = model(torch.tensor([input_ids]), # The tokens representing our input text.
                    token_type_ids=torch.tensor([segment_ids]), # The segment IDs to differentiate question from answer_text
                    return_dict=True) 

    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

    # ======== Reconstruct Answer ========
    # Find the tokens with the highest `start` and `end` scores.
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)

    # Get the string versions of the input tokens.
    tokens = tokenizer.convert_ids_to_tokens(input_ids)

    # Start with the first token.
    answer = tokens[answer_start]

    # Select the remaining answer tokens and join them with whitespace.
    for i in range(answer_start + 1, answer_end + 1):
        
        # If it's a subword token, then recombine it with the previous token.
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        
        # Otherwise, add a space then the token.
        else:
            answer += ' ' + tokens[i]

    print('Answer: "' + answer + '"')

In [None]:
import textwrap

# Wrap text to 80 characters.
wrapper = textwrap.TextWrapper(width=80) 

bert_abstract = "We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be finetuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial taskspecific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement)."

print(wrapper.fill(bert_abstract))

In [None]:
question = "What does the 'B' in BERT stand for?"

answer_question(question, bert_abstract)

In [None]:
question = "What are some example applications of BERT?"

answer_question(question, bert_abstract)

In [None]:
question = "What is your name?"
answer_question(question,, bert_abstract)

In [None]:
question = "What is your name?"
answer_question(question, bert_abstract)

In [None]:
question = "What is BERT?"
answer_question(question,z, bert_abstract)

In [None]:
question = "What is BERT?"
answer_question(question, bert_abstract)

In [25]:
question = "BERT"
answer_question(question, bert_abstract)

Query has 248 tokens.

Answer: "bid"


In [1]:
import textwrap

# Wrap text to 80 characters.
wrapper = textwrap.TextWrapper(width=80) 

bert_abstract = "사전적인 의미로의 뇌진탕은 구조변화 없이 기능적인 소실만이 잠시 일시적으로 나타났다 회복되는 것이므로 외상과 연관된 후유증은 없는 가벼운 상태로 규정되어 왔습니다. 그러나 엄밀한 의미로 구조적인 손상이 전혀 없다고 하기는 어렵다는 것이 최근의 견해입니다. 의식 소실의 시간이나 기억상실의 시간이 길다면 영구적인 기능 감소가 발생할 소지가 있는 것입니다. 가장 흔히 관찰되는 후유증은 뇌진탕 후 증후군(Post-Concussion syndrome)이라고 하며 주 증상으로는 두통 및 뇌신경관련 증상으로 어지럼증, 귀울림(이명), 청력감퇴, 시력장애 등을 호소합니다. 정신과적 증상으로 과민, 불안, 우울, 인격변화, 피로, 수면장애, 인지장애, 기억장애, 집중력 및 주의력 장애 등을 호소합니다. 대부분의 경우 3개월 내에 증상이 없어지지만, 소수의 경우 1년 이상 지속될 수 있습니다."

print(wrapper.fill(bert_abstract))

사전적인 의미로의 뇌진탕은 구조변화 없이 기능적인 소실만이 잠시 일시적으로 나타났다 회복되는 것이므로 외상과 연관된 후유증은 없는 가벼운 상태로
규정되어 왔습니다. 그러나 엄밀한 의미로 구조적인 손상이 전혀 없다고 하기는 어렵다는 것이 최근의 견해입니다. 의식 소실의 시간이나 기억상실의
시간이 길다면 영구적인 기능 감소가 발생할 소지가 있는 것입니다. 가장 흔히 관찰되는 후유증은 뇌진탕 후 증후군(Post-Concussion
syndrome)이라고 하며 주 증상으로는 두통 및 뇌신경관련 증상으로 어지럼증, 귀울림(이명), 청력감퇴, 시력장애 등을 호소합니다. 정신과적
증상으로 과민, 불안, 우울, 인격변화, 피로, 수면장애, 인지장애, 기억장애, 집중력 및 주의력 장애 등을 호소합니다. 대부분의 경우 3개월
내에 증상이 없어지지만, 소수의 경우 1년 이상 지속될 수 있습니다.


In [2]:
question = "뇌진탕의 후유증의 지속 기간은?"
answer_question(question, bert_abstract)

NameError: name 'answer_question' is not defined

In [28]:
import textwrap

# Wrap text to 80 characters.
wrapper = textwrap.TextWrapper(width=80) 

bert_abstract = "농가진 또는 농피증은 소아에서 가장 흔한 피부 감염증입니다. 황색 포도알균 또는 A군 사슬알균 등의 세균이 원인입니다. 주로 피부에 상처가 생기거나 벌레에 물린 후에 발생합니다. 작은 붉은 반점으로 시작해 얇은 수포나 농포가 생긴 후 터집니다. 터진 수포에서 노란 진물이 나오고 딱지가 앉으며, 때로 가려움이 동반되지만 통증이나 전신적인 발열은 거의 없습니다. 손이나 옷을 통해 피부의 다른 부위로 번질 수 있습니다. 피부 병변 부위를 깨끗이 씻어내고 항생제 연고를 국소적으로 발라줍니다."

print(wrapper.fill(bert_abstract))

농가진 또는 농피증은 소아에서 가장 흔한 피부 감염증입니다. 황색 포도알균 또는 A군 사슬알균 등의 세균이 원인입니다. 주로 피부에 상처가
생기거나 벌레에 물린 후에 발생합니다. 작은 붉은 반점으로 시작해 얇은 수포나 농포가 생긴 후 터집니다. 터진 수포에서 노란 진물이 나오고
딱지가 앉으며, 때로 가려움이 동반되지만 통증이나 전신적인 발열은 거의 없습니다. 손이나 옷을 통해 피부의 다른 부위로 번질 수 있습니다. 피부
병변 부위를 깨끗이 씻어내고 항생제 연고를 국소적으로 발라줍니다.


In [29]:
question = "소아에서 가장 흔하게 발생하는 피부 감염증은?"
answer_question(question, bert_abstract)

Query has 454 tokens.

Answer: "[CLS]"


In [30]:
print(outputs)

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[-6.4849, -6.4358, -8.1077, -8.8489, -7.8751, -8.0522, -8.4684, -8.5295,
         -7.7074, -9.2464, -6.4849, -2.7303, -6.3473, -5.7299, -7.7780, -7.0391,
         -6.3331, -7.3153, -7.3048, -7.4121, -2.2534, -5.3971, -0.9424, -7.3584,
         -5.4575, -7.0769, -4.4887, -3.9272, -5.6967, -5.9506, -5.0059, -5.9812,
          0.0530, -5.5968, -4.7093, -4.5750, -6.1786, -2.2294, -0.1904, -0.2327,
         -2.7331,  6.4256, -2.6543, -4.5655, -4.9872, -4.9834, -5.9110, -7.8402,
         -1.8986, -7.2123, -4.1543, -6.2354, -8.0953, -7.2329, -6.4411, -6.8384,
         -8.1032, -7.0570, -7.7332, -6.8711, -7.1045, -8.2966, -6.1939, -8.0817,
         -7.5501, -5.9695, -8.1007, -6.8849, -8.2273, -6.4850]],
       grad_fn=<CloneBackward0>), end_logits=tensor([[-2.0629, -6.3878, -6.2450, -6.3605, -7.0722, -7.6281, -7.1160, -6.8674,
         -7.1313, -7.1495, -2.0628, -5.0858, -4.7276, -3.5955, -6.3050, -7.1109,
         -4.4975, -4.7221, 

In [31]:
print(model)

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), eps=1e-12,

In [32]:
print(torch.tensor([1]))

tensor([1])


In [33]:
print(outputs.start_logits)

tensor([[-6.4849, -6.4358, -8.1077, -8.8489, -7.8751, -8.0522, -8.4684, -8.5295,
         -7.7074, -9.2464, -6.4849, -2.7303, -6.3473, -5.7299, -7.7780, -7.0391,
         -6.3331, -7.3153, -7.3048, -7.4121, -2.2534, -5.3971, -0.9424, -7.3584,
         -5.4575, -7.0769, -4.4887, -3.9272, -5.6967, -5.9506, -5.0059, -5.9812,
          0.0530, -5.5968, -4.7093, -4.5750, -6.1786, -2.2294, -0.1904, -0.2327,
         -2.7331,  6.4256, -2.6543, -4.5655, -4.9872, -4.9834, -5.9110, -7.8402,
         -1.8986, -7.2123, -4.1543, -6.2354, -8.0953, -7.2329, -6.4411, -6.8384,
         -8.1032, -7.0570, -7.7332, -6.8711, -7.1045, -8.2966, -6.1939, -8.0817,
         -7.5501, -5.9695, -8.1007, -6.8849, -8.2273, -6.4850]],
       grad_fn=<CloneBackward0>)


In [34]:
print(outputs.end_logits)

tensor([[-2.0629, -6.3878, -6.2450, -6.3605, -7.0722, -7.6281, -7.1160, -6.8674,
         -7.1313, -7.1495, -2.0628, -5.0858, -4.7276, -3.5955, -6.3050, -7.1109,
         -4.4975, -4.7221, -5.4760, -5.5441, -6.1391, -5.8593, -0.4636, -4.3720,
         -1.0411, -5.3359, -6.2969, -6.1156, -5.1736, -4.6144, -4.8274, -6.3638,
         -4.2078, -5.2329, -4.7127,  0.7952, -0.7376, -4.5555, -5.2985, -3.6082,
         -3.7726,  2.7501,  5.4644,  4.1220,  1.2127, -5.5042, -5.8367, -6.0745,
         -3.8426, -5.8273, -1.9782, -1.3083, -2.4872, -5.3204, -6.5550, -6.3885,
         -6.8736, -6.3949, -7.0454, -6.0590, -4.5225, -6.6686, -4.0074, -6.9146,
         -6.9742, -6.5173, -4.8760, -4.4629, -4.7580, -2.0631]],
       grad_fn=<CloneBackward0>)
