In [1]:
import matplotlib
import numpy as np

matplotlib.use('Agg')  # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt  # drawing heat map of attention weights

plt.rcParams['font.sans-serif'] = ['SimSun']  # set font family

In [2]:
import plotly.graph_objects as go

In [3]:
import pickle as pkl

import numpy as np
import torch

from transformers import AutoModelForMultipleChoice, AutoTokenizer
from datasets import load_dataset

from src.utils_multiple_choice import convert_examples_to_features, InputExample

In [4]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [5]:
model = AutoModelForMultipleChoice.from_pretrained("../assets/models/bb_race_m/")
tokenizer = AutoTokenizer.from_pretrained("../assets/models/bb_race_m")

dataset = load_dataset("race", "middle")
test = dataset['test']

label_list = ["0", "1", "2", "3"]
label_map = {
    0: "A",
    1: "B",
    2: "C",
    3: "D"
}
max_seq_length = 128

Reusing dataset race (/home/marcos/.cache/huggingface/datasets/race/middle/0.1.0/a7d1fac780e70c0e75bca35e9f2f8cfc1411edd18ffd6858ddce56f70dfb1e7c)


In [6]:
def predict(article, question, options, real_label=None, return_result=False):
    examples = [InputExample(
        example_id="pred",
        question=question,
        contexts=[article, article, article, article],  # this is not efficient but convenient
        endings=[options[0], options[1], options[2], options[3]],
        label=str(ord(real_label) - ord("A")) if real_label else "0"
    )]
    
    feature = convert_examples_to_features(
        examples,
        label_list,
        max_seq_length,
        tokenizer
    )[0]
    
    features = {
        'input_ids': torch.tensor([feature.input_ids]),
        'attention_mask': torch.tensor([feature.attention_mask]),
        'token_type_ids': torch.tensor([feature.token_type_ids]),
    } 
    
    if return_result:
        result = model.forward(features['input_ids'], features['attention_mask'], features['token_type_ids'], 
                               output_attentions=True, output_hidden_states=True, return_dict=True)
        return result
    else:
        result = model.forward(features['input_ids'], features['attention_mask'], features['token_type_ids'])[0][0]
        print(result)
        return np.array([float(abs(x)) for x in result]).argmax()

In [129]:
ex = test[0]

article = ex['article']
question = ex['question']
options = ex['options']
real_label = ex['answer']

result = predict(article, question, options, real_label)
print(f"Question: {question}")
print(f"Options: {options}")
print(f"Result: {label_map[result]}")

convert examples to features: 1it [00:00, 60.09it/s]


tensor([-2.8149, -3.3435, -3.6580, -2.5833], grad_fn=<SelectBackward>)
Question: A discipline leader is supposed to  _  .
Options: ['take care of the whole group', 'make sure that everybody finishes homework', 'make sure that nobody chats in class', 'collect all the homework and hand it in to teachers']
Result: C


In [71]:
result = predict(article, question, options, real_label, return_result=True)

convert examples to features: 1it [00:00, 71.65it/s]


In [143]:
def get_atts(ex, option):
    examples = [InputExample(
        example_id="pred",
        question=ex['question'],
        contexts=[ex['article'], ex['article'], ex['article'], ex['article']],  # this is not efficient but convenient
        endings=[ex['options'][0], ex['options'][1], ex['options'][2], ex['options'][3]],
        label=str(ord(ex['answer']) - ord("A")) if ex['answer'] else "0"
    )]
    
    feature = convert_examples_to_features(
        examples,
        label_list,
        max_seq_length,
        tokenizer
    )[0]
    
    features = {
        'input_ids': torch.tensor([feature.input_ids]),
        'attention_mask': torch.tensor([feature.attention_mask]),
        'token_type_ids': torch.tensor([feature.token_type_ids]),
    } 
    input_id_list = feature.input_ids[option]
    tokens = tokenizer.convert_ids_to_tokens(input_id_list) 
    token_type_ids = feature.token_type_ids
    
    attention = model.forward(features['input_ids'], features['attention_mask'], features['token_type_ids'], 
                               output_attentions=True, output_hidden_states=True, return_dict=True)['attentions']
    
    q_start = token_type_ids[option].index(1)
    option_start = len(tokenizer.tokenize(question)[:-2])
    tokens_ = tokens[q_start:]
    atts = []
    for att in attention:

        atts.append(att[option].reshape(1, 12, 128, 128)[:, :, q_start:, q_start:])

    return atts, tokens_, option_start

In [144]:
atts, tokens, option_start = get_atts(ex, option=2)

convert examples to features: 1it [00:00, 61.69it/s]


In [145]:
tokens, option_start

(['what',
  'is',
  'a',
  'discipline',
  'leader',
  '?',
  'a',
  'person',
  'supposed',
  'to',
  'make',
  'sure',
  'that',
  'nobody',
  'chat',
  '##s',
  'in',
  'class',
  '[SEP]'],
 6)

##### Heatmap

In [146]:
ex['question'] = "What is a discipline leader?"
ex['options'] = [
    "A person supposed to take care of the whole group",
    "A person supposed to make sure that everybody finished homework",
    "A person supposed to make sure that nobody chats in class",
    "A person supposed to collect all the homework and hand it in to teachers"
]

In [147]:
for option in range(4):
    atts, tokens, option_start = get_atts(ex, option=option)
    data = []
    for word_q in range(option_start):
        data.append(np.sum([atts[0][0][i][word_q][option_start:-1] for i in range(len(atts[0][0]))]).detach().numpy())

    data = np.array(data)

    fig, ax = plt.subplots(figsize=(20, 8))  # set figure size
    heatmap = ax.pcolor(data, cmap=plt.cm.Blues, alpha=0.9)

    X_label = tokens[option_start:]
    xticks = range(0, len(X_label))
    ax.set_xticks(xticks, minor=False)  # major ticks
    ax.set_xticklabels(X_label, minor=False, rotation=45)  # labels should be 'unicode'

    Y_label = tokens[:option_start]
    yticks = range(0, len(Y_label))
    ax.set_yticks(yticks, minor=False)
    ax.set_yticklabels(Y_label, minor=False)  # labels should be 'unicode'

    ax.grid(True)

    plt.title(f'Attention Heatmap for Option {option}')
    file_name = f'../assets/attentions/attention_heatmap_ex1_modB_{option}.jpg'
    print("Saving figures %s" % file_name)
    fig.savefig(file_name)  # save the figure to file
    plt.close(fig) 

convert examples to features: 1it [00:00, 78.00it/s]
convert examples to features: 1it [00:00, 75.95it/s]

Saving figures ../assets/attentions/attention_heatmap_ex1_modB_0.jpg



convert examples to features: 1it [00:00, 75.63it/s]

Saving figures ../assets/attentions/attention_heatmap_ex1_modB_1.jpg



convert examples to features: 1it [00:00, 85.60it/s]

Saving figures ../assets/attentions/attention_heatmap_ex1_modB_2.jpg





Saving figures ../assets/attentions/attention_heatmap_ex1_modB_3.jpg


##### 3D Attention

In [76]:
option=0
atts, tokens, option_start = get_atts(ex, option=option)
data = []
for word_q in range(option_start):
    data_word_q = []
    for i in range(len(atts[0][0])):
        data_word_q.append(atts[0][0][i][word_q][option_start:-1].detach().numpy())
    data.append(data_word_q)
data = np.array(data)

convert examples to features: 1it [00:00, 70.84it/s]


In [104]:
from mpl_toolkits.mplot3d import Axes3D

In [129]:
x = np.arange(data.shape[0])[:, None, None]
y = np.arange(data.shape[1])[None, :, None]
z = np.arange(data.shape[2])[None, None, :]
x, y, z = np.broadcast_arrays(x, y, z)

In [120]:
c = np.tile(data.ravel()[:, None], [1, 3])

# Do the plotting in a single call.
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.scatter(x.ravel(),
           y.ravel(),
           z.ravel(),
           c=c)

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f9b82c44ee0>

In [121]:
plt.savefig("demo.png")

In [167]:
a = zip(*data)

In [168]:
a = np.array(list(a))

In [175]:
a[0]

array([[0.0559358 , 0.04751505, 0.03657236, 0.08261423, 0.06124244,
        0.04445195, 0.07587941],
       [0.05117421, 0.03134036, 0.03457394, 0.04409941, 0.05890233,
        0.03352771, 0.04296431],
       [0.03788564, 0.05488049, 0.0370061 , 0.0292864 , 0.05319819,
        0.03009209, 0.04455039],
       [0.07881433, 0.03995156, 0.06948719, 0.02964894, 0.07002679,
        0.02887556, 0.02755512],
       [0.04289049, 0.03814007, 0.03931659, 0.06014733, 0.03161725,
        0.04444621, 0.08969691],
       [0.05505043, 0.0340323 , 0.06556389, 0.03901948, 0.05987165,
        0.04316136, 0.08621822]], dtype=float32)

In [209]:
data_plot = []
for i in range(len(a)):
    x = list(range(len(a[i])))
    y = list(range(len(a[i][0])))
    z = a[i]
    data_plot.append(go.Surface(x=z, y=y, z=x))

In [210]:
fig = go.Figure(data=data_plot[0])
fig.update_layout(title='Attention 3D map')
fig.show()

##### Most important words for question

In [77]:
atts, tokens, option_start = get_atts(ex, option=2)
for word_q in range(option_start):
    att_word = np.sum([atts[0][0][i][word_q][option_start:-1] for i in range(len(atts[0][0]))]).detach().numpy()
    most_important_ = [(tokens[option_start+i], att_word[i]) for i in att_word.argsort()[::-1][:3]]
    print(f"3 most important words for: {tokens[word_q]}")
    print(most_important_)

convert examples to features: 1it [00:00, 66.23it/s]


3 most important words for: a
[('sure', 0.37506667), ('nobody', 0.3254553), ('make', 0.26634285)]
3 most important words for: discipline
[('class', 0.47839275), ('nobody', 0.28838113), ('chat', 0.28794184)]
3 most important words for: leader
[('nobody', 0.53921396), ('class', 0.51717293), ('chat', 0.43761465)]
3 most important words for: is
[('sure', 0.78322494), ('make', 0.4561162), ('nobody', 0.4158663)]
3 most important words for: supposed
[('sure', 0.5143204), ('nobody', 0.49513042), ('make', 0.46454546)]
3 most important words for: to
[('make', 1.0848705), ('sure', 0.5960057), ('nobody', 0.4418942)]


In [76]:
ex_per = ex.copy()
ex_per['question'] = "An orderliness leader is supposed to _ ."
atts, tokens, option_start = get_atts(ex_per, option=2)
for word_q in range(option_start):
    att_word = np.sum([atts[0][0][i][word_q][option_start:-1] for i in range(len(atts[0][0]))]).detach().numpy()
    most_important_ = [(tokens[option_start+i], att_word[i]) for i in att_word.argsort()[::-1][:3]]
    print(f"3 most important words for: {tokens[word_q]}")
    print(most_important_)

convert examples to features: 1it [00:00, 59.08it/s]


3 most important words for: an
[('sure', 0.28402063), ('chat', 0.24966525), ('make', 0.24659507)]
3 most important words for: order
[('class', 0.41637936), ('nobody', 0.34273064), ('chat', 0.31957448)]
3 most important words for: ##liness
[('chat', 0.4093988), ('class', 0.36730373), ('nobody', 0.26383477)]
3 most important words for: leader
[('nobody', 0.51494277), ('class', 0.50157565), ('chat', 0.42535886)]
3 most important words for: is
[('sure', 0.7545784), ('make', 0.43230242), ('nobody', 0.39854592)]
3 most important words for: supposed
[('to', 0.90458107), ('sure', 0.5022197), ('nobody', 0.4854677)]


##### Attention - Graph

In [177]:
def get_full_atts(ex):
    examples = [InputExample(
        example_id="pred",
        question=ex['question'],
        contexts=[ex['article'], ex['article'], ex['article'], ex['article']],  # this is not efficient but convenient
        endings=[ex['options'][0], ex['options'][1], ex['options'][2], ex['options'][3]],
        label=str(ord(ex['answer']) - ord("A")) if ex['answer'] else "0"
    )]
    
    feature = convert_examples_to_features(
        examples,
        label_list,
        max_seq_length,
        tokenizer
    )[0]
    
    features = {
        'input_ids': torch.tensor([feature.input_ids]),
        'attention_mask': torch.tensor([feature.attention_mask]),
        'token_type_ids': torch.tensor([feature.token_type_ids]),
    } 
    input_id_list = feature.input_ids
    tokens = [tokenizer.convert_ids_to_tokens(input_id_list[i]) for i in range(4)] 
    token_type_ids = feature.token_type_ids
    q_starts = [token_type_ids[option].index(1) for option in range(4)]
    option_start = len(tokenizer.tokenize(question)[:-2])
    
    attention = model.forward(features['input_ids'], features['attention_mask'], features['token_type_ids'], 
                               output_attentions=True, output_hidden_states=True, return_dict=True)['attentions']

    return attention, input_id_list, tokens, token_type_ids, q_starts, option_start

In [179]:
atts, input_id_list, tokens, token_type_ids, q_starts, option_start = get_full_atts(ex)

convert examples to features: 1it [00:00, 54.99it/s]


### Understanding attention 

#####  Question -> Text -> Options

In [326]:
attention_question_text = 0
print(f"Question idxs: {(q_s, q_s+6)}, Text idxs: {(0, q_s)}")
attention_question_text = sum([atts[0][0][head][q_s: q_s+6, :q_s].sum()
                               for head in range(12)])
print(f"Attention sum: {attention_question_text}")

Question idxs: (110, 116), Text idxs: (0, 110)
Attention sum: 43.835784912109375


In [329]:
attention_options = []
for option in range(4):
    q_s = q_starts[option]
    print(f"Option {option} idxs: {(q_s, q_s+6)}")
    sum_tmp = sum([atts[0][option][head][:q_s, q_s+6:].sum() for head in range(12)])
    print(f"Attention sum: {sum_tmp}")
    attention_options.append(sum_tmp)

Option 0 idxs: (114, 120)
Attention sum: 17.34185028076172
Option 1 idxs: (114, 120)
Attention sum: 20.50775146484375
Option 2 idxs: (112, 118)
Attention sum: 20.639162063598633
Option 3 idxs: (110, 116)
Attention sum: 23.762786865234375


##### Question -> Sents -> Options

In [235]:
boundaries = [".", ";", ":", "?", "!", "[SEP]"]
boundaries_idxs = []
for i, tok in enumerate(tokens[0]):
    if tok in boundaries:
        print(i, tok)
        boundaries_idxs.append(i)
        
boundaries_idxs.insert(0, -1)
sents = []
for i in range(len(boundaries_idxs)-3):
    sents.append((boundaries_idxs[i]+1, boundaries_idxs[i+1]+1))
    
# sents.append((boundaries_idxs[i+1]+1, q_starts[option]))
sents

38 .
52 .
60 ?
73 !
94 .
113 [SEP]
126 .
127 [SEP]


##### Gettin sum of attention from question to each sentence

In [301]:
option = 1
q_s = q_starts[option]

sums_q_sents = []
for i in range(len(sents)):
    print(f"Question idxs: {(q_s, q_s+6)}, Sentence {i} idxs: {sents[i]}")
    sum_tmp = sum([atts[0][option][head][q_s: q_s+6, sents[i][0]:sents[i][1]].sum() for head in range(12)])
    sums_q_sents.append(sum_tmp)

Question idxs: (114, 120), Sentence 0 idxs: (0, 39)
Question idxs: (114, 120), Sentence 1 idxs: (39, 53)
Question idxs: (114, 120), Sentence 2 idxs: (53, 61)
Question idxs: (114, 120), Sentence 3 idxs: (61, 74)
Question idxs: (114, 120), Sentence 4 idxs: (74, 95)
Question idxs: (114, 120), Sentence 5 idxs: (95, 114)


In [314]:
sums_opts = {}

for option in range(4):
    q_s = q_starts[option]
    print(f"Option: {option} -> Question idx start: {q_s}")

    sums_opt_sents = []
    for i in range(len(sents)):
        print(f"Getting attention for sentence {i}, idxs {sents[i]}")
        sum_tmp = sum([atts[0][option][head][sents[i][0]:sents[i][1], q_s+6:].sum() for head in range(12)])
        sums_opt_sents.append(sum_tmp)
    
    print(f"Sums of attentions from sentences to option: \n {[m.item() for m in sums_opt_sents]}")
    sums_opts[f"opt_{option}"] = sums_opt_sents

Option: 0 -> Question idx start: 114
Getting attention for sentence 0, idxs (0, 39)
Getting attention for sentence 1, idxs (39, 53)
Getting attention for sentence 2, idxs (53, 61)
Getting attention for sentence 3, idxs (61, 74)
Getting attention for sentence 4, idxs (74, 95)
Getting attention for sentence 5, idxs (95, 114)
Sums of attentions from sentences to option: 
 [4.9643778800964355, 1.656477928161621, 0.8121659755706787, 1.4482377767562866, 2.566917657852173, 5.8936686515808105]
Option: 1 -> Question idx start: 114
Getting attention for sentence 0, idxs (0, 39)
Getting attention for sentence 1, idxs (39, 53)
Getting attention for sentence 2, idxs (53, 61)
Getting attention for sentence 3, idxs (61, 74)
Getting attention for sentence 4, idxs (74, 95)
Getting attention for sentence 5, idxs (95, 114)
Sums of attentions from sentences to option: 
 [5.921537399291992, 2.1102805137634277, 1.0417460203170776, 1.7367511987686157, 2.9345648288726807, 6.762868881225586]
Option: 2 -> Quest

In [315]:
q_to_s = [i.item() for i in sums_q_sents]
print(f"Attentions from Question to sentences:")
print(q_to_s)

Attentions from Question to sentences:
[12.285257339477539, 2.5942118167877197, 1.5213333368301392, 2.3853273391723633, 4.133277416229248, 11.259088516235352]


In [316]:
s_os = []
print(f"Attentions from sentences to option:")
for i in range(6):
    print(f"Sentence {i}")
    s_tmp = [sums_opts[opt][i].item() for opt in sums_opts]
    s_os.append(s_tmp)
    print(s_tmp)

Attentions from sentences to option:
Sentence 0
[4.9643778800964355, 5.921537399291992, 6.326493740081787, 7.656482696533203]
Sentence 1
[1.656477928161621, 2.1102805137634277, 2.172316551208496, 2.5430238246917725]
Sentence 2
[0.8121659755706787, 1.0417460203170776, 1.0766533613204956, 1.3248991966247559]
Sentence 3
[1.4482377767562866, 1.7367511987686157, 1.8338470458984375, 2.1620099544525146]
Sentence 4
[2.566917657852173, 2.9345648288726807, 3.240234613418579, 3.8975181579589844]
Sentence 5
[5.8936686515808105, 6.762868881225586, 10.644491195678711, 17.596210479736328]


##### Larger path

In [319]:
max_ = 0
max_path = {
    'sentence': '',
    'option': ''
}
for i in range(len(q_to_s)):
    tmp_sum = q_to_s[i]
    opt_max = 0
    opt_max_idx = 0
    for idx, j in enumerate(s_os[i]):
        if j > opt_max:
            opt_max = j
            opt_max_idx = idx
        print(f"Sent {i}. Option {idx}: {tmp_sum + j}")
    tmp_sum += opt_max
    if tmp_sum > max_:
        max_ = tmp_sum
        max_path['sentence'] = i
        max_path['option'] = opt_max_idx
    print("-"*50)
max_path['value'] = max_

Sent 0. Option 0: 17.249635219573975
Sent 0. Option 1: 18.20679473876953
Sent 0. Option 2: 18.611751079559326
Sent 0. Option 3: 19.941740036010742
--------------------------------------------------
Sent 1. Option 0: 4.250689744949341
Sent 1. Option 1: 4.7044923305511475
Sent 1. Option 2: 4.766528367996216
Sent 1. Option 3: 5.137235641479492
--------------------------------------------------
Sent 2. Option 0: 2.333499312400818
Sent 2. Option 1: 2.563079357147217
Sent 2. Option 2: 2.5979866981506348
Sent 2. Option 3: 2.846232533454895
--------------------------------------------------
Sent 3. Option 0: 3.83356511592865
Sent 3. Option 1: 4.122078537940979
Sent 3. Option 2: 4.219174385070801
Sent 3. Option 3: 4.547337293624878
--------------------------------------------------
Sent 4. Option 0: 6.700195074081421
Sent 4. Option 1: 7.067842245101929
Sent 4. Option 2: 7.373512029647827
Sent 4. Option 3: 8.030795574188232
--------------------------------------------------
Sent 5. Option 0: 17.

In [320]:
max_path

{'sentence': 5, 'option': 3, 'value': 28.85529899597168}

In [240]:
for i in range(len(boundaries_idxs)-3):
    print(" ".join(tokens[0][boundaries_idxs[i]+1:boundaries_idxs[i+1]+1]))

[CLS] take a class at du ##lang ##kou school , and you ' ll see lots of things different from other schools , you can see the desk ##s are not in rows and students sit in groups .
they put their desk ##s together so they ' re facing each other .
how can they see the black ##board ?
there are three black ##boards on the three walls of the classroom !
the school calls the new way of learning " tu ##ant ##uan ##zu ##o " , meaning sitting in groups .
wei li ##ying , a junior 3 teacher , said it was to give students more chances to [SEP]


In [136]:
ases[2]

tensor([[0.0091, 0.0117, 0.0027, 0.0053, 0.0047, 0.0070, 0.0067, 0.0033, 0.0060,
         0.0083, 0.0083, 0.0123, 0.0180],
        [0.0061, 0.0117, 0.0025, 0.0049, 0.0073, 0.0051, 0.0049, 0.0030, 0.0142,
         0.0063, 0.0053, 0.0165, 0.0163],
        [0.0063, 0.0106, 0.0032, 0.0069, 0.0055, 0.0066, 0.0053, 0.0035, 0.0078,
         0.0064, 0.0058, 0.0112, 0.0246],
        [0.0067, 0.0053, 0.0030, 0.0123, 0.0116, 0.0063, 0.0021, 0.0037, 0.0101,
         0.0032, 0.0025, 0.0228, 0.0267],
        [0.0095, 0.0079, 0.0066, 0.0072, 0.0052, 0.0099, 0.0101, 0.0072, 0.0081,
         0.0073, 0.0109, 0.0107, 0.0136],
        [0.0043, 0.0177, 0.0018, 0.0033, 0.0133, 0.0032, 0.0014, 0.0021, 0.0120,
         0.0025, 0.0015, 0.0172, 0.0238],
        [0.0089, 0.0078, 0.0039, 0.0081, 0.0097, 0.0125, 0.0039, 0.0042, 0.0068,
         0.0080, 0.0044, 0.0110, 0.0140],
        [0.0050, 0.0095, 0.0055, 0.0040, 0.0063, 0.0157, 0.0038, 0.0056, 0.0067,
         0.0113, 0.0038, 0.0087, 0.0097]], grad_fn=<SliceB

In [55]:
head = 0
option = 0
[atts[0][option][head][0][i].item() for i in range(128)]

[0.007986078038811684,
 0.006433010101318359,
 0.016517121344804764,
 0.009094485081732273,
 0.014354814775288105,
 0.00633564917370677,
 0.009717180393636227,
 0.004400583915412426,
 0.007701124995946884,
 0.007552080787718296,
 0.014600632712244987,
 0.005736075341701508,
 0.0055419872514903545,
 0.005400246940553188,
 0.008601262234151363,
 0.005937308073043823,
 0.010113371536135674,
 0.005900654476135969,
 0.013658925890922546,
 0.016021285206079483,
 0.015492475591599941,
 0.006026331335306168,
 0.007072122767567635,
 0.004459220916032791,
 0.010352755896747112,
 0.007686137687414885,
 0.01517962571233511,
 0.006878224201500416,
 0.009533190168440342,
 0.0058213393203914165,
 0.007195862475782633,
 0.009078454226255417,
 0.005383834708482027,
 0.010277562774717808,
 0.003571153385564685,
 0.005235304124653339,
 0.010028570890426636,
 0.004979913122951984,
 0.01313966978341341,
 0.0044998410157859325,
 0.0048428974114358425,
 0.007298056501895189,
 0.005732205230742693,
 0.0089958

In [65]:
tokens[0]

['[CLS]',
 'take',
 'a',
 'class',
 'at',
 'du',
 '##lang',
 '##kou',
 'school',
 ',',
 'and',
 'you',
 "'",
 'll',
 'see',
 'lots',
 'of',
 'things',
 'different',
 'from',
 'other',
 'schools',
 ',',
 'you',
 'can',
 'see',
 'the',
 'desk',
 '##s',
 'are',
 'not',
 'in',
 'rows',
 'and',
 'students',
 'sit',
 'in',
 'groups',
 '.',
 'they',
 'put',
 'their',
 'desk',
 '##s',
 'together',
 'so',
 'they',
 "'",
 're',
 'facing',
 'each',
 'other',
 '.',
 'how',
 'can',
 'they',
 'see',
 'the',
 'black',
 '##board',
 '?',
 'there',
 'are',
 'three',
 'black',
 '##boards',
 'on',
 'the',
 'three',
 'walls',
 'of',
 'the',
 'classroom',
 '!',
 'the',
 'school',
 'calls',
 'the',
 'new',
 'way',
 'of',
 'learning',
 '"',
 'tu',
 '##ant',
 '##uan',
 '##zu',
 '##o',
 '"',
 ',',
 'meaning',
 'sitting',
 'in',
 'groups',
 '.',
 'wei',
 'li',
 '##ying',
 ',',
 'a',
 'junior',
 '3',
 'teacher',
 ',',
 'said',
 'it',
 'was',
 'to',
 'give',
 'students',
 'more',
 'chances',
 'to',
 '[SEP]',
 'a',