# Fine-tuning BERTIMBAU

This notebook presents the code used to perform the finetuning of BERTimbau for the task of classifying speech acts in the Porttinari-base database.

Experiments were carried out varying the value of the variables epochs [1-5] and use_weights [True, False], they must be changed in each execution to create different models.

ref original code: https://github.com/chriskhanhtran/bert-for-sentiment-analysis/tree/master

In [1]:
#to run on google colab
#from google.colab import drive
#drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import re
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

%matplotlib inline

## Dataset

In [3]:
path_data = r'data/'
path_results = r'results/'
dataset_name = r'porttinari-annotated-sample-paper-v1-20231211.csv'
data = pd.read_csv(path_data+dataset_name)

data = data[~data['speech_act'].isin(['apology', 'congratulation'])] # insufficient data

data = data.groupby("speech_act").filter(lambda x: len(x) >= 6)
data['speech_act'] = data['speech_act'].str.strip()
data['speech_act'] = pd.Categorical(data['speech_act'])
data['label'] = data['speech_act'].cat.codes

In [4]:
data['label'].value_counts()

5     3725
8       96
10      64
3       62
4       26
1       24
0       22
6       18
12      13
11      11
9        7
7        6
2        6
Name: label, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test = train_test_split(data, test_size=0.2, random_state=100, stratify=data['label'])
X_train, X_val = train_test_split(X_train, test_size=0.2, random_state=100, stratify=X_train['label'])
X_val= X_val.reset_index(drop=True)
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)

In [6]:
print(len(data))
print(len(X_train))
print(len(X_test))
print(len(X_val))

4080
2611
816
653


In [7]:
print(len(X_train['label'].value_counts()))
print(len(X_val['label'].value_counts()))
print(len(X_test['label'].value_counts()))

13
13
13


In [8]:
print(X_train['label'].value_counts())
print(X_val['label'].value_counts())
print(X_test['label'].value_counts())

5     2384
8       62
10      41
3       40
4       17
1       15
0       14
6       11
12       8
11       7
7        4
9        4
2        4
Name: label, dtype: int64
5     596
8      15
3      10
10     10
1       4
0       4
4       4
6       3
12      2
11      2
9       1
2       1
7       1
Name: label, dtype: int64
5     745
8      19
10     13
3      12
4       5
1       5
6       4
0       4
12      3
9       2
11      2
2       1
7       1
Name: label, dtype: int64


## Set up GPU for training

In [9]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
Device name: Tesla T4


# Fine-tuning BERTimbau

In [10]:
!pip install transformers



In [11]:
from transformers import BertTokenizer

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('neuralmind/bert-large-portuguese-cased', do_lower_case=False)

# Create a function to tokenize a set of texts
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #    (1) Tokenize the sentence
        #    (2) Add the `[CLS]` and `[SEP]` token to the start and end
        #    (3) Truncate/Pad sentence to max length
        #    (4) Map tokens to their IDs
        #    (5) Create attention mask
        #    (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=sent,
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,                  # Max length to truncate/pad
            pad_to_max_length=True,         # Pad sentence to max length
            #return_tensors='pt',           # Return PyTorch tensor
            return_attention_mask=True      # Return attention mask
            )

        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    return input_ids, attention_masks

Now let's tokenize our data.

In [12]:
# Specify `MAX_LEN`
MAX_LEN = 128

# Print sentence 0 and its encoded token ids
token_ids = list(preprocessing_for_bert([data['sentence'][0]])[0].squeeze().numpy())
print('Original: ', data['sentence'][0])
print('Token IDs: ', token_ids)

# Run function `preprocessing_for_bert` on the train set and the validation set
print('Tokenizing data...')
train_inputs, train_masks = preprocessing_for_bert(X_train['sentence'])
val_inputs, val_masks = preprocessing_for_bert(X_val['sentence'])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Original:  Eu sei que tô lascado , todo dia tem um processo .
Token IDs:  [101, 3396, 18661, 179, 160, 22347, 10497, 1196, 117, 1364, 644, 376, 222, 1673, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Tokenizing data...


### Create PyTorch DataLoader

We will create an iterator for our dataset using the torch DataLoader class. This will help save on memory during training and boost the training speed.

In [13]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Convert other data types to torch.Tensor
train_labels = torch.LongTensor(X_train['label'])
val_labels = torch.LongTensor(X_val['label'])

# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
batch_size = 32

# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

# Create the DataLoader for our validation set
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

## 3. Train Our Model

In [14]:
%%time
import torch
import torch.nn as nn
from transformers import BertModel

# Create the BertClassfier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param    bert: a BertModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 1024, 50, len(X_train['label'].unique())

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('neuralmind/bert-large-portuguese-cased')

        # Instantiate an one-layer feed-forward classifier
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            #nn.Dropout(0.5),
            nn.Linear(H, D_out)
        )

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)

        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits

CPU times: user 54.9 ms, sys: 2.77 ms, total: 57.7 ms
Wall time: 85.9 ms


### Optimizer & Learning Rate Scheduler

In [15]:
from transformers import AdamW, get_linear_schedule_with_warmup

def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)

    # Tell PyTorch to run the model on GPU
    bert_classifier.to(device)

    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=5e-5,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_dataloader) * epochs

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0, # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

### Training Loop

### use weights on the cost function

In [16]:
from sklearn.utils import class_weight
use_weights = True # due to class imbalance
weights = class_weight.compute_class_weight(class_weight ='balanced',
                                             classes =   np.unique(X_train['label']),
                                             y =    X_train['label'])

In [17]:
weights = torch.Tensor(weights).to(device)
weights

tensor([14.3462, 13.3897, 50.2115,  5.0212, 11.8145,  0.0842, 18.2587, 50.2115,
         3.2395, 50.2115,  4.8987, 28.6923, 25.1058], device='cuda:0')

### Train

In [18]:
import random
import time

# Specify loss function
if use_weights:
  loss_fn = nn.CrossEntropyLoss(weight=weights)
else:
  loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            if (step % 20 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch

            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
            print("-"*70)
        print("\n")

    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())

        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()

        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

### Evaluation on Validation Set

In [19]:
import torch.nn.functional as F

def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT model to predict probabilities
    on the test set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    all_logits = []

    # For each batch in our test set...
    for batch in test_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        all_logits.append(logits)

    # Concatenate logits from each batch
    all_logits = torch.cat(all_logits, dim=0)

    # Apply softmax to calculate probabilities
    probs = F.softmax(all_logits, dim=1).cpu()

    return probs

In [20]:
epoch = 1

In [24]:
set_seed(42)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=epoch)
train(bert_classifier, train_dataloader, val_dataloader, epochs=epoch, evaluation=True)

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]



Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------
   1    |   20    |   2.562538   |     -      |     -     |   43.24  
   1    |   40    |   2.546507   |     -      |     -     |   40.32  
   1    |   60    |   2.411132   |     -      |     -     |   42.39  
   1    |   80    |   2.393500   |     -      |     -     |   42.00  
   1    |   81    |   1.902895   |     -      |     -     |   1.33   
----------------------------------------------------------------------
   1    |    -    |   2.472426   |  2.338967  |   76.27   |  184.77  
----------------------------------------------------------------------


Training complete!


In [27]:
# Compute predicted probabilities on the val set
probs = bert_predict(bert_classifier, val_dataloader)

big_values, preds = torch.max(probs, dim=1)

print(classification_report(list(X_val['label']), preds))

df_report = pd.DataFrame(classification_report(list(X_val['label']), preds, output_dict=True))
df_report.to_csv(path_results + 'bertimbau_epoch_{}_batch_size_{}_weights_{}_val.csv'.format(epoch, batch_size, str(use_weights)), index=False)

conf_df = pd.DataFrame(confusion_matrix(list(X_val['label']), preds))
conf_df.to_csv(path_results + 'bertimbau_epoch_{}_batch_size_{}_weights_{}_conf_matrix_val.csv'.format(epoch, batch_size, str(use_weights)), index=False)
conf_df

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       1.00      0.25      0.40         4
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00        10
           4       0.00      0.00      0.00         4
           5       0.96      0.80      0.87       596
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         1
           8       1.00      0.60      0.75        15
           9       0.00      0.00      0.00         1
          10       0.06      0.90      0.12        10
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         2

    accuracy                           0.76       653
   macro avg       0.23      0.20      0.16       653
weighted avg       0.91      0.76      0.82       653



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,0,0,0,0,4,0,0,0,0,0,0,0
1,0,1,0,0,0,2,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,6,0,0,0,0,4,0,0
4,0,0,0,0,0,2,0,0,0,0,2,0,0
5,0,0,0,0,0,476,0,0,0,0,119,0,1
6,0,0,0,0,0,1,0,0,0,0,2,0,0
7,0,0,0,0,0,1,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,9,0,6,0,0
9,0,0,0,0,0,1,0,0,0,0,0,0,0


In [21]:
# Concatenate the train set and the validation set
full_train_data = torch.utils.data.ConcatDataset([train_data, val_data])
full_train_sampler = RandomSampler(full_train_data)
full_train_dataloader = DataLoader(full_train_data, sampler=full_train_sampler, batch_size=batch_size)

# Train the Bert Classifier on the entire training data
set_seed(42)
bert_classifier, optimizer, scheduler = initialize_model(epochs=epoch)
train(bert_classifier, full_train_dataloader, epochs=epoch)



Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------
   1    |   20    |   2.470035   |     -      |     -     |   43.36  
   1    |   40    |   2.425854   |     -      |     -     |   42.72  
   1    |   60    |   2.241327   |     -      |     -     |   42.14  
   1    |   80    |   2.496181   |     -      |     -     |   42.11  
   1    |   100   |   2.428128   |     -      |     -     |   42.29  
   1    |   101   |   2.054695   |     -      |     -     |   2.10   
----------------------------------------------------------------------


Training complete!


## 4. Predictions on Test Set

In [22]:
test_inputs, test_masks = preprocessing_for_bert(X_test.sentence)

# Create the DataLoader for our test set
test_dataset = TensorDataset(test_inputs, test_masks)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)



### 4.2. Predictions

In [23]:
# Compute predicted probabilities on the test set
probs = bert_predict(bert_classifier, test_dataloader)

big_values, preds = torch.max(probs, dim=1)

In [24]:
print(classification_report(list(X_test['label']), preds))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.05      0.20      0.08         5
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         5
           5       0.94      0.96      0.95       745
           6       0.00      0.00      0.00         4
           7       0.00      0.00      0.00         1
           8       0.59      0.84      0.70        19
           9       0.00      0.00      0.00         2
          10       0.25      0.08      0.12        13
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         3

    accuracy                           0.90       816
   macro avg       0.14      0.16      0.14       816
weighted avg       0.88      0.90      0.89       816



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
df_report = pd.DataFrame(classification_report(list(X_test['label']), preds, output_dict=True))
df_report.to_csv(path_results+'bertimbau_epoch_{}_batch_size_{}_weights_{}_test.csv'.format(epoch, batch_size, str(use_weights)), index=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
confusion_matrix(list(X_test['label']), preds)

array([[  0,   0,   0,   0,   0,   4,   0,   0,   0,   0,   0,   0,   0],
       [  0,   1,   0,   0,   0,   4,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0],
       [  0,   1,   0,   0,   0,  10,   0,   0,   1,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   4,   0,   0,   1,   0,   0,   0,   0],
       [  0,  18,   0,   0,   0, 718,   0,   0,   7,   0,   2,   0,   0],
       [  0,   0,   0,   0,   0,   4,   0,   0,   0,   0,   0,   0,   0],
       [  0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   3,   0,   0,  16,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   1,   0,   0],
       [  0,   0,   0,   0,   0,  11,   0,   0,   1,   0,   1,   0,   0],
       [  0,   0,   0,   0,   0,   1,   0,   0,   1,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0,   0,   0]])

In [None]:
conf_df = pd.DataFrame(confusion_matrix(list(X_test['label']), preds))
conf_df.to_csv(path_results+'bertimbau_epoch_{}_batch_size_{}_weights_{}_conf_matrix_test.csv'.format(epoch, batch_size, str(use_weights)), index=False)
conf_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,0,0,0,0,4,0,0,0,0,0,0,0
1,0,1,0,0,0,4,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0
3,0,1,0,0,0,10,0,0,1,0,0,0,0
4,0,0,0,0,0,4,0,0,1,0,0,0,0
5,0,18,0,0,0,718,0,0,7,0,2,0,0
6,0,0,0,0,0,4,0,0,0,0,0,0,0
7,0,1,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,3,0,0,16,0,0,0,0
9,0,0,0,0,0,1,0,0,0,0,1,0,0
