In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import pandas as pd
import math
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F

In [3]:
import torch
import os
from tqdm import tqdm,trange
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split

In [4]:
from transformers import (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer)

In [6]:
def replace_new_line(a):
    return(a.replace('\n' , ' '))

In [7]:
df=pd.read_csv('data/25_train_data.csv', sep=',',encoding = 'ISO-8859-1')
df=df[['text_block_w','label']]

df.text_block_w=df.text_block_w.apply(replace_new_line)

print(df.shape)
df.head()

(122417, 2)


Unnamed: 0,text_block_w,label
0,ajority of the Services or materially impacts ...,0
1,s licensors in a manner not authorised under t...,0
2,ir WHS performance is managed. (iv) At NAB's ...,0
3,35 28.9 Variations & waiver 35 28.10 In...,0
4,ts (such term having the same meaning as in th...,0


In [8]:
df_e=pd.read_csv('data/25_eval_data.csv', sep=',',encoding = 'ISO-8859-1')
print(df_e.shape)
df_e=df_e[['text_block_w','label']]
df_e.text_block_w=df_e.text_block_w.apply(replace_new_line)
df_e.head()

(39314, 3)


Unnamed: 0,text_block_w,label
0,Statement of Work To Master Services Agreemen...,1
1,ce of the services. Commencement and Duration...,0
2,Duration The engagement is presently anticip...,0
3,iscounted Rate* (GBP) Estimated Effort (days)...,0
4,(days) Total Estimated Fees (GBP) Technica...,0


In [9]:
df_e.label.value_counts()

0    37086
1     2228
Name: label, dtype: int64

In [10]:
df2=df.loc[df.label==1,:]
df2=df2.append(df2)
df2=df2.append(df.loc[df.label==0,:].sample(20000))
df2=df2.sample(frac=1)
df2.label.value_counts()

0    20000
1    19924
Name: label, dtype: int64

In [11]:
train_len=df2.shape[0]
train_len

39924

In [12]:
df=df2.copy()
df.reset_index(inplace=True,drop=True)
df.shape

(39924, 2)

In [13]:
df.head()

Unnamed: 0,text_block_w,label
0,"and between Capital One Services, LLC (""Capit...",1
1,"equired or permitted to be given. If mailed, a...",0
2,clear slide N CONFIDENTIALITY AGREEMENT This A...,1
3,4/22/2019 GitHub Terms of Service - GitHub He...,1
4,clear slide CONFIDENTIALITY AGREEMENT This Agr...,1


In [14]:
df.tail()

Unnamed: 0,text_block_w,label
39919,ubject to an automatic three percent (3%) incr...,0
39920,"ing: Enabled TimeZone: (UTC) Dublin, Edinburg...",0
39921,DocuSign Envelope ID: 63F05300-CBD7-4315-9CC0-...,1
39922,Projektwerkzeugvertrag AD-Plate Cassette und ...,0
39923,"pril 1, 2018 to September 30, 2018. In the eve...",0


In [15]:
df=df.append(df_e)


In [16]:
df.reset_index(inplace=True,drop=True)
df.head()

Unnamed: 0,text_block_w,label
0,"and between Capital One Services, LLC (""Capit...",1
1,"equired or permitted to be given. If mailed, a...",0
2,clear slide N CONFIDENTIALITY AGREEMENT This A...,1
3,4/22/2019 GitHub Terms of Service - GitHub He...,1
4,clear slide CONFIDENTIALITY AGREEMENT This Agr...,1


In [17]:
df.tail()

Unnamed: 0,text_block_w,label
79233,T OF THIS AGREEMENT IS LIMITED TO THE SUM OF T...,0
79234,terminate (i) at the end of the Trial Period s...,0
79235,ogMeln your account. Notice is given (a) upon...,0
79236,"rier, on the second business day after notice ...",0
79237,"ve ordered, as set forth here: httDs://www.lom...",0


In [18]:
df_e.tail()

Unnamed: 0,text_block_w,label
39309,T OF THIS AGREEMENT IS LIMITED TO THE SUM OF T...,0
39310,terminate (i) at the end of the Trial Period s...,0
39311,ogMeln your account. Notice is given (a) upon...,0
39312,"rier, on the second business day after notice ...",0
39313,"ve ordered, as set forth here: httDs://www.lom...",0


In [19]:
df_e.head()

Unnamed: 0,text_block_w,label
0,Statement of Work To Master Services Agreemen...,1
1,ce of the services. Commencement and Duration...,0
2,Duration The engagement is presently anticip...,0
3,iscounted Rate* (GBP) Estimated Effort (days)...,0
4,(days) Total Estimated Fees (GBP) Technica...,0


In [20]:
df.iloc[train_len:train_len+5,:]

Unnamed: 0,text_block_w,label
39924,Statement of Work To Master Services Agreemen...,1
39925,ce of the services. Commencement and Duration...,0
39926,Duration The engagement is presently anticip...,0
39927,iscounted Rate* (GBP) Estimated Effort (days)...,0
39928,(days) Total Estimated Fees (GBP) Technica...,0


In [21]:
df.shape

(79238, 2)

In [22]:
df.columns=['texts','labels']
df.head()

Unnamed: 0,texts,labels
0,"and between Capital One Services, LLC (""Capit...",1
1,"equired or permitted to be given. If mailed, a...",0
2,clear slide N CONFIDENTIALITY AGREEMENT This A...,1
3,4/22/2019 GitHub Terms of Service - GitHub He...,1
4,clear slide CONFIDENTIALITY AGREEMENT This Agr...,1


In [23]:
df.labels.value_counts()

0    57086
1    22152
Name: labels, dtype: int64

In [24]:
df.to_csv('data/text_classification_dataset_2.csv',index=False)

In [25]:
!ls data/

25_eval_data.csv   text_classification_dataset_2.csv  xlnet_out_model
25_train_data.csv  text_classification_dataset.csv
ner_dataset.csv    xlnet-base-cased


## Load data

**Load CSV data**

In [26]:
data_path = "data/" 

In [27]:
data_file_address = "data/text_classification_dataset_2.csv"

In [28]:
df_data = pd.read_csv(data_file_address,sep=",",encoding="utf-8")

In [29]:
df_data.columns

Index(['texts', 'labels'], dtype='object')

In [30]:
df_data.head(n=20)

Unnamed: 0,texts,labels
0,"and between Capital One Services, LLC (""Capit...",1
1,"equired or permitted to be given. If mailed, a...",0
2,clear slide N CONFIDENTIALITY AGREEMENT This A...,1
3,4/22/2019 GitHub Terms of Service - GitHub He...,1
4,clear slide CONFIDENTIALITY AGREEMENT This Agr...,1
5,8 GENESYS Arjen Akkerman KPN Kromme Schaft3 39...,1
6,N NO: CAN002 TITLE OF CHANGE: Application Ra...,1
7,"lh September 2007 (the ""MSA"") including all pr...",0
8,"Execution Version March 1,2019 MUSEUM FINANC...",1
9,Harvey Name: Brent Looney Namezm Title: _VP ...,1


**Have a look labels**

In [31]:
df_data.labels.unique()

array([1, 0])

In [32]:
# Analyse the labels distribution
df_data.labels.value_counts()

0    57086
1    22152
Name: labels, dtype: int64

## Parser data

**Parser data into document structure**

In [33]:
# Get sentence data
sentences = df_data.texts.to_list()
sentences[0]

' and between Capital One Services, LLC ("Capital One and Doctor on Demand, Inc. (the Supplier as of December 18, 2015 (the "SOW Effective Date and is attached to incorporated in and made a part of that certain Master '

In [34]:
# Get tag labels data
labels = df_data.labels.to_list()
print(labels[0])

1


**Make TAG name into index for training**

In [35]:
# Set a dict for mapping id to tag name
# tag2idx = {t: i for i, t in enumerate(df_data.labels.unique())}

# Recommend to set it by manual define, good for reusing
# 0:negative, 1: positive
tag2idx={'0': 0,
  '1': 1}

In [36]:
tag2idx

{'0': 0, '1': 1}

In [37]:
# Mapping index to name
tag2name={tag2idx[key] : key for key in tag2idx.keys()}

In [38]:
tag2name

{0: '0', 1: '1'}

## Make tranning data

Make raw data into trainable data for XLNet, including:

- Set gpu environment
- Load tokenizer and tokenize
- Set 3 embedding, token embedding, mask word embedding, segmentation embedding
- Split data set into train and validate, then send them to dataloader

**Set up gpu environment**

In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

In [40]:
n_gpu

1

### Load tokenizer

Remember to install sentencepiece with  'pip install sentencepiece'

In [41]:
# Manual define vocabulary address, if you download the model in local
# The vocabulary can download from "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model"
vocabulary = 'data/xlnet-base-cased/xlnet-base-cased-spiece.model'

In [42]:
# Len of the sentence must be the same as the training model
# See model's 'max_position_embeddings' = 512
max_len  = 64

In [43]:
# With cased model, set do_lower_case = False
tokenizer = XLNetTokenizer(vocab_file=vocabulary,do_lower_case=False)

### Set text input embedding

- token id embedding
- mask embedding
- segment embedding

The Embedding process was referred to [XLNet official repo](https://github.com/zihangdai/xlnet/blob/master/classifier_utils.py)


**This process is huge differnent from BERT**

In [44]:
max_len  = 64

full_input_ids = []
full_input_masks = []
full_segment_ids = []

SEG_ID_A   = 0
SEG_ID_B   = 1
SEG_ID_CLS = 2
SEG_ID_SEP = 3
SEG_ID_PAD = 4

UNK_ID = tokenizer.encode("<unk>")[0]
CLS_ID = tokenizer.encode("<cls>")[0]
SEP_ID = tokenizer.encode("<sep>")[0]
MASK_ID = tokenizer.encode("<mask>")[0]
EOD_ID = tokenizer.encode("<eod>")[0]

for i,sentence in enumerate(sentences):
    # Tokenize sentence to token id list
    tokens_a = tokenizer.encode(sentence)
    
    # Trim the len of text
    if(len(tokens_a)>max_len-2):
        tokens_a = tokens_a[:max_len-2]
        
        
    tokens = []
    segment_ids = []
    
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(SEG_ID_A)
        
    # Add <sep> token 
    tokens.append(SEP_ID)
    segment_ids.append(SEG_ID_A)
    
    
    # Add <cls> token
    tokens.append(CLS_ID)
    segment_ids.append(SEG_ID_CLS)
    
    input_ids = tokens
    
    # The mask has 0 for real tokens and 1 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [0] * len(input_ids)

    # Zero-pad up to the sequence length at fornt
    if len(input_ids) < max_len:
        delta_len = max_len - len(input_ids)
        input_ids = [0] * delta_len + input_ids
        input_mask = [1] * delta_len + input_mask
        segment_ids = [SEG_ID_PAD] * delta_len + segment_ids

    assert len(input_ids) == max_len
    assert len(input_mask) == max_len
    assert len(segment_ids) == max_len
    
    full_input_ids.append(input_ids)
    full_input_masks.append(input_mask)
    full_segment_ids.append(segment_ids)
    
    if 3 > i:
        print("No.:%d"%(i))
        print("sentence: %s"%(sentence))
        print("input_ids:%s"%(input_ids))
        print("attention_masks:%s"%(input_mask))
        print("segment_ids:%s"%(segment_ids))
        print("\n")
    
    
    
    

No.:0
sentence:  and between Capital One Services, LLC ("Capital One and Doctor on Demand, Inc. (the Supplier as of December 18, 2015 (the "SOW Effective Date and is attached to incorporated in and made a part of that certain Master 
input_ids:[0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 161, 5293, 436, 2375, 19, 12154, 17, 10, 12, 19101, 12789, 436, 21, 7995, 31, 21853, 19, 1234, 9, 17, 10, 305, 30314, 34, 20, 683, 501, 19, 2607, 17, 10, 305, 17, 12, 83, 11077, 20920, 13450, 21, 27, 4564, 22, 7335, 25, 21, 140, 24, 188, 20, 29, 1028, 4384, 4, 3]
attention_masks:[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
segment_ids:[4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]


No.:1
sentence: equired or permitted to be g

### Set label embedding

In [45]:
# Make label into id
tags = [tag2idx[str(lab)] for lab in labels]
print(tags[0])

1


## Split data into train and validate

70% for training, 30% for validation

In [46]:
len(full_input_ids)

79238

**Split all data**

In [47]:
tr_inputs= full_input_ids[0:train_len]
val_inputs= full_input_ids[train_len:len(full_input_ids)]
tr_tags =tags[0:train_len]
val_tags=tags[train_len:len(tags)]
tr_masks =full_input_masks[0:train_len]
val_masks=full_input_masks[train_len:len(full_input_masks)]
tr_segs =full_segment_ids[0:train_len]
val_segs=full_segment_ids[train_len:len(full_segment_ids)]

In [48]:
# tr_inputs, val_inputs, tr_tags, val_tags,tr_masks, val_masks,tr_segs, val_segs = train_test_split(full_input_ids, tags,full_input_masks,full_segment_ids, 
#                                                             random_state=4, test_size=0.30)

In [49]:
len(tr_inputs),len(val_inputs),len(tr_segs),len(val_segs)

(39924, 39314, 39924, 39314)

In [51]:
sum(tags[0:train_len]),train_len

(19924, 39924)

**Set data into tensor**

Not recommend tensor.to(device) at this process, since it will run out of GPU memory

In [52]:
tr_inputs = torch.tensor(tr_inputs)
val_inputs = torch.tensor(val_inputs)
tr_tags = torch.tensor(tr_tags)
val_tags = torch.tensor(val_tags)
tr_masks = torch.tensor(tr_masks)
val_masks = torch.tensor(val_masks)
tr_segs = torch.tensor(tr_segs)
val_segs = torch.tensor(val_segs)

**Put data into data loader**

In [53]:
# Set batch num
batch_num = 32

In [54]:
# Set token embedding, attention embedding, segment embedding
train_data = TensorDataset(tr_inputs, tr_masks,tr_segs, tr_tags)
train_sampler = RandomSampler(train_data)
# Drop last can make batch training better for the last one
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_num,drop_last=True)

valid_data = TensorDataset(val_inputs, val_masks,val_segs, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=batch_num)

## Train model

**Load XLNet model**

In [55]:
# In this document, contain confg(txt) and weight(bin) files
model_file_address = 'xlnet-base-cased'

In [56]:
# Will load config and weight with from_pretrained()
# Recommand download the model before using
# Download model from "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin"
# Download model from "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json" 
model = XLNetForSequenceClassification.from_pretrained(model_file_address,num_labels=len(tag2idx))

In [57]:
model;

In [58]:
# Set model to GPU,if you are using GPU machine
model.to(device);

In [59]:
# Add multi GPU support
if n_gpu >1:
    model = torch.nn.DataParallel(model)

In [60]:
# Set epoch and grad max num
epochs = 10
max_grad_norm = 1.0

In [61]:
# Cacluate train optimiazaion num
num_train_optimization_steps = int( math.ceil(len(tr_inputs) / batch_num) / 1) * epochs

### Set fine tuning method

**Manual optimizer**

In [62]:
# True: fine tuning all the layers 
# False: only fine tuning the classifier layers
# Since XLNet in 'pytorch_transformer' did not contian classifier layers
# FULL_FINETUNING = True need to set True
FULL_FINETUNING = True

In [63]:
if FULL_FINETUNING:
    # Fine tune model all layer parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
else:
    # Only fine tune classifier parameters
    param_optimizer = list(model.classifier.named_parameters()) 
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]
optimizer = Adam(optimizer_grouped_parameters, lr=3e-5)

### Fine-tuing model

In [64]:
# TRAIN loop
model.train();

In [65]:
print("***** Running training *****")
print("  Num examples = %d"%(len(tr_inputs)))
print("  Batch size = %d"%(batch_num))
print("  Num steps = %d"%(num_train_optimization_steps))
for _ in trange(epochs,desc="Epoch"):
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
        # add batch to gpu
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_segs,b_labels = batch
        
        # forward pass
        outputs = model(input_ids =b_input_ids,token_type_ids=b_segs, input_mask = b_input_mask,labels=b_labels)
        loss, logits = outputs[:2]
        if n_gpu>1:
            # When multi gpu, average it
            loss = loss.mean()
        
        # backward pass
        loss.backward()
        
        # track train loss
        tr_loss += loss.item()
        nb_tr_examples += b_input_ids.size(0)
        nb_tr_steps += 1
        
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        
        # update parameters
        optimizer.step()
        optimizer.zero_grad()
        
    # print train loss per epoch
    print("Train loss: {}".format(tr_loss/nb_tr_steps))
        

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 39924
  Batch size = 32
  Num steps = 12480


Epoch:  10%|█         | 1/10 [06:06<54:59, 366.62s/it]

Train loss: 0.24941736265102815


Epoch:  20%|██        | 2/10 [12:15<48:58, 367.25s/it]

Train loss: 0.16753745539399176


Epoch:  30%|███       | 3/10 [18:24<42:55, 367.92s/it]

Train loss: 0.12460053123760768


Epoch:  40%|████      | 4/10 [24:30<36:43, 367.29s/it]

Train loss: 0.09614585031723154


Epoch:  50%|█████     | 5/10 [30:35<30:32, 366.48s/it]

Train loss: 0.08339189088648692


Epoch:  60%|██████    | 6/10 [36:39<24:22, 365.73s/it]

Train loss: 0.06760865063634316


Epoch:  70%|███████   | 7/10 [42:42<18:15, 365.09s/it]

Train loss: 0.056317288407633184


Epoch:  80%|████████  | 8/10 [48:46<12:09, 364.59s/it]

Train loss: 0.04888874859830668


Epoch:  90%|█████████ | 9/10 [54:49<06:04, 364.22s/it]

Train loss: 0.04509560076210651


Epoch: 100%|██████████| 10/10 [1:00:52<00:00, 363.94s/it]

Train loss: 0.03794099317464766





## Save model 

In [66]:
xlnet_out_address = 'data/xlnet_out_model/tc07'

In [67]:
# Make dir if not exits
if not os.path.exists(xlnet_out_address):
        os.makedirs(xlnet_out_address)

In [68]:
# Save a trained model, configuration and tokenizer
model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self

In [69]:
# If we save using the predefined names, we can load using `from_pretrained`
output_model_file = os.path.join(xlnet_out_address, "pytorch_model.bin")
output_config_file = os.path.join(xlnet_out_address, "config.json")

In [70]:
# Save model into file
torch.save(model_to_save.state_dict(), output_model_file)
model_to_save.config.to_json_file(output_config_file)
tokenizer.save_vocabulary(xlnet_out_address)

('data/xlnet_out_model/tc07/spiece.model',)

## Load model

In [71]:
model = XLNetForSequenceClassification.from_pretrained(xlnet_out_address,num_labels=len(tag2idx))

In [72]:
# Set model to GPU
model.to(device);

In [73]:
if n_gpu >1:
    model = torch.nn.DataParallel(model)

## Eval model

In [74]:
# Evalue loop
model.eval();

In [75]:
# Set acc funtion
def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)

In [76]:
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0

y_true = []
y_predict = []
print("***** Running evaluation *****")
print("  Num examples ={}".format(len(val_inputs)))
print("  Batch size = {}".format(batch_num))
for step, batch in enumerate(valid_dataloader):
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_segs,b_labels = batch
    
    with torch.no_grad():
        outputs = model(input_ids =b_input_ids,token_type_ids=b_segs, input_mask = b_input_mask,labels=b_labels)
        tmp_eval_loss, logits = outputs[:2]
    
    # Get textclassification predict result
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    tmp_eval_accuracy = accuracy(logits, label_ids)
#     print(tmp_eval_accuracy)
#     print(np.argmax(logits, axis=1))
#     print(label_ids)
    
    # Save predict and real label reuslt for analyze
    for predict in np.argmax(logits, axis=1):
        y_predict.append(predict)
        
    for real_result in label_ids.tolist():
        y_true.append(real_result)

    
    eval_loss += tmp_eval_loss.mean().item()
    eval_accuracy += tmp_eval_accuracy
   
    nb_eval_steps += 1
    
    
eval_loss = eval_loss / nb_eval_steps
eval_accuracy = eval_accuracy / len(val_inputs)
loss = tr_loss/nb_tr_steps 
result = {'eval_loss': eval_loss,
                  'eval_accuracy': eval_accuracy,
                  'loss': loss}
report = classification_report(y_pred=np.array(y_predict),y_true=np.array(y_true))
CFM=confusion_matrix(y_true=np.array(y_true), y_pred=np.array(y_predict))
# Save the report into file
output_eval_file = os.path.join(xlnet_out_address, "eval_results.txt")
with open(output_eval_file, "w") as writer:
    print("***** Eval results *****")
    for key in sorted(result.keys()):
        print("  %s = %s"%(key, str(result[key])))
        writer.write("%s = %s\n" % (key, str(result[key])))
        
    print(report)
    print(CFM)
    writer.write("\n\n")  
    writer.write(report)

***** Running evaluation *****
  Num examples =39314
  Batch size = 32
***** Eval results *****
  eval_accuracy = 0.9320343897848095
  eval_loss = 0.3457834610262351
  loss = 0.03794099317464766
              precision    recall  f1-score   support

           0       0.99      0.93      0.96     37086
           1       0.45      0.90      0.60      2228

    accuracy                           0.93     39314
   macro avg       0.72      0.92      0.78     39314
weighted avg       0.96      0.93      0.94     39314

[[34627  2459]
 [  213  2015]]
