In [0]:
## Mount Google Drive into the Colab runtime at /content/drive.
## This triggers an interactive authorization prompt the first time it runs.
## NOTE(review): later cells use relative 'drive/My Drive/...' paths, which
## presumably resolve against a /content working directory -- confirm.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/a3/78/92cedda05552398352ed9784908b834ee32a0bd071a9b32de287327370b7/transformers-2.8.0-py3-none-any.whl (563kB)
[K     |▋                               | 10kB 17.2MB/s eta 0:00:01[K     |█▏                              | 20kB 1.7MB/s eta 0:00:01[K     |█▊                              | 30kB 2.5MB/s eta 0:00:01[K     |██▎                             | 40kB 3.3MB/s eta 0:00:01[K     |███                             | 51kB 2.1MB/s eta 0:00:01[K     |███▌                            | 61kB 2.5MB/s eta 0:00:01[K     |████                            | 71kB 2.9MB/s eta 0:00:01[K     |████▋                           | 81kB 3.2MB/s eta 0:00:01[K     |█████▎                          | 92kB 2.5MB/s eta 0:00:01[K     |█████▉                          | 102kB 2.8MB/s eta 0:00:01[K     |██████▍                         | 112kB 2.8MB/s eta 0:00:01[K     |███████                         | 122kB 2.8M

In [0]:
import random

import numpy as np
import torch

# Seed every random number generator the notebook touches, not just Python's:
# DataLoader shuffling, dropout and weight initialisation draw from the PyTorch
# generator, so seeding `random` alone does not make training runs repeatable.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

In [0]:
import torch
from torch.utils.data import Dataset
from transformers import RobertaTokenizer
import numpy as np
import csv

def prepare_features(seq_1, tokenizer, max_seq_length = 32, 
             zero_pad = True, include_CLS_token = True, include_SEP_token = True):
    """Turn one sentence into fixed-length token ids and an attention mask.

    Args:
        seq_1: Raw sentence text.
        tokenizer: Object providing ``tokenize``, ``convert_tokens_to_ids``,
            ``cls_token`` and ``sep_token`` (and, optionally, ``pad_token_id``).
        max_seq_length: Target length of the returned sequences.
        zero_pad: When True, right-pad ids and mask up to ``max_seq_length``.
        include_CLS_token: Prepend ``tokenizer.cls_token``.
        include_SEP_token: Append ``tokenizer.sep_token``.

    Returns:
        ``(input_ids, input_mask)`` as 1-D ``torch`` tensors. The mask is 1 for
        real tokens (including the special tokens) and 0 for padding.
    """
    ## Tokenize input
    tokens_a = tokenizer.tokenize(seq_1)

    ## Truncate. Two slots are always reserved for the special tokens, even
    ## when one of the include_* flags is switched off.
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0:(max_seq_length - 2)]

    ## Assemble [CLS] + tokens + [SEP]
    tokens = []
    if include_CLS_token:
        tokens.append(tokenizer.cls_token)
    tokens.extend(tokens_a)
    if include_SEP_token:
        tokens.append(tokenizer.sep_token)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    ## Attention mask: 1 for every real token
    input_mask = [1] * len(input_ids)

    ## Pad to the requested sequence length
    if zero_pad:
        # Use the tokenizer's own padding id. Id 0 is only the padding id for
        # BERT-style vocabularies; RoBERTa assigns id 0 to a different special
        # token. Fall back to 0 when the tokenizer exposes no pad id.
        pad_id = getattr(tokenizer, 'pad_token_id', None)
        if pad_id is None:
            pad_id = 0
        missing = max_seq_length - len(input_ids)
        if missing > 0:
            input_ids.extend([pad_id] * missing)
            input_mask.extend([0] * missing)
    return torch.tensor(input_ids), torch.tensor(input_mask)


class CVEdatasetA(Dataset):
    """Sentence-pair dataset backed by two CSV files in ``root``.

    ``subtaskA_data.csv`` supplies ``(id, sentence, sentence)`` rows and
    ``subtaskA_answers.csv`` supplies ``(id, answer)`` rows; the two files are
    expected to list examples in the same order (checked per item).
    """

    def __init__(self, root='../Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/', maxlen=32):
        """Load both CSV files into memory and create the tokenizer.

        Args:
            root: Directory prefix (including the trailing separator) that is
                concatenated with the two CSV file names.
            maxlen: Sequence length every tokenized sentence is cut/padded to.
        """
        #Load data and labels
        print('Getting data from: ', root)

        self.answers = []
        self.data = []

        # csv.reader copes with a missing final newline and CRLF line endings,
        # which the previous "drop the last character" parsing did not.
        with open(root+'subtaskA_answers.csv', newline='') as fa:
            for row in csv.reader(fa):
                if not row:
                    continue  # tolerate blank lines
                self.answers.append((int(row[0]), int(row[1])))

        with open(root+'subtaskA_data.csv', newline='') as fd:
            for row in csv.reader(fd):
                # Skip blank lines and the header row.
                if row and row[0] != 'id':
                    self.data.append((int(row[0]), row[1], row[2]))

        #Initialize the RoBERTa tokenizer
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-large', do_lower_case=True)

        self.maxlen = maxlen

    def __len__(self):
        """Number of sentence pairs read from the data file."""
        return len(self.data)

    def __getitem__(self, index):
        """Return one tokenized example.

        Returns:
            ``(tok_id1, tok_id2, attn_mask1, attn_mask2, label, id_n)`` where
            ``label`` is the one-hot tensor ``[1, 0]`` when the stored answer is
            1 and ``[0, 1]`` otherwise, and ``id_n`` is the example id.

        Raises:
            AssertionError: if the data row and answer row at ``index`` carry
                different ids.
        """
        id_n, sent1, sent2 = self.data[index]
        answer_id, answer = self.answers[index]
        assert id_n == answer_id, 'data/answers rows are misaligned at index {}'.format(index)

        #Construct target labels
        if answer == 1:
            label = torch.tensor([1,0])
        else:
            label = torch.tensor([0,1])

        #Tokenize each sentence on its own, honouring the configured length
        tok_id1_tensor, attn_mask1 = prepare_features(sent1, self.tokenizer, max_seq_length=self.maxlen)
        tok_id2_tensor, attn_mask2 = prepare_features(sent2, self.tokenizer, max_seq_length=self.maxlen)

        return tok_id1_tensor, tok_id2_tensor, attn_mask1, attn_mask2, label, id_n

if __name__=='__main__':
    # Smoke test: build the training and dev datasets, fetch one dev example
    # and print the shapes of its first token-id tensor, first mask and label.
    trainset = CVEdatasetA(
        root="drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/"
    )
    valset = CVEdatasetA(
        root="drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev_Data/"
    )
    a,b,c,d,e,f = valset[10]
    print(*(t.shape for t in (a, c, e)))

Getting data from:  drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/


HBox(children=(IntProgress(value=0, description='Downloading', max=898823, style=ProgressStyle(description_wid…




HBox(children=(IntProgress(value=0, description='Downloading', max=456318, style=ProgressStyle(description_wid…


Getting data from:  drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev_Data/
torch.Size([32]) torch.Size([32]) torch.Size([2])


In [0]:
import torch
import torch.nn as nn
from transformers import RobertaModel

class CVEclassifier(nn.Module):
    """Scores two sentences with one shared RoBERTa encoder and linear head.

    Each sentence is encoded separately; the hidden state at position 0 goes
    through dropout and a shared linear layer, and the two results are
    concatenated along dim 1.
    """

    def __init__(self, freeze_roberta = False, hidden_dropout_prob=0.1, num_labels=1):
        """Build the encoder and the scoring head.

        Args:
            freeze_roberta: When True, disable gradients for every encoder
                parameter so only the linear head is trained.
            hidden_dropout_prob: Dropout probability applied before the head.
            num_labels: Output width of the linear head for ONE sentence.
        """
        super(CVEclassifier, self).__init__()
        #Instantiating the pretrained RoBERTa encoder
        self.roberta_layer = RobertaModel.from_pretrained('roberta-large')

        #Freeze encoder layers (this previously referenced a non-existent
        #`bert_layer` attribute and raised AttributeError)
        if freeze_roberta:
            for p in self.roberta_layer.parameters():
                p.requires_grad = False

        #Dropout
        self.dropout = nn.Dropout(hidden_dropout_prob)
        #Classifier layer: one score per sentence; the input width is taken
        #from the encoder configuration instead of a hard-coded constant
        self.classifier = nn.Linear(self.roberta_layer.config.hidden_size, num_labels)

    def forward(self, tok_id1_tensor, tok_id2_tensor, attn_mask1, attn_mask2):
        """Return the concatenated per-sentence logits.

        Args:
            tok_id1_tensor, tok_id2_tensor: Token ids of the two sentences.
            attn_mask1, attn_mask2: Matching attention masks.

        Returns:
            Tensor made by concatenating the two head outputs along dim 1
            (two columns per example with the default ``num_labels=1``).
        """
        #Index the first element of the encoder output rather than
        #tuple-unpacking it, so the code does not depend on how many items
        #the encoder returns.
        hidden_states1 = self.roberta_layer(tok_id1_tensor, attention_mask = attn_mask1)[0]
        hidden_states2 = self.roberta_layer(tok_id2_tensor, attention_mask = attn_mask2)[0]

        #Use the hidden state of the first token as the sentence embedding
        sent_emb1 = hidden_states1[:,0]
        sent_emb2 = hidden_states2[:,0]

        #Calculate sentence scores/logits with the shared head
        logit1 = self.classifier(self.dropout(sent_emb1))
        logit2 = self.classifier(self.dropout(sent_emb2))

        #Concatenate to get full logits
        return torch.cat((logit1, logit2), 1)


In [0]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from transformers import AdamW, get_linear_schedule_with_warmup

import os
import time
import datetime
import numpy as np

# from model import CVEclassifier
# from CVEdataset import CVEdatasetA

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    """Fraction of rows in ``preds`` whose arg-max column equals the label.

    ``preds`` is reduced with an arg-max over axis 1; ``labels`` is flattened
    and compared element-wise with the result.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    expected_classes = labels.flatten()
    hits = np.sum(predicted_classes == expected_classes)
    return hits / len(expected_classes)

def format_time(elapsed):
    '''
    Render a duration given in seconds as an h:mm:ss string.

    The value is rounded to whole seconds first and then formatted through
    datetime.timedelta.
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)



def train(model, criterion, optimizer, scheduler, train_dataloader, val_dataloader, device, epochs=6):
    """Fine-tune ``model`` for ``epochs`` epochs, validating after each epoch.

    After the last epoch a single checkpoint (model, optimizer and scheduler
    state) is written under 'drive/My Drive/sem_eval/weights/'.

    Args:
        model: Callable as ``model(ids1, ids2, mask1, mask2)`` returning logits.
        criterion: Loss taking ``(logits, class_indices)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per training batch.
        train_dataloader: Yields batches whose first five entries are
            ``ids1, ids2, mask1, mask2, one_hot_labels``.
        val_dataloader: Yields six-entry batches of tensors (the five above
            plus one extra entry that is ignored here).
        device: Device the batches are moved to.
        epochs: Number of passes over ``train_dataloader``.

    Returns:
        list: Average training loss of each epoch, in order.

    Raises:
        ValueError: If ``epochs`` is smaller than 1 (previously this surfaced
            as a NameError when the checkpoint was written).
    """
    if epochs < 1:
        raise ValueError('epochs must be at least 1, got {}'.format(epochs))

    # Average training loss after each epoch (returned for plotting).
    loss_values = []

    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Running sum of the batch losses for this epoch.
        total_loss = 0

        # `dropout` and `batchnorm` layers behave differently during training vs. test
        model.train()

        for step, batch in enumerate(train_dataloader):

            # Progress update every 20 batches.
            if step % 20 == 0 and not step == 0:
                elapsed = format_time(time.time() - t0)
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

            # Unpack the batch and move it to the target device.
            b_input_id1 = batch[0].to(device)
            b_input_id2 = batch[1].to(device)
            b_input_mask1 = batch[2].to(device)
            b_input_mask2 = batch[3].to(device)
            b_labels = batch[4].to(device)

            # Gradients accumulate by default, so clear them before each step.
            model.zero_grad()

            # Forward pass: the model returns logits.
            outputs = model(b_input_id1, b_input_id2, b_input_mask1, b_input_mask2)

            # The criterion needs class indices, not one-hot vectors.
            loss = criterion(outputs, torch.argmax(b_labels, dim=1))
            total_loss += loss.item()

            loss.backward()

            # Clip the gradient norm to 1.0 to guard against exploding gradients.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()

        # Average loss over the training batches (0 batches -> 0.0 instead of
        # a ZeroDivisionError).
        n_train_batches = len(train_dataloader)
        avg_train_loss = total_loss / n_train_batches if n_train_batches else 0.0
        loss_values.append(avg_train_loss)

        print("")
        print("  Average training loss: {0:.4f}".format(avg_train_loss))
        print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

        # ========================================
        #               Validation
        # ========================================

        print("")
        print("Running Validation...")

        t0 = time.time()

        # Dropout is disabled in evaluation mode.
        model.eval()

        val_loss_sum = 0
        eval_accuracy = 0
        nb_eval_steps = 0

        for batch in val_dataloader:

            batch = tuple(t.to(device) for t in batch)
            b_input_id1, b_input_id2, b_input_mask1, b_input_mask2, b_labels, _ = batch
            label_ids = torch.argmax(b_labels, dim=1)

            # No gradients are needed for validation; computing the loss
            # inside the same context avoids building any graph at all.
            with torch.no_grad():
                logits = model(b_input_id1, b_input_id2, b_input_mask1, b_input_mask2)
                loss = criterion(logits, label_ids)

            val_loss_sum += loss.item()

            # Accuracy is accumulated per batch and averaged over batches
            # below, so a smaller final batch carries the same weight as a
            # full one.
            eval_accuracy += flat_accuracy(logits.detach().cpu().numpy(),
                                           label_ids.to('cpu').numpy())
            nb_eval_steps += 1

        if nb_eval_steps:
            val_accuracy = eval_accuracy/nb_eval_steps
            eval_loss = val_loss_sum / len(val_dataloader)
            vacc_percent = int(100*eval_accuracy/nb_eval_steps)
        else:
            # Empty validation loader: report zeros rather than dividing by zero.
            val_accuracy, eval_loss, vacc_percent = 0.0, 0.0, 0

        print("  Accuracy: {0:.4f}".format(val_accuracy))
        print("  Average validation loss: {0:.4f}".format(eval_loss))
        print("  Validation took: {:}".format(format_time(time.time() - t0)))

    # Single checkpoint written after the final epoch. The 'model_state_dist'
    # key spelling is kept because existing checkpoints are read back with it.
    chkpt_dict = {'model_state_dist':model.state_dict(),
                    'optimizer_state_dict':optimizer.state_dict(),
                     'scheduler_state_dict':scheduler.state_dict()}

    torch.save(chkpt_dict, 'drive/My Drive/sem_eval/weights/'+'pretrained-roberta-ep-run2-'+str(epoch_i+1)+'-vacc-'+str(vacc_percent)+'.pt')

    print("")
    print("Training complete!")

    return loss_values

if __name__=='__main__':

    # Choose the compute device: the first CUDA GPU when present, else the CPU.
    if not torch.cuda.is_available():
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
    else:
        device = torch.device("cuda")
        print('There are %d GPU(s) available.' % torch.cuda.device_count())
        print('Using the GPU:', torch.cuda.get_device_name(0))

    # Training and development splits.
    trainset = CVEdatasetA(
        root='drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/'
    )
    valset = CVEdatasetA(
        root='drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev_Data/'
    )

    # Output folders: training logs first, then weight checkpoints.
    for out_dir in ('drive/My Drive/sem_eval/logs/', 'drive/My Drive/sem_eval/weights/'):
        os.makedirs(out_dir, exist_ok=True)

    # Only the training loader shuffles; validation keeps file order.
    train_dataloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=5)
    val_dataloader = DataLoader(valset, batch_size=128, shuffle=False, num_workers=5)

    # Fresh model. To resume instead, load a saved checkpoint's
    # 'model_state_dist' entry into the model before moving it to the device.
    model = CVEclassifier()
    model.to(device)

    criterion = nn.CrossEntropyLoss()

    # AdamW here is the class imported from the transformers package.
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)

    # The scheduler is stepped once per batch, so the schedule length is
    # batches-per-epoch times epochs; the first 10% of steps are warm-up.
    epochs = 3
    total_steps = len(train_dataloader) * epochs
    warmup_steps = int(0.1*total_steps)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps,
    )

    train(model, criterion, optimizer, scheduler, train_dataloader, val_dataloader, device, epochs)


There are 1 GPU(s) available.
Using the GPU: Tesla K80
Getting data from:  drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/
Getting data from:  drive/My Drive/Datasets/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev_Data/


HBox(children=(IntProgress(value=0, description='Downloading', max=482, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=1425941629, style=ProgressStyle(description…



Training...
  Batch    20  of    313.    Elapsed: 0:00:49.
  Batch    40  of    313.    Elapsed: 0:01:38.
  Batch    60  of    313.    Elapsed: 0:02:27.
  Batch    80  of    313.    Elapsed: 0:03:16.
  Batch   100  of    313.    Elapsed: 0:04:05.
  Batch   120  of    313.    Elapsed: 0:04:54.
  Batch   140  of    313.    Elapsed: 0:05:43.
  Batch   160  of    313.    Elapsed: 0:06:32.
  Batch   180  of    313.    Elapsed: 0:07:21.
  Batch   200  of    313.    Elapsed: 0:08:10.
  Batch   220  of    313.    Elapsed: 0:08:59.
  Batch   240  of    313.    Elapsed: 0:09:48.
  Batch   260  of    313.    Elapsed: 0:10:37.
  Batch   280  of    313.    Elapsed: 0:11:25.
  Batch   300  of    313.    Elapsed: 0:12:14.

  Average training loss: 0.4179
  Training epoch took: 0:12:45

Running Validation...
  Accuracy: 0.9413
  Average validation loss: 0.1412
  Validation took: 0:00:23

Training...
  Batch    20  of    313.    Elapsed: 0:00:49.
  Batch    40  of    313.    Elapsed: 0:01:38.
  Batch

In [0]:
# torch.cuda.empty_cache()

In [0]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from transformers import AdamW, get_linear_schedule_with_warmup

import os
import time
import datetime
import numpy as np

# from model import CVEclassifier
# from CVEdataset import CVEdatasetA

# def format_time(elapsed):
#     '''
#     Takes a time in seconds and returns a string hh:mm:ss
#     '''
#     # Round to the nearest second.
#     elapsed_rounded = int(round((elapsed)))
    
#     # Format as hh:mm:ss
#     return str(datetime.timedelta(seconds=elapsed_rounded))

def test(data_root='drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Test Data/',
         checkpoint_path='drive/My Drive/sem_eval/weights/roberta-ep-run2-3-vacc-95.pt',
         details_path='drive/My Drive/sem_eval/logs/roberta-details.csv'):
    """Evaluate a saved checkpoint one example at a time and log the misses.

    Prints every misclassified example id with its true and predicted label,
    writes those examples to ``details_path`` as ';'-separated rows, and
    finally prints accuracy, average loss and elapsed time.

    Args:
        data_root: Directory prefix handed to ``CVEdatasetA``.
        checkpoint_path: File produced by ``torch.save`` holding a
            'model_state_dist' entry.
        details_path: CSV file (overwritten) receiving the misclassified rows.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print('There are %d GPU(s) available.' % torch.cuda.device_count())
        print('Using the GPU:', torch.cuda.get_device_name(0))
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")

    valset = CVEdatasetA(root=data_root)
    # batch_size=1 and shuffle=False are required below: the running example
    # counter is used as an index into valset.data, and the label comparison
    # relies on single-element arrays.
    val_dataloader = DataLoader(valset, batch_size = 1, num_workers = 1, shuffle=False)

    # map_location makes a checkpoint saved on a GPU loadable on the CPU
    # fallback path chosen above.
    chkpt = torch.load(checkpoint_path, map_location=device)
    model =  CVEclassifier()
    model.load_state_dict(chkpt['model_state_dist'])
    model.to(device)

    criterion = nn.CrossEntropyLoss()

    # Make sure the log folder exists before opening the details file.
    log_dir = os.path.dirname(details_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    print("Running Validation...")

    t0 = time.time()

    # Dropout is disabled in evaluation mode.
    model.eval()

    total_loss = 0
    n_correct = 0
    n_seen = 0

    # The details file is opened once for the whole run instead of being
    # reopened for every misclassified example.
    with open(details_path, 'w') as details_file:
        details_file.write('id'+';'+'sent0'+';'+'sent1'+';'+'true_label'+';'+'pred_label'+'\n')

        for batch in val_dataloader:

            batch = tuple(t.to(device) for t in batch)
            b_input_id1, b_input_id2, b_input_mask1, b_input_mask2, b_labels, id_n = batch
            gold = torch.argmax(b_labels, dim=1)

            # No gradients are needed for evaluation.
            with torch.no_grad():
                logits = model(b_input_id1, b_input_id2, b_input_mask1, b_input_mask2)
                loss = criterion(logits, gold)

            total_loss += loss.item()

            # Single-element numpy arrays (batch size is 1).
            pred_label = torch.argmax(logits, dim=1).to('cpu').numpy()
            true_label = gold.to('cpu').numpy()
            id_n = id_n.to('cpu').numpy()

            if true_label == pred_label:
                n_correct += 1
            else:
                print(str(id_n[0])+' misclassified', true_label, pred_label)
                _, sent0, sent1 = valset.data[n_seen]
                details_file.write(';'.join([str(id_n[0]), sent0, sent1,
                                             str(true_label[0]), str(pred_label[0])])+'\n')

            n_seen += 1

    if n_seen == 0:
        # Nothing was evaluated; avoid dividing by zero below.
        print("  No examples to evaluate.")
        return

    eval_loss = total_loss / len(val_dataloader)

    print("  Accuracy: {0:.2f}".format(n_correct/n_seen))
    print("  Average validation loss: {0:.2f}".format(eval_loss))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

# Run the evaluation when this cell is executed.
if __name__=='__main__':
  test()

There are 1 GPU(s) available.
Using the GPU: Tesla P100-PCIE-16GB
Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Test Data/
Running Validation...
1964 misclassified [1] [0]
174 misclassified [1] [0]
1695 misclassified [0] [1]
1886 misclassified [0] [1]
543 misclassified [1] [0]
1172 misclassified [1] [0]
259 misclassified [1] [0]
896 misclassified [1] [0]
1704 misclassified [0] [1]
1817 misclassified [1] [0]
1421 misclassified [1] [0]
2 misclassified [0] [1]
357 misclassified [1] [0]
1604 misclassified [1] [0]
1558 misclassified [1] [0]
732 misclassified [1] [0]
938 misclassified [0] [1]
1828 misclassified [1] [0]
1656 misclassified [1] [0]
526 misclassified [1] [0]
827 misclassified [0] [1]
1770 misclassified [1] [0]
76 misclassified [0] [1]
1061 misclassified [1] [0]
283 misclassified [0] [1]
530 misclassified [0] [1]
1331 misclassified [1] [0]
713 misclassified [1] [0]
720 misclassified [1] [0]
1814 misclassified [0] [1]
794 misclassified