# Evaluate PromCSE

# Installation

In [None]:
!pip install transformers==4.2.1

# Import Libraries

In [None]:
import sys
import io, os
import numpy as np
import logging
import argparse
from prettytable import PrettyTable
import torch
import transformers
from transformers import AutoConfig, AutoTokenizer


In [None]:
from dataclasses import dataclass, field
from typing import Optional, Union, List, Dict, Tuple

In [None]:
from transformers import (
    CONFIG_MAPPING,
    MODEL_FOR_MASKED_LM_MAPPING,
    AutoConfig,
    AutoModelForMaskedLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
    EvalPrediction,
    BertModel,
    BertForPreTraining,
    RobertaModel
)

# Configuration

In [None]:
# Set up logger
# Request timestamped messages on the root logger, then lower its threshold so
# that DEBUG records (used by the evaluation classes below) are emitted.
logging.basicConfig(format='%(asctime)s : %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [None]:
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
PROJECT_DIR = "/content/drive/MyDrive/patent"

In [None]:
DATA_DIR = PROJECT_DIR + '/data'

In [None]:
import sys
import os
# Add the project's `py_files` directory to the import path.
# NOTE(review): presumably this is where the `promcse` package imported below lives — confirm.
py_file_location = PROJECT_DIR + "/py_files"
sys.path.append(os.path.abspath(py_file_location))

In [None]:
from promcse.models import RobertaForCL, BertForCL

In [None]:
import senteval

In [None]:
from senteval.tools.relatedness import RelatednessPytorch
from senteval.tools.validation import SplitClassifier
from senteval import utils

In [None]:

from senteval.binary import CREval, MREval, MPQAEval, SUBJEval
from senteval.snli import SNLIEval
from senteval.trec import TRECEval
from senteval.sick import SICKEntailmentEval, SICKEval
from senteval.mrpc import MRPCEval
from senteval.sts import STS12Eval, STS13Eval, STS14Eval, STS15Eval, STS16Eval, STSBenchmarkEval, SICKRelatednessEval, STSBenchmarkFinetune, STSEval
from senteval.sst import SSTEval
from senteval.rank import ImageCaptionRetrievalEval
from senteval.probing import *

In [None]:
from senteval.utils import cosine

In [None]:
def print_table(task_names, scores):
    """Print a single-row table with ``task_names`` as header and ``scores`` as the row."""
    table = PrettyTable()
    table.field_names = task_names
    table.add_row(scores)
    print(table)

In [None]:
# Name of the base encoder. OUTPUT_NAME is derived from it and is used as the
# checkpoint directory name under DATA_DIR/results (see the default of
# ModelArguments.model_name_or_path).
MODEL_NAME = 'bert-base-uncased'
#MODEL_NAME = 'roberta-large'
#OUTPUT_NAME = 'nli_' + MODEL_NAME + '_sup-dcpcse'
OUTPUT_NAME = 'patent_' + MODEL_NAME + '_sup-dcpcse'


In [None]:
CACHE_DIR = DATA_DIR + '/cache'

In [None]:
# Zero-based column positions read by PatentEval.loadFile after each CSV row
# has been split on VAL_SEPARATOR: first sentence, second sentence, gold score.
sent0_col = 1
sent1_col = 2
score_col = 4

In [None]:
@dataclass
class ModelArguments:
    """Checkpoint, model and evaluation options parsed by ``HfArgumentParser``.

    Every field has a default, so the parser can be run with an empty
    argument list to obtain a fully populated instance.
    """

    model_name_or_path: Optional[str] = field(
        default=DATA_DIR + '/results/' + OUTPUT_NAME,
        metadata={
            "help": "The model checkpoint for weights initialization."
        }
    )
    pooler_type: str = field(
        default="cls",
        metadata={
            "help": "What kind of pooler to use (cls, cls_before_pooler, avg, avg_top2, avg_first_last)."
        }
    )
    temp: float = field(
        default=0.05,
        metadata={
            "help": "Temperature for softmax."
        }
    )
    hard_negative_weight: float = field(
        default=0.0,
        metadata={
            "help": "The **logit** of weight for hard negatives (only effective if hard negatives are used)."
        }
    )
    do_mlm: bool = field(
        default=False,
        metadata={
            "help": "Whether to use MLM auxiliary objective."
        }
    )
    mlm_weight: float = field(
        default=0.1,
        metadata={
            "help": "Weight for MLM auxiliary objective (only effective if --do_mlm)."
        }
    )
    mlp_only_train: bool = field(
        default=False,
        metadata={
            "help": "Use MLP only during training"
        }
    )

    # Prompt / prefix-encoder options and Energy-based Hinge loss options.
    pre_seq_len: int = field(
        default=10,
        metadata={
            "help": "The length of prompt"
        }
    )
    prefix_projection: bool = field(
        default=False,
        metadata={
            "help": "Apply a two-layer MLP head over the prefix embeddings"
        }
    )
    prefix_hidden_size: int = field(
        default=512,
        metadata={
            "help": "The hidden size of the MLP projection head in Prefix Encoder if prefix projection is used"
        }
    )
    do_eh_loss: bool = field(
        default=False,
        metadata={
            "help": "Whether to add Energy-based Hinge loss"
        }
    )
    # The two fields below default to None, so they are annotated Optional.
    eh_loss_margin: Optional[float] = field(
        default=None,
        metadata={
            "help": "The margin of Energy-based Hinge loss"
        }
    )
    eh_loss_weight: Optional[float] = field(
        default=None,
        metadata={
            "help": "The weight of Energy-based Hinge loss"
        }
    )

    cache_dir: Optional[str] = field(
        default=CACHE_DIR,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
            "with private models)."
        },
    )

    # Evaluation options; the accepted values are the ones tested by the
    # notebook cells that build the SentEval params and the task list.
    mode: str = field(
        default="test",
        metadata={"help": "Evaluation mode: 'dev' or 'fasttest' (fast SentEval settings) or 'test' (full settings)."},
    )
    task_set: str = field(
        default="sts",
        metadata={"help": "Which group of tasks to evaluate: 'sts', 'transfer' or 'full'."},
    )


In [None]:
# Names of the tasks of interest.
# NOTE(review): this list is not referenced anywhere else in the visible
# notebook; the tasks that actually run are taken from `args.tasks` — confirm
# whether it can be removed.
tasks = ['PatentEval', 'STS12', 'STS13', 'STS14', 'STS15', 'STS16',
                     'MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'TREC', 'MRPC',
                     'SICKRelatedness', 'STSBenchmark']

In [None]:
parser = HfArgumentParser((ModelArguments))

In [None]:
# Parse an empty argument list so every ModelArguments field takes its
# default; the parser returns one object per dataclass, hence the unpacking.
args, = parser.parse_args_into_dataclasses(args=[])
# Bare expression: displays the parsed arguments as the notebook cell output.
args

In [None]:
!ls /content/drive/MyDrive/patent/data/results/nli_bert-base-uncased_sup-dcpcse

In [None]:
config = AutoConfig.from_pretrained(args.model_name_or_path)

In [None]:
# Choose the contrastive-learning wrapper from the checkpoint path.
# 'roberta' is tested first because the substring 'bert' also occurs in it.
if 'roberta' in args.model_name_or_path:
    model_cls = RobertaForCL
elif 'bert' in args.model_name_or_path:
    model_cls = BertForCL
else:
    # Stop here with a clear message; otherwise `model` would stay undefined
    # and the failure would only surface later as a NameError.
    raise NotImplementedError(
        "Unsupported checkpoint (expected 'roberta' or 'bert' in the path): "
        + str(args.model_name_or_path)
    )

# Both wrappers are loaded with the same arguments.
model = model_cls.from_pretrained(
    args.model_name_or_path,
    from_tf=bool(".ckpt" in args.model_name_or_path),
    config=config,
    cache_dir=args.cache_dir,
    revision=args.model_revision,
    use_auth_token=True if args.use_auth_token else None,
    model_args=args,
)


In [None]:
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU, and
# move the model there; `batcher` sends its input tensors to the same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
# Set up the tasks
# Translate the requested task set into the list of task names to evaluate.
# NOTE(review): 'full' does not contain 'PatentEval' although 'sts' does —
# confirm that this is intended.
if args.task_set == 'sts':
    args.tasks = ['PatentEval', 'STS12', 'STS13', 'STS14', 'STS15', 'STS16', 'STSBenchmark', 'SICKRelatedness']
elif args.task_set == 'transfer':
    args.tasks = ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'TREC', 'MRPC']
elif args.task_set == 'full':
    args.tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16', 'STSBenchmark', 'SICKRelatedness']
    args.tasks += ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'TREC', 'MRPC']
else:
    # Reject unknown task sets here; otherwise `args.tasks` would never be
    # assigned and the evaluation loop would fail with an AttributeError.
    raise NotImplementedError('Unknown task_set: ' + str(args.task_set))


In [None]:
# Build the SentEval parameter dict for the selected mode.
if args.mode not in ('dev', 'fasttest', 'test'):
    raise NotImplementedError

params = {'task_path': DATA_DIR, 'usepytorch': True}
if args.mode == 'test':
    # Full mode: more folds, and a different optimizer/batch size with
    # larger tenacity and epoch size.
    params['kfold'] = 10
    params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 64,
                            'tenacity': 5, 'epoch_size': 4}
else:
    # Fast mode ('dev' and 'fasttest').
    params['kfold'] = 5
    params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
                            'tenacity': 3, 'epoch_size': 2}

In [None]:
# SentEval prepare and batcher
def prepare(params, samples):
    """No-op ``prepare`` hook: both arguments are ignored and ``None`` is returned."""
    return None

In [None]:
def batcher(params, batch, max_length=None):
    """Encode a batch of tokenized sentences into sentence embeddings.

    Args:
        params: Not used by this function.
        batch: Sequence of sentences, each a sequence of tokens (``str`` or
            UTF-8 ``bytes``); the tokens of a sentence are joined with spaces.
        max_length: When given, inputs are truncated to this many tokens.

    Returns:
        The model's ``pooler_output`` for the batch, moved to the CPU.
    """
    # Handle rare token encoding issues in the dataset: the type of the very
    # first token decides whether the whole batch is decoded from bytes.
    non_empty = len(batch) >= 1 and len(batch[0]) >= 1
    if non_empty and isinstance(batch[0][0], bytes):
        batch = [[word.decode('utf-8') for word in s] for s in batch]

    sentences = [' '.join(tokens) for tokens in batch]

    # Tokenization; truncation is only requested together with a length limit.
    encode_kwargs = {'return_tensors': 'pt', 'padding': True}
    if max_length is not None:
        encode_kwargs['max_length'] = max_length
        encode_kwargs['truncation'] = True
    encoded = tokenizer.batch_encode_plus(sentences, **encode_kwargs)

    # Move every input tensor to the device the model lives on.
    for key in encoded:
        encoded[key] = encoded[key].to(device)

    # Inference only, so no gradients are tracked.
    with torch.no_grad():
        outputs = model(**encoded, output_hidden_states=True, return_dict=True, sent_emb=True)
        embeddings = outputs.pooler_output.cpu()
    return embeddings

    # # Apply different poolers
    # if args.pooler == 'cls':
    #     # There is a linear+activation layer after CLS representation
    #     return pooler_output.cpu()
    # elif args.pooler == 'cls_before_pooler':
    #     return last_hidden[:, 0].cpu()
    # elif args.pooler == "avg":
    #     return ((last_hidden * batch['attention_mask'].unsqueeze(-1)).sum(1) / batch['attention_mask'].sum(-1).unsqueeze(-1)).cpu()
    # elif args.pooler == "avg_first_last":
    #     first_hidden = hidden_states[0]
    #     last_hidden = hidden_states[-1]
    #     pooled_result = ((first_hidden + last_hidden) / 2.0 * batch['attention_mask'].unsqueeze(-1)).sum(1) / batch['attention_mask'].sum(-1).unsqueeze(-1)
    #     return pooled_result.cpu()
    # elif args.pooler == "avg_top2":
    #     second_last_hidden = hidden_states[-2]
    #     last_hidden = hidden_states[-1]
    #     pooled_result = ((last_hidden + second_last_hidden) / 2.0 * batch['attention_mask'].unsqueeze(-1)).sum(1) / batch['attention_mask'].sum(-1).unsqueeze(-1)
    #     return pooled_result.cpu()
    # else:
    #     raise NotImplementedError

In [None]:
# Field delimiter used by the loadFile methods below when splitting each row
# of the patent CSV files.
VAL_SEPARATOR='|' # '\t'

In [None]:
class PatentFineTuneEval(object):
    """Patent sentence-pair relatedness task scored with a trained classifier.

    Sentence pairs and gold scores are read from ``patent_train.csv`` and
    ``patent_test.csv`` under ``task_path``.  ``RelatednessPytorch`` also
    needs a validation split: it is read from ``patent_val.csv`` when that
    file exists, otherwise ``dev_ratio`` of the training pairs is held out.

    Args:
        task_path: Directory containing the patent CSV files.
        seed: Seed for the dev hold-out and for the classifier.
        dev_ratio: Fraction of the training pairs held out as dev data when
            no ``patent_val.csv`` is present.
    """

    def __init__(self, task_path, seed=1111, dev_ratio=0.1):
        logging.debug('***** Transfer task : PatentFineTuneEval*****\n\n')
        self.seed = seed

        train = self.loadFile(os.path.join(task_path, 'patent_train.csv'))
        test = self.loadFile(os.path.join(task_path, 'patent_test.csv'))

        dev_path = os.path.join(task_path, 'patent_val.csv')
        if os.path.isfile(dev_path):
            dev = self.loadFile(dev_path)
        else:
            # No dedicated validation file: carve the dev split out of train
            # so that the test set is never used for model selection.
            train, dev = self._split_train_dev(train, dev_ratio)

        self.sick_data = {'train': train, 'dev': dev, 'test': test}

    def _split_train_dev(self, data, dev_ratio):
        """Split ``data`` into (train, dev) dicts, reproducibly for ``self.seed``."""
        total = len(data['y'])
        if total < 2:
            raise ValueError('Need at least two training pairs to hold out a dev split')
        # Keep at least one example on each side of the split.
        n_dev = min(total - 1, max(1, int(round(total * dev_ratio))))
        order = np.random.RandomState(self.seed).permutation(total)

        def take(indices):
            return {key: [data[key][i] for i in indices]
                    for key in ('X_A', 'X_B', 'y')}

        return take(order[n_dev:]), take(order[:n_dev])

    def do_prepare(self, params, prepare):
        """Call ``prepare`` with every sentence of every split."""
        samples = []
        for split in ('train', 'dev', 'test'):
            samples += self.sick_data[split]['X_A'] + self.sick_data[split]['X_B']
        return prepare(params, samples)

    def loadFile(self, fpath):
        """Read one CSV file into ``{'X_A': ..., 'X_B': ..., 'y': ...}``.

        The first line is skipped as a header.  Sentences are split on
        whitespace and scores are converted to ``float``.
        """
        # NOTE(review): the column positions used here (6, 7, 5) differ from
        # the module-level sent0_col/sent1_col/score_col (1, 2, 4) used by
        # PatentEval — confirm which layout the CSV files actually have.
        skipFirstLine = True
        sick_data = {'X_A': [], 'X_B': [], 'y': []}
        with io.open(fpath, 'r', encoding='utf-8') as f:
            for line in f:
                if skipFirstLine:
                    skipFirstLine = False
                else:
                    text = line.strip().split(VAL_SEPARATOR)
                    sick_data['X_A'].append(text[6].split())
                    sick_data['X_B'].append(text[7].split())
                    sick_data['y'].append(text[5])

        sick_data['y'] = [float(s) for s in sick_data['y']]
        return sick_data

    @staticmethod
    def _pair_features(emb_a, emb_b):
        """Return ``|a - b|`` and ``a * b`` concatenated column-wise."""
        return np.c_[np.abs(emb_a - emb_b), emb_a * emb_b]

    def run(self, params, batcher):
        """Embed all splits, train the relatedness classifier and score it.

        Args:
            params: Object exposing ``batch_size``; also passed to ``batcher``.
            batcher: Callable ``batcher(params, batch)`` returning one
                embedding row per sentence of ``batch``.

        Returns:
            Dict with the dev Spearman score reported by the classifier, test
            Pearson/Spearman/MSE, the test predictions and the split sizes.
        """
        # Imported here so the block does not depend on a file-level import.
        from scipy.stats import pearsonr, spearmanr

        sick_embed = {'train': {}, 'dev': {}, 'test': {}}
        bsize = params.batch_size

        for key in self.sick_data:
            logging.info('Computing embedding for {0}'.format(key))
            # Sort to reduce padding
            sorted_corpus = sorted(zip(self.sick_data[key]['X_A'],
                                       self.sick_data[key]['X_B'],
                                       self.sick_data[key]['y']),
                                   key=lambda z: (len(z[0]), len(z[1]), z[2]))

            self.sick_data[key]['X_A'] = [x for (x, y, z) in sorted_corpus]
            self.sick_data[key]['X_B'] = [y for (x, y, z) in sorted_corpus]
            self.sick_data[key]['y'] = [z for (x, y, z) in sorted_corpus]

            for txt_type in ['X_A', 'X_B']:
                sick_embed[key][txt_type] = []
                for ii in range(0, len(self.sick_data[key]['y']), bsize):
                    batch = self.sick_data[key][txt_type][ii:ii + bsize]
                    embeddings = batcher(params, batch)
                    sick_embed[key][txt_type].append(embeddings)
                sick_embed[key][txt_type] = np.vstack(sick_embed[key][txt_type])
            sick_embed[key]['y'] = np.array(self.sick_data[key]['y'])
            logging.info('Computed {0} embeddings'.format(key))

        # Train
        trainF = self._pair_features(sick_embed['train']['X_A'], sick_embed['train']['X_B'])
        trainY = self.encode_labels(self.sick_data['train']['y'])

        # Dev
        devA = sick_embed['dev']['X_A']
        devF = self._pair_features(devA, sick_embed['dev']['X_B'])
        devY = self.encode_labels(self.sick_data['dev']['y'])

        # Test
        testA = sick_embed['test']['X_A']
        testF = self._pair_features(testA, sick_embed['test']['X_B'])
        testY = self.encode_labels(self.sick_data['test']['y'])

        config = {'seed': self.seed, 'nclasses': 5}
        clf = RelatednessPytorch(train={'X': trainF, 'y': trainY},
                                 valid={'X': devF, 'y': devY},
                                 test={'X': testF, 'y': testY},
                                 devscores=self.sick_data['dev']['y'],
                                 config=config)

        devspr, yhat = clf.run()

        gold = np.asarray(self.sick_data['test']['y'], dtype=float)
        pr = pearsonr(yhat, gold)[0]
        sr = spearmanr(yhat, gold)[0]
        # NaN is the only value that differs from itself; report it as 0.
        pr = 0 if pr != pr else pr
        sr = 0 if sr != sr else sr
        # Mean squared error between predictions and gold scores.
        se = float(np.mean((np.asarray(yhat, dtype=float) - gold) ** 2))
        logging.debug('Dev : Spearman {0}'.format(devspr))
        logging.debug('Test : Pearson {0} Spearman {1} MSE {2} \
                       for Patent Relatedness\n'.format(pr, sr, se))

        return {'devspearman': devspr, 'pearson': pr, 'spearman': sr, 'mse': se,
                'yhat': yhat, 'ndev': len(devA), 'ntest': len(testA)}

    def encode_labels(self, labels, nclass=5):
        """
        Label encoding from Tree LSTM paper (Tai, Socher, Manning)

        Each score ``y`` becomes a row of ``nclass`` weights: class
        ``floor(y)`` gets ``floor(y) - y + 1`` and class ``floor(y) + 1`` gets
        ``y - floor(y)`` (classes are numbered from 1).  Scores whose
        neighbouring classes fall outside ``1..nclass`` lose that weight.
        """
        # NOTE(review): assumes scores lie in [1, nclass] — confirm for the patent data.
        Y = np.zeros((len(labels), nclass)).astype('float32')
        for j, y in enumerate(labels):
            for i in range(nclass):
                if i+1 == np.floor(y) + 1:
                    Y[j, i] = y - np.floor(y)
                if i+1 == np.floor(y):
                    Y[j, i] = np.floor(y) - y + 1
        return Y



In [None]:
class PatentEval(STSEval):
    """Patent sentence-pair similarity task built on ``STSEval``.

    Only the constructor and ``loadFile`` are defined here; everything else
    comes from ``STSEval``.  The 'train' and 'test' CSV files are registered
    as the two datasets of the task.
    """

    def __init__(self, task_path, seed=1111):
        logging.debug('\n\n***** Transfer task : PatentEval*****\n\n')
        self.seed = seed
        # Filled by loadFile with every sentence of every loaded file.
        self.samples = []

        train_pairs = self.loadFile(os.path.join(task_path, 'patent_train.csv'))
        test_pairs = self.loadFile(os.path.join(task_path, 'patent_test.csv'))

        self.datasets = ['train', 'test']
        self.data = {'train': train_pairs, 'test': test_pairs}

    def loadFile(self, fpath):
        """Read one CSV file and return ``(sentences_a, sentences_b, scores)``.

        The first line is skipped as a header.  Rows are split on
        ``VAL_SEPARATOR``; the columns are chosen by ``sent0_col``,
        ``sent1_col`` and ``score_col``.  Sentences are split on whitespace
        and also appended to ``self.samples``.
        """
        first_sents, second_sents, raw_scores = [], [], []
        with io.open(fpath, 'r', encoding='utf-8') as f:
            next(f, None)  # header line
            for line in f:
                columns = line.strip().split(VAL_SEPARATOR)
                first_sents.append(columns[sent0_col].split())
                second_sents.append(columns[sent1_col].split())
                raw_scores.append(columns[score_col])

        # Converted only after the whole file has been read.
        gold_scores = [float(s) for s in raw_scores]
        self.samples += first_sents + second_sents
        return (first_sents, second_sents, gold_scores)


In [None]:
class SE(object):
    """Evaluation engine: builds a task object by name and runs it.

    Args:
        params: Mapping of settings.  Missing ``usepytorch``, ``seed``,
            ``batch_size``, ``nhid``, ``kfold`` and ``classifier`` entries
            are filled with defaults.  ``task_path`` is read by ``eval``.
        batcher: Callable handed to the task's ``run`` method.
        prepare: Optional callable handed to the task's ``do_prepare``
            method; a no-op is used when omitted.
    """

    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        defaults = (('usepytorch', True), ('seed', 1111), ('batch_size', 128),
                    ('nhid', 0), ('kfold', 5))
        for key, value in defaults:
            if key not in params:
                params[key] = value

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [ 'PatentEval', 'CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
                           'SICKRelatedness', 'SICKEntailment', 'STSBenchmark',
                           'SNLI', 'ImageCaptionRetrieval', 'STS12', 'STS13',
                           'STS14', 'STS15', 'STS16',
                           'Length', 'WordContent', 'Depth', 'TopConstituents',
                           'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber',
                           'OddManOut', 'CoordinationInversion', 'SICKRelatedness-finetune', 'STSBenchmark-finetune', 'STSBenchmark-fix']

    def _make_evaluation(self, name, tpath, seed):
        """Instantiate the task object registered under ``name``.

        The factories are lambdas so that a task class (and its data path) is
        only touched when that task is actually requested.
        """
        def downstream(sub):
            return tpath + '/downstream/' + sub

        def probing():
            return tpath + '/probing'

        factories = {
            # Original SentEval tasks
            'PatentEval': lambda: PatentEval(tpath, seed=seed),
            'CR': lambda: CREval(downstream('CR'), seed=seed),
            'MR': lambda: MREval(downstream('MR'), seed=seed),
            'MPQA': lambda: MPQAEval(downstream('MPQA'), seed=seed),
            'SUBJ': lambda: SUBJEval(downstream('SUBJ'), seed=seed),
            'SST2': lambda: SSTEval(downstream('SST/binary'), nclasses=2, seed=seed),
            'SST5': lambda: SSTEval(downstream('SST/fine'), nclasses=5, seed=seed),
            'TREC': lambda: TRECEval(downstream('TREC'), seed=seed),
            'MRPC': lambda: MRPCEval(downstream('MRPC'), seed=seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(downstream('SICK'), seed=seed),
            'STSBenchmark': lambda: STSBenchmarkEval(downstream('STS/STSBenchmark'), seed=seed),
            'STSBenchmark-fix': lambda: STSBenchmarkEval(downstream('STS/STSBenchmark-fix'), seed=seed),
            'STSBenchmark-finetune': lambda: STSBenchmarkFinetune(downstream('STS/STSBenchmark'), seed=seed),
            'SICKRelatedness-finetune': lambda: SICKEval(downstream('SICK'), seed=seed),
            'SICKEntailment': lambda: SICKEntailmentEval(downstream('SICK'), seed=seed),
            'SNLI': lambda: SNLIEval(downstream('SNLI'), seed=seed),
            # Spelled out per year instead of building the class name as a
            # string and passing it to the builtin eval().
            'STS12': lambda: STS12Eval(downstream('STS/STS12-en-test'), seed=seed),
            'STS13': lambda: STS13Eval(downstream('STS/STS13-en-test'), seed=seed),
            'STS14': lambda: STS14Eval(downstream('STS/STS14-en-test'), seed=seed),
            'STS15': lambda: STS15Eval(downstream('STS/STS15-en-test'), seed=seed),
            'STS16': lambda: STS16Eval(downstream('STS/STS16-en-test'), seed=seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(downstream('COCO'), seed=seed),
            # Probing Tasks
            'Length': lambda: LengthEval(probing(), seed=seed),
            'WordContent': lambda: WordContentEval(probing(), seed=seed),
            'Depth': lambda: DepthEval(probing(), seed=seed),
            'TopConstituents': lambda: TopConstituentsEval(probing(), seed=seed),
            'BigramShift': lambda: BigramShiftEval(probing(), seed=seed),
            'Tense': lambda: TenseEval(probing(), seed=seed),
            'SubjNumber': lambda: SubjNumberEval(probing(), seed=seed),
            'ObjNumber': lambda: ObjNumberEval(probing(), seed=seed),
            'OddManOut': lambda: OddManOutEval(probing(), seed=seed),
            'CoordinationInversion': lambda: CoordinationInversionEval(probing(), seed=seed),
        }
        # An unknown name raises KeyError here instead of silently leaving a
        # previously built evaluation in place.
        return factories[name]()

    def eval(self, name):
        """Run one task, or each task of a list, and return the results.

        Args:
            name: A task name from ``self.list_tasks`` or a list of them.

        Returns:
            The value returned by the task's ``run`` method; for a list, a
            dict mapping every name to that value.  The same object is also
            stored in ``self.results``.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if (isinstance(name, list)):
            self.results = {x: self.eval(x) for x in name}
            return self.results

        tpath = self.params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(self.list_tasks)

        self.evaluation = self._make_evaluation(name, tpath, self.params.seed)

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results

In [None]:
results = {}

In [None]:
# Run every selected task with a fresh SE engine and collect the per-task
# result under the task's name.
for task in args.tasks:
    se = SE(params, batcher, prepare)
    result = se.eval(task)
    results[task] = result

In [None]:
# Report the collected scores as tables. A task without a result is shown as
# "0.00" and still takes part in the "Avg." column.
if args.mode == 'dev':
    print("------ %s ------" % (args.mode))

    # Similarity tasks: dev Spearman, as a percentage (no average column).
    task_names = []
    scores = []
    for task in ('STSBenchmark', 'SICKRelatedness'):
        task_names.append(task)
        if task not in results:
            scores.append("0.00")
            continue
        scores.append("%.2f" % (results[task]['dev']['spearman'][0] * 100))
    print_table(task_names, scores)

    # Transfer tasks: dev accuracy.
    task_names = []
    scores = []
    for task in ('MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC'):
        task_names.append(task)
        if task not in results:
            scores.append("0.00")
            continue
        scores.append("%.2f" % (results[task]['devacc']))
    task_names.append("Avg.")
    scores.append("%.2f" % (sum([float(score) for score in scores]) / len(scores)))
    print_table(task_names, scores)

elif args.mode in ('test', 'fasttest'):
    print("------ %s ------" % (args.mode))

    # Similarity tasks: Spearman as a percentage. Two result layouts are
    # read: ['all']['spearman']['all'] and ['test']['spearman'].correlation.
    task_names = []
    scores = []
    for task in ('PatentEval', 'STS12', 'STS13', 'STS14', 'STS15', 'STS16', 'STSBenchmark', 'SICKRelatedness'):
        task_names.append(task)
        if task not in results:
            scores.append("0.00")
            continue
        if task in ('PatentEval', 'STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            spearman = results[task]['all']['spearman']['all']
        else:
            spearman = results[task]['test']['spearman'].correlation
        scores.append("%.2f" % (spearman * 100))
    task_names.append("Avg.")
    scores.append("%.2f" % (sum([float(score) for score in scores]) / len(scores)))
    print_table(task_names, scores)

    # Transfer tasks: test accuracy.
    task_names = []
    scores = []
    for task in ('MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC'):
        task_names.append(task)
        if task not in results:
            scores.append("0.00")
            continue
        scores.append("%.2f" % (results[task]['acc']))
    task_names.append("Avg.")
    scores.append("%.2f" % (sum([float(score) for score in scores]) / len(scores)))
    print_table(task_names, scores)