## Imports

In [28]:
def evaluate(task, methods, verbose=False):
    """Compute size-weighted Pearson and Spearman correlations for one task.

    Every method is run on every dataset of the task. The per-dataset
    correlations between the method's similarities and the gold scores are
    then averaged, weighting each dataset by its number of gold scores.

    Args:
        task: Mapping from dataset name to a ``(sentences1, sentences2, gs)``
            triple, where ``gs`` holds the gold scores the similarities are
            correlated against.
        methods: Iterable of ``(label, method)`` pairs. Each ``method`` is
            called as ``method(sentences1, sentences2)`` and its result is
            correlated against ``gs``.
        verbose: Accepted for interface compatibility; currently unused.

    Returns:
        A ``(pearson_correlations, spearman_correlations)`` tuple of dicts,
        each mapping a method label to its weighted correlation.
    """
    pearson_correlations = {}
    spearman_correlations = {}

    # Iterate over the `methods` argument. The loop used to read a global
    # named `evaluations`, which only worked through leftover kernel state.
    for label, method in methods:
        task_pearson = []
        task_spearman = []
        task_weights = []
        for sentences1, sentences2, gs in task.values():
            task_weights.append(len(gs))
            sims = method(sentences1, sentences2)
            # pearsonr / spearmanr return (statistic, p-value); keep the statistic.
            task_pearson.append(pearsonr(sims, gs)[0])
            task_spearman.append(spearmanr(sims, gs)[0])

        # Loop-invariant, so compute it once instead of once per term.
        total_weight = sum(task_weights)

        pearson_correlations[label] = sum(
            corr * weight / total_weight
            for corr, weight in zip(task_pearson, task_weights)
        )
        spearman_correlations[label] = sum(
            corr * weight / total_weight
            for corr, weight in zip(task_spearman, task_weights)
        )

    return pearson_correlations, spearman_correlations

## Modelos

In [None]:
from gensim.models import KeyedVectors 

In [33]:
# Locations of the pre-trained embedding files, relative to the notebook's
# working directory.
# word2vec file, loaded below with binary=True.
PATH_WORD2VEC = './data/embedding/word2vec/GoogleNews-vectors-negative300.bin'
# GloVe file, loaded below with binary=False via load_word2vec_format.
PATH_GLOVE = './data/embedding/glove/glove.840B.300d.w2v.txt'
# NOTE(review): no fastText model is loaded in the visible cells, so this path
# looks unused for now — confirm before removing.
PATH_FASTTEXT = './data/embedding/fasttext/crawl-300d-2M.vec'

In [4]:
# Load the word2vec vectors from the binary word2vec-format file.
word2vec = KeyedVectors.load_word2vec_format(PATH_WORD2VEC, binary=True)

In [36]:
# Load the GloVe vectors through the word2vec-format loader in text mode
# (binary=False).
glove = KeyedVectors.load_word2vec_format(PATH_GLOVE, binary=False)

## Datos

In [None]:
from load import loadSTS12, loadSTS13, loadSTS14, loadSTS15, loadSTS16

In [45]:
# Location of the STS datasets handed to the loadSTS* loaders.
PATH_DATASETS = './data/datasets/STS'

# Preprocessing switches handed to the loadSTS* loaders; every one is off.
PREPROCESSING = dict(
    lowercase=False,
    stop_words=False,
    punctuation=False,
    only_ascii=False,
    lemmatization=False,
)

In [46]:
# Load each STS task with the shared dataset path and preprocessing options.
# The loaders print progress as they run (see the output below this cell).
# evaluate() consumes a task as a mapping from dataset name to a
# (sentences1, sentences2, gs) triple — presumably what each loader returns;
# verify against load.py.
# NOTE(review): loadSTS16 is imported but not called in this cell.
sts12 = loadSTS12(PATH_DATASETS, PREPROCESSING)
sts13 = loadSTS13(PATH_DATASETS, PREPROCESSING)
sts14 = loadSTS14(PATH_DATASETS, PREPROCESSING)
sts15 = loadSTS15(PATH_DATASETS, PREPROCESSING)

***** Transfer task : STS12 *****


Preprocessing -MSRpar-
-MSRpar- preprocessed correctly
Preprocessing -MSRvid-
-MSRvid- preprocessed correctly
Preprocessing -SMTeuroparl-
-SMTeuroparl- preprocessed correctly
Preprocessing -surprise.OnWN-
-surprise.OnWN- preprocessed correctly
Preprocessing -surprise.SMTnews-
-surprise.SMTnews- preprocessed correctly
***** Transfer task : STS13 (-SMT) *****


Preprocessing -FNWN-
-FNWN- preprocessed correctly
Preprocessing -headlines-
-headlines- preprocessed correctly
Preprocessing -OnWN-
-OnWN- preprocessed correctly
***** Transfer task : STS14 *****


Preprocessing -deft-forum-
-deft-forum- preprocessed correctly
Preprocessing -deft-news-
-deft-news- preprocessed correctly
Preprocessing -headlines-
-headlines- preprocessed correctly
Preprocessing -images-
-images- preprocessed correctly
Preprocessing -OnWN-
-OnWN- preprocessed correctly
Preprocessing -tweet-news-
-tweet-news- preprocessed correctly
***** Transfer task : STS15 *****


Preprocessing

## Métodos

In [None]:
from functools import partial

In [47]:
# (label, method) pairs consumed by evaluate(). Each method is a similarity
# function with its embedding model pre-bound, and labels follow the
# "<MODEL> + <MEASURE>" pattern: W2V + AVG, W2V + WMD, GLOVE + AVG, GLOVE + WMD.
# fastText is not included; to add it, load a `fasttext` model and append
# ("FASTTEXT", fasttext) to the model tuple below.
METHODS = [
    (f"{model_name} + {measure_name}", partial(measure, model=model))
    for model_name, model in (("W2V", word2vec), ("GLOVE", glove))
    for measure_name, measure in (("AVG", avg_cosine), ("WMD", wmd))
]

In [48]:
# Weighted Pearson and Spearman correlations on STS12, as two dicts returned
# by evaluate() for the configured METHODS.
pearson, spearman = evaluate(sts12, METHODS)

In [49]:
# Display the weighted Pearson correlations returned by evaluate() for STS12.
pearson

{'W2V + AVG': 0.5576731761229754,
 'W2V + WMD': 0.4735133931943548,
 'GLOVE + AVG': 0.550325345521787,
 'GLOVE + WMD': 0.5511226507959358}