In [1]:
%matplotlib inline

In [2]:
import numpy as np

class MF():

    def __init__(self, R, K, alpha, beta, iterations, test_samples, X=None, Y=None, Z=None, src_si_len=0, \
                 tgt_si_len=0, lang_pair_si_len=0, src_index=None, tgt_index=None, model=None, num_running=0):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)                  : src-tgt language rating matrix
        - K (int)                      : number of latent dimensions
        - alpha (float)                : learning rate
        - beta (float)                 : regularization parameter
        - X (dict)                     : source language side information
        - Y (dict)                     : target language side information
        - Z (dict)                     : language pair side information
        - src_si_len(int)              : source language side information length
        - tgt_si_len(int)              : target language side information length
        - lang_pair_si_len(int)        : language pair side information length
        """

        self.R = np.array(R)
        self.Prediction = deepcopy(self.R)
        self.src_langs = R.index.tolist()
        self.tgt_langs = R.columns.tolist()
        self.num_src, self.num_tgt = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations
        self.X = X
        self.Y = Y
        self.Z = Z
        self.src_si_len = src_si_len
        self.tgt_si_len = tgt_si_len
        self.lang_pair_si_len = lang_pair_si_len
        self.test_samples = test_samples
        self.src_index=src_index 
        self.tgt_index=tgt_index
        self.model=model
        self.num_running = num_running
        self.score_dict = {'BLEU': "WIKI-MT", "Muse": "BLI-Muse", "Vecmap": "BLI-Vecmap"}
        self.traing_error_log=[]
        self.test_error_log=[]
        

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_src, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_tgt, self.K))

        # Initialize side information's parameter if necesary
        if self.X and self.src_si_len:
            self.A = np.random.normal(scale = 1./self.src_si_len, size=self.src_si_len)
        if self.Y and self.tgt_si_len:
            self.B = np.random.normal(scale = 1./self.tgt_si_len, size=self.tgt_si_len)
        if self.Z and self.lang_pair_si_len:
            self.C = np.random.normal(scale = 1./self.lang_pair_si_len, size=self.lang_pair_si_len)
        
        # Initialize the biases
        # the biases of users and items are initilized as 0
        # the bias of rating is initilized as mean value
        self.b_u = np.zeros(self.num_src)
        self.b_i = np.zeros(self.num_tgt)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples (where rating > 0)
        self.samples = []
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                if self.R[i,j] > 0:
                    cur_tuple = [i, j, self.R[i, j]]
                    src_lang = self.src_langs[i]
                    tgt_lang = self.tgt_langs[j]
                    if self.X:
                        if src_lang in self.X.keys():
                            cur_tuple.append(self.X[src_lang])
                        else:
                            raise KeyError
                    if self.Y:
                        if tgt_lang in self.Y.keys():
                            cur_tuple.append(self.Y[src_lang])
                        else:
                            raise KeyError
                    if self.Z:
                        if src_lang + "_" + tgt_lang in self.Z.keys():
                            cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                        else:
                            raise KeyError
                    self.samples.append(tuple(cur_tuple))

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            # shuffle training samples
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 100 == 0:
                self.traing_error_log.append((i, mse))
                test_mse = self.evaluate_testing(self.test_samples, self.src_index, self.tgt_index, self.model)
                self.test_error_log.append((i, test_mse))
#                 print("\t\tIteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
#         predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x][y] - self.Prediction[x][y], 2)
        return np.sqrt(error / len(xs))

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for sample in self.samples:
            i, j, r = sample[0], sample[1], sample[2]
            # Computer prediction and error
            prediction = self.get_rating(sample)
            self.Prediction[i][j] = prediction
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])
            
            # Update side information parameter if necessary
            cur_index = 3
            if self.X:
                x = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.A += self.alpha * (e * x - self.beta * self.A)
            if self.Y:
                y = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.B += self.alpha * (e * y - self.beta * self.B)
            if self.Z:
                z = np.array(sample[cur_index], dtype=np.float64)
                self.C += self.alpha * (e * z - self.beta * self.C)
                

    def get_rating(self, sample):
        """
        Get the predicted rating of sample
        """
        i, j, r = sample[0], sample[1], sample[2]
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        cur_index = 3
        if self.X:
            x = sample[cur_index]
            cur_index += 1
            prediction += self.A.dot(x.T)
        if self.Y:
            y = sample[cur_index]
            cur_index += 1
            prediction += self.B.dot(y.T)
        if self.Z:
            z = sample[cur_index]
            prediction += self.C.dot(z.T)
        return prediction
    
    def evaluate_testing(self, test_data, src_index_name, tgt_index_name, score_index_name):
        """
        Predict the score for testing data
        """ 
        rmse = 0.0
        for record in test_data.iterrows():
            record = record[1]
            src_lang = record[src_index_name]
            tgt_lang = record[tgt_index_name]
            src_lang_index = self.src_langs.index(src_lang)
            tgt_lang_index = self.tgt_langs.index(tgt_lang)
            score = record[score_index_name]
            cur_tuple = [src_lang_index, tgt_lang_index, score]
            if self.X:
                if src_lang in self.X.keys():
                    cur_tuple.append(self.X[src_lang])
                else:
                    raise KeyError
            if self.Y:
                if tgt_lang in self.Y.keys():
                    cur_tuple.append(self.Y[src_lang])
                else:
                    raise KeyError
            if self.Z:
                if src_lang + "_" + tgt_lang in self.Z.keys():
                    cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                else:
                    raise KeyError
            prediction = self.get_rating(tuple(cur_tuple))
            rmse += (prediction - score) * (prediction - score)
        return np.sqrt(rmse / len(test_data))
        

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P, Q, A, B, and C
        """
        res = deepcopy(self.R)
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                src_lang = self.src_langs[i]
                tgt_lang = self.tgt_langs[j]
                res[i][j] = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
                if self.X and src_lang in self.X.keys():
                    x = self.X[src_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Y and tgt_lang in self.Y.keys():
                    y = self.X[tgt_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Z and src_lang + "_" + tgt_lang in self.Z.keys():
                    z = self.X[src_lang + "_" + tgt_lang]
                    res[i][j] += self.A.dot(x.T)
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)
    
    def draw_error_curve(self, i):
        import matplotlib.pyplot as plt
        import numpy as np
        iters = []
        train_loss = []
        test_loss = []
        for item in self.traing_error_log:
            iters.append(item[0])
            train_loss.append(item[1])
        for item in self.test_error_log:
            test_loss.append(item[1])

        plt.plot(iters, train_loss, 'b', label='train loss')#'b'指：color='blue'
        plt.plot(iters, test_loss, 'r', label='test loss')#'r'指：color='red'

        plt.legend()  #显示上面的label
        plt.xlabel('Iteration')
        plt.ylabel('Loss')
        title = self.score_dict[self.model] + "_" + str(self.num_running+1) + "_Fold" + str(i+1)
        plt.title(title) 
        plt.savefig("../result/within_single_si/" + self.score_dict[self.model] + "/fold" + str(i+1) + "/" + title + ".png")

        #plt.ylim(-1,1)#仅设置y轴坐标范围
        plt.show()

        

Represent the two tasks above as matrices (BLEU scores for Wiki-MT and Accuracy for BLI) 

In [3]:
import pandas as pd
import numpy as np
np.random.seed(2021)

In [4]:
from copy import deepcopy
import pandas as pd
import random as rd

def split_k_fold_data(file_dir, score_index_name, src_index_name, tgt_index_name, k=5):
    data = pd.read_csv(file_dir)
    
    # shuffle
    data = data.sample(frac=1)
    
    # generate score matrix
    src_langs = data[src_index_name].unique()
    tgt_langs = data[tgt_index_name].unique()
    score_matrix = pd.DataFrame(index = src_langs, columns = tgt_langs)
    
    # eliminate empty rows and columns
    data = data.dropna(axis=1, how="all")
    data = data.dropna(axis=0, how="all")
    
    # K fold split
    k_fold_data = {}
    models = list(score_index_name)
    lens= len(data)
    
    for i, model in enumerate(models):
        ex_per_fold = int(np.ceil(lens / k))
        for j in range(k):
            start = ex_per_fold * j
            end = ex_per_fold * (j + 1)
            if j == 0:
                k_fold_data[model] = {"train": [], "test": []}
            k_fold_data[model]["train"].append(pd.concat([data.iloc[:start, :], data.iloc[end:, :]], axis=0))
            k_fold_data[model]["test"].append(data.iloc[start:end, :])
    return k_fold_data, score_matrix

def generate_score_matrix(train_data, src_index_name, tgt_index_name, score_index_name, origin_score_matrix):    
    score_matrix = deepcopy(origin_score_matrix)
    
    for record in train_data.iterrows():
        record = record[1]
        src_lang = record[src_index_name]
        tgt_lang = record[tgt_index_name]
        score = record[score_index_name]
        score_matrix.loc[src_lang, tgt_lang] = score
#         score_matrix[src_lang][tgt_lang] = score
    score_matrix.fillna(0, inplace=True)
    return score_matrix

def get_rmse(valid_data, model, src_index_name, tgt_index_name, score_matrix, train_matrix):
    rmse = 0.0
    src_langs = train_matrix.index.tolist()
    tgt_langs = train_matrix.columns.tolist()
    for cur_valid_data in valid_data.iterrows():
        cur_valid_data = cur_valid_data[1]
        src_lang, tgt_lang, score = cur_valid_data[src_index_name], cur_valid_data[tgt_index_name], cur_valid_data[model]
        src_idx = src_langs.index(src_lang)
        tgt_idx = tgt_langs.index(tgt_lang)
        prediction = score_matrix[src_idx][tgt_idx]
        rmse += (prediction - score) * (prediction - score)
    return np.sqrt(rmse / len(valid_data))

def get_result(alpha, beta, data_dir, scores, src_index, tgt_index, k, num_running, src_lang_side_info=None, \
               tgt_lang_side_info=None, lang_pair_side_info=None, src_si_len=0, tgt_si_len=0, \
               lang_pair_si_len=0):
    all_running_rmse = 0.0
    for nr in range(num_running):
        print("num running: " + str(nr+1))
        data, langs_matrix = split_k_fold_data(data_dir, scores, src_index, tgt_index, k)
        
        src_langs = langs_matrix.index.tolist()
        tgt_langs = langs_matrix.columns.tolist()
        
        res = {}
        for model in scores:
            print("-"*40)
            print(model)
            total_rmse = 0.0
            for i in range(k):
                print("\tFold {}: ".format(i+1))
                train_data, test_data = data[model]["train"][i], data[model]["test"][i]
                train_matrix = generate_score_matrix(train_data, src_index, tgt_index, model, langs_matrix)
                mf = MF(train_matrix, K=2, alpha=alpha, beta=beta, iterations=2000, test_samples=test_data, X=src_lang_side_info, \
                        Y=tgt_lang_side_info, Z=lang_pair_side_info, src_si_len=src_si_len, tgt_si_len=tgt_si_len, \
                        lang_pair_si_len=lang_pair_si_len, src_index=src_index, tgt_index=tgt_index, model=model, \
                       num_running=nr)
                trainging_log = mf.train()
#                 predictions = mf.full_matrix()
#                 cur_rmse = get_rmse(test_data, model, src_index, tgt_index, predictions, train_matrix)
                cur_rmse = mf.evaluate_testing(test_data, src_index, tgt_index, model)
#                 mf.draw_error_curve(i)
                total_rmse += cur_rmse
                print("\t\trmse is {}.".format(cur_rmse))
                print("*" * 20)
                
            average_rmse = total_rmse / k
            print("average rmse: " + str(average_rmse))
            res[model] = average_rmse
        res_rmse = 0.0
        for key, value in res.items():
            res_rmse += value
        all_running_rmse += res_rmse / len(scores)
    return all_running_rmse / num_running

def get_language_pair_side_info(data_dir, side_info_features, src_lang_name, tgt_lang_name):
    data = pd.read_csv(data_dir)
    side_dict = {}
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        side_dict[src_lang + "_" + tgt_lang] = record[side_info_features].values
    return side_dict

def get_language_side_information(data_dir, src_lang_name, tgt_lang_name):
    
    data = pd.read_csv(data_dir)
    side_dict = {}
    src_langs = set()
    tgt_langs = set()
    
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        src_langs.add(src_lang)
        tgt_langs.add(tgt_lang)
        
    src_langs = list(src_langs)
    tgt_langs = list(tgt_langs)
        
    return languages_to_features(src_langs), languages_to_features(tgt_langs)

def languages_to_features(languages_list):
    from sklearn.decomposition import PCA
    import lang2vec.lang2vec as l2v
    features_list = ["syntax_knn", "phonology_knn", "inventory_knn"]
    features = l2v.get_features(languages_list, features_list)
    
    features_matrix = []
    for language in languages_list:
        features_matrix.append(features[language])
        
    pca = PCA(n_components=6)   
    pca_features_matrix = pca.fit_transform(features_matrix)
    res = {}
    
    for i, language in enumerate(languages_list):
        res[languages_list[i]] = pca_features_matrix[i]
    return res

In [5]:
WIKI_MT_DIR = "../data/data_wiki_new.csv"
WIKI_SRC = "Source"
WIKI_TGT = "Target"
WIKI_SCORE = ["BLEU"]
WIKI_SIDE_FEATURES = ['geographic', 'genetic', 'inventory', 'syntactic', 'phonological', 'featural']

BLI_DIR = "../data/data_bli2_new.csv"
BLI_SRC = "Source Language Code"
BLI_TGT = "Target Language Code"
BLI_SCORE = ["Muse", "Vecmap"]
BLI_SIDE_FEATURES = ['genetic', 'syntactic', 'featural', 'phonological', 'inventory', 'geographic']
# The score metric is from "NLPerf/src/task_feats.py"

k = 5
num_running = 10

WIKI_SIDE_INFO_DICT = get_language_pair_side_info(WIKI_MT_DIR, WIKI_SIDE_FEATURES, WIKI_SRC, WIKI_TGT)
WIKI_SRC_SIDE_INFO_DICT, WIKI_TGT_SIDE_INFO_DICT = get_language_side_information(WIKI_MT_DIR, WIKI_SRC, WIKI_TGT)

BLI_SIDE_INFO_DICT = get_language_pair_side_info(BLI_DIR, BLI_SIDE_FEATURES, BLI_SRC, BLI_TGT)
BLI_SRC_SIDE_INFO_DICT, BLI_TGT_SIDE_INFO_DICT = get_language_side_information(BLI_DIR, BLI_SRC, BLI_TGT)



candidate_alpha = [0.001, 0.0005, 0.0001]
candidate_beta = [0.1, 0.01, 0.001, 0.0005]

gs_result = {}

for alpha in candidate_alpha:
    for beta in candidate_beta:

        wiki_result = get_result(alpha, beta, WIKI_MT_DIR, WIKI_SCORE, WIKI_SRC, WIKI_TGT, k, num_running, \
                                 src_lang_side_info=WIKI_SRC_SIDE_INFO_DICT, \
                                 tgt_lang_side_info=WIKI_TGT_SIDE_INFO_DICT, \
                                 src_si_len=6, tgt_si_len=6)
        bli_result = get_result(alpha, beta, BLI_DIR, BLI_SCORE, BLI_SRC, BLI_TGT, k, num_running, \
                                 src_lang_side_info=BLI_SRC_SIDE_INFO_DICT, \
                                 tgt_lang_side_info=BLI_TGT_SIDE_INFO_DICT, \
                                 src_si_len=6, tgt_si_len=6)
        gs_result[str(alpha) + "_" + str(beta)] = [wiki_result, bli_result]

# wiki_mt_score_matrix = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")
# wiki_mt_score_matrix, wiki_mt_valid = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")xdz
# bli_score_matrix, bli_valid = generate_score_matrix(BLI_DIR, "Source Language Code", "Target Language Code", ["Muse", "Vecmap"])

num running: 1
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.765466488923333.
********************
	Fold 2: 
		rmse is 3.358771553292337.
********************
	Fold 3: 
		rmse is 3.1978900982280054.
********************
	Fold 4: 
		rmse is 3.7761406062657206.
********************
	Fold 5: 
		rmse is 2.4656906345691234.
********************
average rmse: 3.5127918762557035
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.727124200467537.
********************
	Fold 2: 
		rmse is 4.359496455693107.
********************
	Fold 3: 
		rmse is 3.74527475546136.
********************
	Fold 4: 
		rmse is 2.8589640319382377.
********************
	Fold 5: 
		rmse is 3.2839436311097843.
********************
average rmse: 3.594960614934005
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.450743139381473.
********************
	Fold 2: 
		rmse is 3.3997147082984087.
********************
	Fold 3: 
		rmse is 3.10324

		rmse is 11.945249989721294.
********************
	Fold 2: 
		rmse is 7.994975249760553.
********************
	Fold 3: 
		rmse is 14.724045331818468.
********************
	Fold 4: 
		rmse is 14.364356406266007.
********************
	Fold 5: 
		rmse is 9.59961570792773.
********************
average rmse: 11.72564853709881
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 12.68753933797284.
********************
	Fold 2: 
		rmse is 26.318058323035228.
********************
	Fold 3: 
		rmse is 39.58759180850381.
********************
	Fold 4: 
		rmse is 14.298752985522334.
********************
	Fold 5: 
		rmse is 28.97321378758192.
********************
average rmse: 24.373031248523226
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 9.748799045540698.
********************
	Fold 2: 
		rmse is 9.716220545413137.
********************
	Fold 3: 
		rmse is 11.116401337270748.
********************
	Fold 4: 
		rmse is 10.377402983194653.
***********

		rmse is 17.637312172588672.
********************
	Fold 2: 
		rmse is 14.395291978361913.
********************
	Fold 3: 
		rmse is 11.94720820985666.
********************
	Fold 4: 
		rmse is 14.880405602292367.
********************
	Fold 5: 
		rmse is 9.907080590307043.
********************
average rmse: 13.753459710681332
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 11.077359412140334.
********************
	Fold 2: 
		rmse is 13.840399234540312.
********************
	Fold 3: 
		rmse is 7.095012392638122.
********************
	Fold 4: 
		rmse is 9.854951802143335.
********************
	Fold 5: 
		rmse is 18.019861147011664.
********************
average rmse: 11.977516797694753
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 11.863124581374143.
********************
	Fold 2: 
		rmse is 20.382388316478337.
********************
	Fold 3: 
		rmse is 14.508812358097348.
********************
	Fold 4: 
		rmse is 11.116783145049931.
******

		rmse is 2.821329765042456.
********************
	Fold 2: 
		rmse is 3.212425322350215.
********************
	Fold 3: 
		rmse is 3.0617589979005597.
********************
	Fold 4: 
		rmse is 3.7037766272660955.
********************
	Fold 5: 
		rmse is 8.843561224419979.
********************
average rmse: 4.328570387395861
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.6685761673740322.
********************
	Fold 2: 
		rmse is 3.3825458205011185.
********************
	Fold 3: 
		rmse is 3.8495520903931073.
********************
	Fold 4: 
		rmse is 12.764384030904203.
********************
	Fold 5: 
		rmse is 2.931626196851494.
********************
average rmse: 5.119336861204792
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.793135192849314.
********************
	Fold 2: 
		rmse is 8.344220144906208.
********************
	Fold 3: 
		rmse is 3.555849439155478.
********************
	Fold 4: 
		rmse is 3.6245601883807166

		rmse is 19.563333401944586.
********************
	Fold 2: 
		rmse is 13.690144889432057.
********************
	Fold 3: 
		rmse is 10.906686811005134.
********************
	Fold 4: 
		rmse is 45.025327772424234.
********************
	Fold 5: 
		rmse is 12.471049022302521.
********************
average rmse: 20.331308379421706
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 14.063981539399016.
********************
	Fold 2: 
		rmse is 20.353844063143708.
********************
	Fold 3: 
		rmse is 14.812526260548601.
********************
	Fold 4: 
		rmse is 23.759771322245435.
********************
	Fold 5: 
		rmse is 11.526924714933633.
********************
average rmse: 16.903409580054078
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 16.301397797584816.
********************
	Fold 2: 
		rmse is 18.199028141001683.
********************
	Fold 3: 
		rmse is 11.640519229083116.
********************
	Fold 4: 
		rmse is 25.61417932928764.
***

		rmse is 63.22203417348659.
********************
	Fold 2: 
		rmse is 19.602633895431058.
********************
	Fold 3: 
		rmse is 10.221970370218637.
********************
	Fold 4: 
		rmse is 10.2677107055098.
********************
	Fold 5: 
		rmse is 11.616559065038837.
********************
average rmse: 22.986181641936987
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 9.686961331316281.
********************
	Fold 2: 
		rmse is 12.065324919948155.
********************
	Fold 3: 
		rmse is 24.36286671779522.
********************
	Fold 4: 
		rmse is 12.89206370793035.
********************
	Fold 5: 
		rmse is 28.106454121284752.
********************
average rmse: 17.42273415965495
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 6.084345823965053.
********************
	Fold 2: 
		rmse is 9.388027082243799.
********************
	Fold 3: 
		rmse is 30.832345400922957.
********************
	Fold 4: 
		rmse is 14.007006017141387.
***********

		rmse is 2.9261359724374785.
********************
	Fold 2: 
		rmse is 2.797767713487789.
********************
	Fold 3: 
		rmse is 3.625464716504369.
********************
	Fold 4: 
		rmse is 2.9295533542676875.
********************
	Fold 5: 
		rmse is 4.167582169128562.
********************
average rmse: 3.289300785165177
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.189777634996172.
********************
	Fold 2: 
		rmse is 2.5445722680927507.
********************
	Fold 3: 
		rmse is 4.17428494849962.
********************
	Fold 4: 
		rmse is 3.351733514827567.
********************
	Fold 5: 
		rmse is 3.0906382522312383.
********************
average rmse: 3.270201323729469
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.015567831084168.
********************
	Fold 2: 
		rmse is 3.3534480033034186.
********************
	Fold 3: 
		rmse is 4.721760019616136.
********************
	Fold 4: 
		rmse is 2.8619730487762287.


		rmse is 15.143756235160746.
********************
	Fold 2: 
		rmse is 24.410391091990444.
********************
	Fold 3: 
		rmse is 15.341582340682052.
********************
	Fold 4: 
		rmse is 17.039394077308298.
********************
	Fold 5: 
		rmse is 12.54803661515363.
********************
average rmse: 16.896632072059035
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 11.502087464976565.
********************
	Fold 2: 
		rmse is 41.02065699137693.
********************
	Fold 3: 
		rmse is 12.711636431719603.
********************
	Fold 4: 
		rmse is 12.63543458188637.
********************
	Fold 5: 
		rmse is 22.980998363041955.
********************
average rmse: 20.17016276660028
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 13.763675567403965.
********************
	Fold 2: 
		rmse is 12.972653177905594.
********************
	Fold 3: 
		rmse is 8.573985912449764.
********************
	Fold 4: 
		rmse is 12.21673212538462.
*******

		rmse is 15.676319851658503.
********************
	Fold 2: 
		rmse is 15.401178269649394.
********************
	Fold 3: 
		rmse is 10.453896374864124.
********************
	Fold 4: 
		rmse is 13.605481713940708.
********************
	Fold 5: 
		rmse is 41.943326064006854.
********************
average rmse: 19.416040454823918
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 12.827398210058705.
********************
	Fold 2: 
		rmse is 10.482334160777343.
********************
	Fold 3: 
		rmse is 6.106878019286446.
********************
	Fold 4: 
		rmse is 4.724017976049557.
********************
	Fold 5: 
		rmse is 10.595659965345533.
********************
average rmse: 8.947257666303518
num running: 6
----------------------------------------
Muse
	Fold 1: 
		rmse is 9.301500280454592.
********************
	Fold 2: 
		rmse is 21.823542432749193.
********************
	Fold 3: 
		rmse is 14.790763370132247.
********************
	Fold 4: 
		rmse is 27.428427618262265.
******

		rmse is 3.4329220025417175.
********************
	Fold 2: 
		rmse is 3.31925519042819.
********************
	Fold 3: 
		rmse is 3.1846164344512347.
********************
	Fold 4: 
		rmse is 2.634055794019446.
********************
	Fold 5: 
		rmse is 3.390777828853604.
********************
average rmse: 3.1923254500588385
num running: 1
----------------------------------------
Muse
	Fold 1: 
		rmse is 12.357784395271892.
********************
	Fold 2: 
		rmse is 35.34338280002662.
********************
	Fold 3: 
		rmse is 10.268933473536489.
********************
	Fold 4: 
		rmse is 13.21284468310847.
********************
	Fold 5: 
		rmse is 11.359367148012128.
********************
average rmse: 16.50846249999112
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 13.549173929956838.
********************
	Fold 2: 
		rmse is 19.59611929145336.
********************
	Fold 3: 
		rmse is 24.22138793740406.
********************
	Fold 4: 
		rmse is 6.951147478333242.
************

		rmse is 3.3139447444956573.
********************
	Fold 2: 
		rmse is 3.696161563979534.
********************
	Fold 3: 
		rmse is 2.857870453212778.
********************
	Fold 4: 
		rmse is 5.184520130860398.
********************
	Fold 5: 
		rmse is 3.4940950513817155.
********************
average rmse: 3.7093183887860173
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.1142267846316374.
********************
	Fold 2: 
		rmse is 3.6354376221237357.
********************
	Fold 3: 
		rmse is 6.425458932272007.
********************
	Fold 4: 
		rmse is 9.10638075083025.
********************
	Fold 5: 
		rmse is 2.7814958357313557.
********************
average rmse: 5.012599985117797
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.080049669964696.
********************
	Fold 2: 
		rmse is 3.6300147797255797.
********************
	Fold 3: 
		rmse is 3.3942475186505203.
********************
	Fold 4: 
		rmse is 3.128005150740658

		rmse is 29.644450560294082.
********************
	Fold 2: 
		rmse is 13.308645698115733.
********************
	Fold 3: 
		rmse is 20.230822079502094.
********************
	Fold 4: 
		rmse is 11.953213588150366.
********************
	Fold 5: 
		rmse is 15.717363050364074.
********************
average rmse: 18.170898995285267
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 13.496411081364304.
********************
	Fold 2: 
		rmse is 12.912352297243824.
********************
	Fold 3: 
		rmse is 14.467220312432568.
********************
	Fold 4: 
		rmse is 27.924409811445884.
********************
	Fold 5: 
		rmse is 22.588014192918656.
********************
average rmse: 18.277681539081044
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 17.429287709664994.
********************
	Fold 2: 
		rmse is 14.084266526884488.
********************
	Fold 3: 
		rmse is 9.920420817141054.
********************
	Fold 4: 
		rmse is 11.038065910893696.
***

		rmse is 16.150070098044868.
********************
	Fold 2: 
		rmse is 15.448385246439054.
********************
	Fold 3: 
		rmse is 12.580741305285045.
********************
	Fold 4: 
		rmse is 10.154087922188534.
********************
	Fold 5: 
		rmse is 14.764920937200463.
********************
average rmse: 13.819641101831593
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 13.410704577300127.
********************
	Fold 2: 
		rmse is 9.113103736395018.
********************
	Fold 3: 
		rmse is 11.78484262564874.
********************
	Fold 4: 
		rmse is 13.251866361630793.
********************
	Fold 5: 
		rmse is 13.44518037164281.
********************
average rmse: 12.201139534523497
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 11.187169983671954.
********************
	Fold 2: 
		rmse is 12.221579983551653.
********************
	Fold 3: 
		rmse is 12.720088657465773.
********************
	Fold 4: 
		rmse is 18.24083273279014.
******

		rmse is 3.0572641586142137.
********************
	Fold 2: 
		rmse is 3.7931813247809267.
********************
	Fold 3: 
		rmse is 3.2618630556482904.
********************
	Fold 4: 
		rmse is 3.153230165377813.
********************
	Fold 5: 
		rmse is 3.7962214881956.
********************
average rmse: 3.4123520385233688
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.194060547346765.
********************
	Fold 2: 
		rmse is 3.6100720927041965.
********************
	Fold 3: 
		rmse is 3.755315869240547.
********************
	Fold 4: 
		rmse is 3.79304549049023.
********************
	Fold 5: 
		rmse is 3.385043172217294.
********************
average rmse: 3.5475074343998068
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.0251730060303883.
********************
	Fold 2: 
		rmse is 3.4682437920691878.
********************
	Fold 3: 
		rmse is 2.9678028762988706.
********************
	Fold 4: 
		rmse is 4.100035910317054.

		rmse is 13.97235775629613.
********************
	Fold 2: 
		rmse is 9.409445614626348.
********************
	Fold 3: 
		rmse is 11.466860460060117.
********************
	Fold 4: 
		rmse is 12.781288717344557.
********************
	Fold 5: 
		rmse is 11.087060036634881.
********************
average rmse: 11.743402516992408
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 12.082971963701516.
********************
	Fold 2: 
		rmse is 11.297354310308533.
********************
	Fold 3: 
		rmse is 11.400295209669014.
********************
	Fold 4: 
		rmse is 11.077131407191915.
********************
	Fold 5: 
		rmse is 9.102471739159723.
********************
average rmse: 10.992044926006141
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 19.74748277231228.
********************
	Fold 2: 
		rmse is 13.765321349631693.
********************
	Fold 3: 
		rmse is 12.911331369350883.
********************
	Fold 4: 
		rmse is 14.167680410913754.
******

		rmse is 15.27536955574484.
********************
	Fold 2: 
		rmse is 8.50600621529879.
********************
	Fold 3: 
		rmse is 11.75714482498835.
********************
	Fold 4: 
		rmse is 10.38719602579612.
********************
	Fold 5: 
		rmse is 12.806217028364982.
********************
average rmse: 11.746386730038617
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 16.528475807305448.
********************
	Fold 2: 
		rmse is 19.693036999705704.
********************
	Fold 3: 
		rmse is 18.162393387763654.
********************
	Fold 4: 
		rmse is 10.78333510494887.
********************
	Fold 5: 
		rmse is 12.513175670374716.
********************
average rmse: 15.53608339401968
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 14.99430454577701.
********************
	Fold 2: 
		rmse is 9.287140692597072.
********************
	Fold 3: 
		rmse is 14.66337298205367.
********************
	Fold 4: 
		rmse is 11.904321337832558.
************

		rmse is 3.3389174683645373.
********************
	Fold 2: 
		rmse is 2.9630738066238234.
********************
	Fold 3: 
		rmse is 3.5951447328515944.
********************
	Fold 4: 
		rmse is 3.6192093530692877.
********************
	Fold 5: 
		rmse is 3.7402840735197582.
********************
average rmse: 3.4513258868858
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.523240311988416.
********************
	Fold 2: 
		rmse is 2.785801748079537.
********************
	Fold 3: 
		rmse is 3.111943714829037.
********************
	Fold 4: 
		rmse is 3.859777889735802.
********************
	Fold 5: 
		rmse is 3.4321369404696958.
********************
average rmse: 3.342580121020498
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.620217705268069.
********************
	Fold 2: 
		rmse is 3.279403096326389.
********************
	Fold 3: 
		rmse is 2.840539655253478.
********************
	Fold 4: 
		rmse is 3.3665574866012866.


		rmse is 11.942160192452974.
********************
	Fold 2: 
		rmse is 6.030496904769882.
********************
	Fold 3: 
		rmse is 15.967085461037328.
********************
	Fold 4: 
		rmse is 12.775369678974803.
********************
	Fold 5: 
		rmse is 8.1951225304184.
********************
average rmse: 10.982046953530679
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 12.895362894184863.
********************
	Fold 2: 
		rmse is 14.208232056035822.
********************
	Fold 3: 
		rmse is 9.029965045621852.
********************
	Fold 4: 
		rmse is 18.96143953111151.
********************
	Fold 5: 
		rmse is 13.061578193266248.
********************
average rmse: 13.63131554404406
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 12.775860252298973.
********************
	Fold 2: 
		rmse is 11.800909954930283.
********************
	Fold 3: 
		rmse is 6.889498781973758.
********************
	Fold 4: 
		rmse is 16.180947285601363.
*********

In [6]:
print(wiki_result)
print(bli_result)

3.334936241718227
13.23969452570372


In [7]:
gs_result

{'0.001_0.1': [3.4681983357997277, 15.68542514797424],
 '0.001_0.01': [3.9454723937794633, 16.56433121731135],
 '0.001_0.001': [4.616665948795783, 18.632306811707156],
 '0.001_0.0005': [3.9489059558221733, 17.196953853478128],
 '0.0005_0.1': [3.312174019412238, 14.576881945903205],
 '0.0005_0.01': [3.744642161180644, 16.1632085535165],
 '0.0005_0.001': [3.8283679585363437, 15.915936005802882],
 '0.0005_0.0005': [4.013139140290097, 14.800869687812542],
 '0.0001_0.1': [3.3068489162804737, 13.591226492282857],
 '0.0001_0.01': [3.333717632906429, 13.280938268812722],
 '0.0001_0.001': [3.3352710020799714, 13.71099221409643],
 '0.0001_0.0005': [3.334936241718227, 13.23969452570372]}