In [1]:
%matplotlib inline

In [2]:
import numpy as np

class MF():

    def __init__(self, R, K, alpha, beta, iterations, test_samples, X=None, Y=None, Z=None, src_si_len=0, \
                 tgt_si_len=0, lang_pair_si_len=0, src_index=None, tgt_index=None, model=None, num_running=0):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)                  : src-tgt language rating matrix
        - K (int)                      : number of latent dimensions
        - alpha (float)                : learning rate
        - beta (float)                 : regularization parameter
        - X (dict)                     : source language side information
        - Y (dict)                     : target language side information
        - Z (dict)                     : language pair side information
        - src_si_len(int)              : source language side information length
        - tgt_si_len(int)              : target language side information length
        - lang_pair_si_len(int)        : language pair side information length
        """

        self.R = np.array(R)
        self.Prediction = deepcopy(self.R)
        self.src_langs = R.index.tolist()
        self.tgt_langs = R.columns.tolist()
        self.num_src, self.num_tgt = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations
        self.X = X
        self.Y = Y
        self.Z = Z
        self.src_si_len = src_si_len
        self.tgt_si_len = tgt_si_len
        self.lang_pair_si_len = lang_pair_si_len
        self.test_samples = test_samples
        self.src_index=src_index 
        self.tgt_index=tgt_index
        self.model=model
        self.num_running = num_running
        self.score_dict = {'BLEU': "WIKI-MT", "Muse": "BLI-Muse", "Vecmap": "BLI-Vecmap"}
        self.traing_error_log=[]
        self.test_error_log=[]
        

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_src, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_tgt, self.K))

        # Initialize side information's parameter if necesary
        if self.X and self.src_si_len:
            self.A = np.random.normal(scale = 1./self.src_si_len, size=self.src_si_len)
        if self.Y and self.tgt_si_len:
            self.B = np.random.normal(scale = 1./self.tgt_si_len, size=self.tgt_si_len)
        if self.Z and self.lang_pair_si_len:
            self.C = np.random.normal(scale = 1./self.lang_pair_si_len, size=self.lang_pair_si_len)
        
        # Initialize the biases
        # the biases of users and items are initilized as 0
        # the bias of rating is initilized as mean value
        self.b_u = np.zeros(self.num_src)
        self.b_i = np.zeros(self.num_tgt)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples (where rating > 0)
        self.samples = []
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                if self.R[i,j] > 0:
                    cur_tuple = [i, j, self.R[i, j]]
                    src_lang = self.src_langs[i]
                    tgt_lang = self.tgt_langs[j]
                    if self.X:
                        if src_lang in self.X.keys():
                            cur_tuple.append(self.X[src_lang])
                        else:
                            raise KeyError
                    if self.Y:
                        if tgt_lang in self.Y.keys():
                            cur_tuple.append(self.Y[src_lang])
                        else:
                            raise KeyError
                    if self.Z:
                        if src_lang + "_" + tgt_lang in self.Z.keys():
                            cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                        else:
                            raise KeyError
                    self.samples.append(tuple(cur_tuple))

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            # shuffle training samples
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 100 == 0:
                self.traing_error_log.append((i, mse))
                test_mse = self.evaluate_testing(self.test_samples, self.src_index, self.tgt_index, self.model)
                self.test_error_log.append((i, test_mse))
#                 print("\t\tIteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
#         predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x][y] - self.Prediction[x][y], 2)
        return np.sqrt(error / len(xs))

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for sample in self.samples:
            i, j, r = sample[0], sample[1], sample[2]
            # Computer prediction and error
            prediction = self.get_rating(sample)
            self.Prediction[i][j] = prediction
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])
            
            # Update side information parameter if necessary
            cur_index = 3
            if self.X:
                x = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.A += self.alpha * (e * x - self.beta * self.A)
            if self.Y:
                y = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.B += self.alpha * (e * y - self.beta * self.B)
            if self.Z:
                z = np.array(sample[cur_index], dtype=np.float64)
                self.C += self.alpha * (e * z - self.beta * self.C)
                

    def get_rating(self, sample):
        """
        Get the predicted rating of sample
        """
        i, j, r = sample[0], sample[1], sample[2]
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        cur_index = 3
        if self.X:
            x = sample[cur_index]
            cur_index += 1
            prediction += self.A.dot(x.T)
        if self.Y:
            y = sample[cur_index]
            cur_index += 1
            prediction += self.B.dot(y.T)
        if self.Z:
            z = sample[cur_index]
            prediction += self.C.dot(z.T)
        return prediction
    
    def evaluate_testing(self, test_data, src_index_name, tgt_index_name, score_index_name):
        """
        Predict the score for testing data
        """ 
        rmse = 0.0
        for record in test_data.iterrows():
            record = record[1]
            src_lang = record[src_index_name]
            tgt_lang = record[tgt_index_name]
            src_lang_index = self.src_langs.index(src_lang)
            tgt_lang_index = self.tgt_langs.index(tgt_lang)
            score = record[score_index_name]
            cur_tuple = [src_lang_index, tgt_lang_index, score]
            if self.X:
                if src_lang in self.X.keys():
                    cur_tuple.append(self.X[src_lang])
                else:
                    raise KeyError
            if self.Y:
                if tgt_lang in self.Y.keys():
                    cur_tuple.append(self.Y[src_lang])
                else:
                    raise KeyError
            if self.Z:
                if src_lang + "_" + tgt_lang in self.Z.keys():
                    cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                else:
                    raise KeyError
            prediction = self.get_rating(tuple(cur_tuple))
            rmse += (prediction - score) * (prediction - score)
        return np.sqrt(rmse / len(test_data))
        

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P, Q, A, B, and C
        """
        res = deepcopy(self.R)
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                src_lang = self.src_langs[i]
                tgt_lang = self.tgt_langs[j]
                res[i][j] = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
                if self.X and src_lang in self.X.keys():
                    x = self.X[src_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Y and tgt_lang in self.Y.keys():
                    y = self.X[tgt_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Z and src_lang + "_" + tgt_lang in self.Z.keys():
                    z = self.X[src_lang + "_" + tgt_lang]
                    res[i][j] += self.A.dot(x.T)
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)
    
    def draw_error_curve(self, i):
        import matplotlib.pyplot as plt
        import numpy as np
        iters = []
        train_loss = []
        test_loss = []
        for item in self.traing_error_log:
            iters.append(item[0])
            train_loss.append(item[1])
        for item in self.test_error_log:
            test_loss.append(item[1])

        plt.plot(iters, train_loss, 'b', label='train loss')#'b'指：color='blue'
        plt.plot(iters, test_loss, 'r', label='test loss')#'r'指：color='red'

        plt.legend()  #显示上面的label
        plt.xlabel('Iteration')
        plt.ylabel('Loss')
        title = self.score_dict[self.model] + "_" + str(self.num_running+1) + "_Fold" + str(i+1)
        plt.title(title) 
        plt.savefig("../result/within_pair_single_si/" + self.score_dict[self.model] + "/fold" + str(i+1) + "/" + title + ".png")

        #plt.ylim(-1,1)#仅设置y轴坐标范围
        plt.show()

        

Represent the two tasks above as matrices (BLEU scores for Wiki-MT and Accuracy for BLI) 

In [3]:
import pandas as pd
import numpy as np
np.random.seed(2021)

In [4]:
from copy import deepcopy
import pandas as pd
import random as rd

def split_k_fold_data(file_dir, score_index_name, src_index_name, tgt_index_name, k=5):
    data = pd.read_csv(file_dir)
    
    # shuffle
    data = data.sample(frac=1)
    
    # generate score matrix
    src_langs = data[src_index_name].unique()
    tgt_langs = data[tgt_index_name].unique()
    score_matrix = pd.DataFrame(index = src_langs, columns = tgt_langs)
    
    # eliminate empty rows and columns
    data = data.dropna(axis=1, how="all")
    data = data.dropna(axis=0, how="all")
    
    # K fold split
    k_fold_data = {}
    models = list(score_index_name)
    lens= len(data)
    
    for i, model in enumerate(models):
        ex_per_fold = int(np.ceil(lens / k))
        for j in range(k):
            start = ex_per_fold * j
            end = ex_per_fold * (j + 1)
            if j == 0:
                k_fold_data[model] = {"train": [], "test": []}
            k_fold_data[model]["train"].append(pd.concat([data.iloc[:start, :], data.iloc[end:, :]], axis=0))
            k_fold_data[model]["test"].append(data.iloc[start:end, :])
    return k_fold_data, score_matrix

def generate_score_matrix(train_data, src_index_name, tgt_index_name, score_index_name, origin_score_matrix):    
    score_matrix = deepcopy(origin_score_matrix)
    
    for record in train_data.iterrows():
        record = record[1]
        src_lang = record[src_index_name]
        tgt_lang = record[tgt_index_name]
        score = record[score_index_name]
        score_matrix.loc[src_lang, tgt_lang] = score
#         score_matrix[src_lang][tgt_lang] = score
    score_matrix.fillna(0, inplace=True)
    return score_matrix

def get_rmse(valid_data, model, src_index_name, tgt_index_name, score_matrix, train_matrix):
    rmse = 0.0
    src_langs = train_matrix.index.tolist()
    tgt_langs = train_matrix.columns.tolist()
    for cur_valid_data in valid_data.iterrows():
        cur_valid_data = cur_valid_data[1]
        src_lang, tgt_lang, score = cur_valid_data[src_index_name], cur_valid_data[tgt_index_name], cur_valid_data[model]
        src_idx = src_langs.index(src_lang)
        tgt_idx = tgt_langs.index(tgt_lang)
        prediction = score_matrix[src_idx][tgt_idx]
        rmse += (prediction - score) * (prediction - score)
    return np.sqrt(rmse / len(valid_data))

def get_result(alpha, beta, data_dir, scores, src_index, tgt_index, k, num_running, src_lang_side_info=None, \
               tgt_lang_side_info=None, lang_pair_side_info=None, src_si_len=0, tgt_si_len=0, \
               lang_pair_si_len=0):
    all_running_rmse = 0.0
    for nr in range(num_running):
        print("num running: " + str(nr+1))
        data, langs_matrix = split_k_fold_data(data_dir, scores, src_index, tgt_index, k)
        
        src_langs = langs_matrix.index.tolist()
        tgt_langs = langs_matrix.columns.tolist()
        
        res = {}
        for model in scores:
            print("-"*40)
            print(model)
            total_rmse = 0.0
            for i in range(k):
                print("\tFold {}: ".format(i+1))
                train_data, test_data = data[model]["train"][i], data[model]["test"][i]
                train_matrix = generate_score_matrix(train_data, src_index, tgt_index, model, langs_matrix)
                mf = MF(train_matrix, K=2, alpha=alpha, beta=beta, iterations=2000, test_samples=test_data, X=src_lang_side_info, \
                        Y=tgt_lang_side_info, Z=lang_pair_side_info, src_si_len=src_si_len, tgt_si_len=tgt_si_len, \
                        lang_pair_si_len=lang_pair_si_len, src_index=src_index, tgt_index=tgt_index, model=model, \
                       num_running=nr)
                trainging_log = mf.train()
#                 predictions = mf.full_matrix()
#                 cur_rmse = get_rmse(test_data, model, src_index, tgt_index, predictions, train_matrix)
                cur_rmse = mf.evaluate_testing(test_data, src_index, tgt_index, model)
#                 mf.draw_error_curve(i)
                total_rmse += cur_rmse
                print("\t\trmse is {}.".format(cur_rmse))
                print("*" * 20)
                
            average_rmse = total_rmse / k
            print("average rmse: " + str(average_rmse))
            res[model] = average_rmse
        res_rmse = 0.0
        for key, value in res.items():
            res_rmse += value
        all_running_rmse += res_rmse / len(scores)
    return all_running_rmse / num_running

def get_language_pair_side_info(data_dir, side_info_features, src_lang_name, tgt_lang_name):
    data = pd.read_csv(data_dir)
    side_dict = {}
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        side_dict[src_lang + "_" + tgt_lang] = record[side_info_features].values
    return side_dict

def get_language_side_information(data_dir, src_lang_name, tgt_lang_name):
    
    data = pd.read_csv(data_dir)
    side_dict = {}
    src_langs = set()
    tgt_langs = set()
    
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        src_langs.add(src_lang)
        tgt_langs.add(tgt_lang)
        
    src_langs = list(src_langs)
    tgt_langs = list(tgt_langs)
        
    return languages_to_features(src_langs), languages_to_features(tgt_langs)

def languages_to_features(languages_list):
    from sklearn.decomposition import PCA
    import lang2vec.lang2vec as l2v
    features_list = ["syntax_knn", "phonology_knn", "inventory_knn"]
    features = l2v.get_features(languages_list, features_list)
    
    features_matrix = []
    for language in languages_list:
        features_matrix.append(features[language])
        
    pca = PCA(n_components=6)   
    pca_features_matrix = pca.fit_transform(features_matrix)
    res = {}
    
    for i, language in enumerate(languages_list):
        res[languages_list[i]] = pca_features_matrix[i]
    return res

In [5]:
WIKI_MT_DIR = "../data/data_wiki_new.csv"
WIKI_SRC = "Source"
WIKI_TGT = "Target"
WIKI_SCORE = ["BLEU"]
WIKI_SIDE_FEATURES = ['geographic', 'genetic', 'inventory', 'syntactic', 'phonological', 'featural']

BLI_DIR = "../data/data_bli2_new.csv"
BLI_SRC = "Source Language Code"
BLI_TGT = "Target Language Code"
BLI_SCORE = ["Muse", "Vecmap"]
BLI_SIDE_FEATURES = ['genetic', 'syntactic', 'featural', 'phonological', 'inventory', 'geographic']
# The score metric is from "NLPerf/src/task_feats.py"

k = 5
num_running = 10

WIKI_SIDE_INFO_DICT = get_language_pair_side_info(WIKI_MT_DIR, WIKI_SIDE_FEATURES, WIKI_SRC, WIKI_TGT)
WIKI_SRC_SIDE_INFO_DICT, WIKI_TGT_SIDE_INFO_DICT = get_language_side_information(WIKI_MT_DIR, WIKI_SRC, WIKI_TGT)

BLI_SIDE_INFO_DICT = get_language_pair_side_info(BLI_DIR, BLI_SIDE_FEATURES, BLI_SRC, BLI_TGT)
BLI_SRC_SIDE_INFO_DICT, BLI_TGT_SIDE_INFO_DICT = get_language_side_information(BLI_DIR, BLI_SRC, BLI_TGT)


candidate_alpha = [0.001, 0.0005, 0.0001]
candidate_beta = [0.1, 0.01, 0.001, 0.0005]

gs_result = {}

for alpha in candidate_alpha:
    for beta in candidate_beta:
        wiki_result = get_result(alpha, beta, WIKI_MT_DIR, WIKI_SCORE, WIKI_SRC, WIKI_TGT, k, num_running, \
                                 src_lang_side_info=WIKI_SRC_SIDE_INFO_DICT, \
                                 tgt_lang_side_info=WIKI_TGT_SIDE_INFO_DICT, \
                                 lang_pair_side_info=WIKI_SIDE_INFO_DICT, \
                                 src_si_len=6, tgt_si_len=6, lang_pair_si_len = 6)
        bli_result = get_result(alpha, beta, BLI_DIR, BLI_SCORE, BLI_SRC, BLI_TGT, k, num_running, \
                                 src_lang_side_info=BLI_SRC_SIDE_INFO_DICT, \
                                 tgt_lang_side_info=BLI_TGT_SIDE_INFO_DICT, \
                                 lang_pair_side_info=BLI_SIDE_INFO_DICT, \
                                 src_si_len=6, tgt_si_len=6, lang_pair_si_len = 6)
        gs_result[str(alpha) + "_" + str(beta)] = [wiki_result, bli_result]

# wiki_mt_score_matrix = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")
# wiki_mt_score_matrix, wiki_mt_valid = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")xdz
# bli_score_matrix, bli_valid = generate_score_matrix(BLI_DIR, "Source Language Code", "Target Language Code", ["Muse", "Vecmap"])

num running: 1
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.64202739739885.
********************
	Fold 2: 
		rmse is 3.1768864539567545.
********************
	Fold 3: 
		rmse is 3.0559040020441843.
********************
	Fold 4: 
		rmse is 3.649358244041406.
********************
	Fold 5: 
		rmse is 2.364253504499103.
********************
average rmse: 3.3776859203880596
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.738767179988564.
********************
	Fold 2: 
		rmse is 4.068062717830568.
********************
	Fold 3: 
		rmse is 3.671477827610017.
********************
	Fold 4: 
		rmse is 2.65298040552037.
********************
	Fold 5: 
		rmse is 3.117177779614459.
********************
average rmse: 3.449693182112796
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.279618391094576.
********************
	Fold 2: 
		rmse is 2.9568721838081835.
********************
	Fold 3: 
		rmse is 3.349578817

		rmse is 18.884382432896412.
********************
	Fold 2: 
		rmse is 17.67407944446076.
********************
	Fold 3: 
		rmse is 14.414164844390658.
********************
	Fold 4: 
		rmse is 14.733379001485336.
********************
	Fold 5: 
		rmse is 20.44362264771108.
********************
average rmse: 17.22992567418885
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 7.811319373208779.
********************
	Fold 2: 
		rmse is 19.802282821080293.
********************
	Fold 3: 
		rmse is 12.325141900322421.
********************
	Fold 4: 
		rmse is 11.367282839372749.
********************
	Fold 5: 
		rmse is 16.477706229532444.
********************
average rmse: 13.556746632703337
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 24.23019151046173.
********************
	Fold 2: 
		rmse is 16.915007350388205.
********************
	Fold 3: 
		rmse is 9.556386101179358.
********************
	Fold 4: 
		rmse is 11.053639870706656.
********

		rmse is 17.945285421260273.
********************
	Fold 2: 
		rmse is 17.22299025665299.
********************
	Fold 3: 
		rmse is 25.602255741629847.
********************
	Fold 4: 
		rmse is 11.147426811467202.
********************
	Fold 5: 
		rmse is 7.345576687336479.
********************
average rmse: 15.852706983669359
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 7.774598203127363.
********************
	Fold 2: 
		rmse is 13.57217743859812.
********************
	Fold 3: 
		rmse is 11.638907516540833.
********************
	Fold 4: 
		rmse is 10.59155534588112.
********************
	Fold 5: 
		rmse is 12.045916566895917.
********************
average rmse: 11.12463101420867
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 32.21412841138615.
********************
	Fold 2: 
		rmse is 13.755074817874485.
********************
	Fold 3: 
		rmse is 12.091480510754076.
********************
	Fold 4: 
		rmse is 21.191999590053758.
*********

		rmse is 3.3023001593412937.
********************
	Fold 2: 
		rmse is 2.903783414775579.
********************
	Fold 3: 
		rmse is 6.439814356801279.
********************
	Fold 4: 
		rmse is 5.038310342663263.
********************
	Fold 5: 
		rmse is 2.5631027878531487.
********************
average rmse: 4.0494622122869135
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.5299478202143466.
********************
	Fold 2: 
		rmse is 6.326048181431116.
********************
	Fold 3: 
		rmse is 2.7016512698489423.
********************
	Fold 4: 
		rmse is 4.543164476511302.
********************
	Fold 5: 
		rmse is 5.347728479974704.
********************
average rmse: 4.289708045596082
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 5.153239968079905.
********************
	Fold 2: 
		rmse is 3.065584588692309.
********************
	Fold 3: 
		rmse is 2.202044969865201.
********************
	Fold 4: 
		rmse is 3.8078571583461187.

		rmse is 17.181500927408017.
********************
	Fold 2: 
		rmse is 33.118766035758554.
********************
	Fold 3: 
		rmse is 7.530223670275481.
********************
	Fold 4: 
		rmse is 9.842058182293274.
********************
	Fold 5: 
		rmse is 21.838159011693175.
********************
average rmse: 17.9021415654857
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 20.79837734949032.
********************
	Fold 2: 
		rmse is 16.29995046988588.
********************
	Fold 3: 
		rmse is 5.09269699570128.
********************
	Fold 4: 
		rmse is 8.145283652082878.
********************
	Fold 5: 
		rmse is 6.1117148414006985.
********************
average rmse: 11.28960466171221
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 43.00204818417852.
********************
	Fold 2: 
		rmse is 9.978654488947324.
********************
	Fold 3: 
		rmse is 21.06258426871964.
********************
	Fold 4: 
		rmse is 18.772458787466753.
***************

		rmse is 15.896485011273281.
********************
	Fold 2: 
		rmse is 13.849233721097757.
********************
	Fold 3: 
		rmse is 13.527904897491785.
********************
	Fold 4: 
		rmse is 9.006685688342186.
********************
	Fold 5: 
		rmse is 13.28106098568364.
********************
average rmse: 13.11227406077773
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 18.342601924807028.
********************
	Fold 2: 
		rmse is 8.83698201229434.
********************
	Fold 3: 
		rmse is 10.145957393419472.
********************
	Fold 4: 
		rmse is 10.803123902262874.
********************
	Fold 5: 
		rmse is 17.677411726071423.
********************
average rmse: 13.161215391771028
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 16.316562847326228.
********************
	Fold 2: 
		rmse is 28.320136733130877.
********************
	Fold 3: 
		rmse is 10.572091508584121.
********************
	Fold 4: 
		rmse is 11.110217510768335.
*******

		rmse is 2.958462372250597.
********************
	Fold 2: 
		rmse is 4.117939276600185.
********************
	Fold 3: 
		rmse is 2.906521117408289.
********************
	Fold 4: 
		rmse is 2.9896219703270375.
********************
	Fold 5: 
		rmse is 3.398620664710608.
********************
average rmse: 3.274233080259344
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.261843052456346.
********************
	Fold 2: 
		rmse is 3.9930304584376928.
********************
	Fold 3: 
		rmse is 2.4259389346389484.
********************
	Fold 4: 
		rmse is 4.0518488583978876.
********************
	Fold 5: 
		rmse is 4.556330027023452.
********************
average rmse: 3.8577982661908647
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.7493080622761172.
********************
	Fold 2: 
		rmse is 3.672166895647837.
********************
	Fold 3: 
		rmse is 2.9405743561812794.
********************
	Fold 4: 
		rmse is 2.876753120698979

		rmse is 8.973309817066562.
********************
	Fold 2: 
		rmse is 10.697560410985194.
********************
	Fold 3: 
		rmse is 14.289409273298139.
********************
	Fold 4: 
		rmse is 13.850807821575668.
********************
	Fold 5: 
		rmse is 12.886496142102176.
********************
average rmse: 12.139516693005547
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 11.178098763942284.
********************
	Fold 2: 
		rmse is 7.384532543024678.
********************
	Fold 3: 
		rmse is 17.758236982477843.
********************
	Fold 4: 
		rmse is 8.615647821809548.
********************
	Fold 5: 
		rmse is 15.627820627658313.
********************
average rmse: 12.112867347782531
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 18.560744660971476.
********************
	Fold 2: 
		rmse is 9.11773322986804.
********************
	Fold 3: 
		rmse is 14.565175436398345.
********************
	Fold 4: 
		rmse is 7.661821510646459.
*******

		rmse is 13.05980366595115.
********************
	Fold 2: 
		rmse is 46.78235189714683.
********************
	Fold 3: 
		rmse is 8.788565068747912.
********************
	Fold 4: 
		rmse is 17.383324635428934.
********************
	Fold 5: 
		rmse is 12.500450133710089.
********************
average rmse: 19.702899080196982
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 11.052133259105394.
********************
	Fold 2: 
		rmse is 13.18846854350965.
********************
	Fold 3: 
		rmse is 6.9987935465275655.
********************
	Fold 4: 
		rmse is 17.489372256655024.
********************
	Fold 5: 
		rmse is 7.817476722735498.
********************
average rmse: 11.309248865706627
num running: 6
----------------------------------------
Muse
	Fold 1: 
		rmse is 14.609042463705695.
********************
	Fold 2: 
		rmse is 26.992977364011104.
********************
	Fold 3: 
		rmse is 15.346604729531988.
********************
	Fold 4: 
		rmse is 7.70451378316143.
*********

		rmse is 2.7973432831661857.
********************
	Fold 2: 
		rmse is 4.53252291593111.
********************
	Fold 3: 
		rmse is 2.8915228374026216.
********************
	Fold 4: 
		rmse is 2.8246332794571707.
********************
	Fold 5: 
		rmse is 2.682345043377768.
********************
average rmse: 3.1456734718669717
num running: 1
----------------------------------------
Muse
	Fold 1: 
		rmse is 12.361690323090313.
********************
	Fold 2: 
		rmse is 17.00554944940098.
********************
	Fold 3: 
		rmse is 12.124734604789026.
********************
	Fold 4: 
		rmse is 28.164314116903366.
********************
	Fold 5: 
		rmse is 8.271285965794617.
********************
average rmse: 15.585514891995661
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 27.772738993071478.
********************
	Fold 2: 
		rmse is 12.077336638944312.
********************
	Fold 3: 
		rmse is 10.126001496212883.
********************
	Fold 4: 
		rmse is 8.541878590430382.
********

		rmse is 2.834798220466591.
********************
	Fold 3: 
		rmse is 3.824423089158886.
********************
	Fold 4: 
		rmse is 2.624761964571154.
********************
	Fold 5: 
		rmse is 2.4254961186025423.
********************
average rmse: 2.8717557053455556
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 5.597632911772726.
********************
	Fold 2: 
		rmse is 3.7328493777392913.
********************
	Fold 3: 
		rmse is 3.839399991119869.
********************
	Fold 4: 
		rmse is 4.707565257992897.
********************
	Fold 5: 
		rmse is 3.3048499938416507.
********************
average rmse: 4.236459506493286
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.095566395364206.
********************
	Fold 2: 
		rmse is 3.3171750584895268.
********************
	Fold 3: 
		rmse is 3.2581183503086963.
********************
	Fold 4: 
		rmse is 2.60515170715975.
********************
	Fold 5: 
		rmse is 2.767555890428636.


		rmse is 8.981270969204585.
********************
	Fold 3: 
		rmse is 8.11081561901257.
********************
	Fold 4: 
		rmse is 12.979825224256375.
********************
	Fold 5: 
		rmse is 12.69353986021152.
********************
average rmse: 11.24316832952276
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 10.615114386066084.
********************
	Fold 2: 
		rmse is 13.6912555089508.
********************
	Fold 3: 
		rmse is 8.527166310593074.
********************
	Fold 4: 
		rmse is 20.352639508924383.
********************
	Fold 5: 
		rmse is 12.329111737681716.
********************
average rmse: 13.103057490443211
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 11.706032305140644.
********************
	Fold 2: 
		rmse is 14.943774019770641.
********************
	Fold 3: 
		rmse is 6.077423695981559.
********************
	Fold 4: 
		rmse is 12.490045068891881.
********************
	Fold 5: 
		rmse is 9.066527224614571.
************

		rmse is 14.79695076804459.
********************
	Fold 2: 
		rmse is 19.099611981800752.
********************
	Fold 3: 
		rmse is 11.338091745958.
********************
	Fold 4: 
		rmse is 14.02187421555876.
********************
	Fold 5: 
		rmse is 17.38739504749019.
********************
average rmse: 15.32878475177046
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 12.867897066844387.
********************
	Fold 2: 
		rmse is 10.769969081966426.
********************
	Fold 3: 
		rmse is 9.69688457336655.
********************
	Fold 4: 
		rmse is 10.200708136940069.
********************
	Fold 5: 
		rmse is 10.697469843800679.
********************
average rmse: 10.84658574058362
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 7.907264148965878.
********************
	Fold 2: 
		rmse is 19.553808896404195.
********************
	Fold 3: 
		rmse is 11.950457217614453.
********************
	Fold 4: 
		rmse is 6.78881330167042.
***************

		rmse is 2.716116309956405.
********************
	Fold 2: 
		rmse is 3.337028863436834.
********************
	Fold 3: 
		rmse is 2.8170127261710562.
********************
	Fold 4: 
		rmse is 3.417459101986899.
********************
	Fold 5: 
		rmse is 2.9478915567807737.
********************
average rmse: 3.047101711666394
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.3394198678892257.
********************
	Fold 2: 
		rmse is 3.097388556555493.
********************
	Fold 3: 
		rmse is 3.4407974228550198.
********************
	Fold 4: 
		rmse is 2.474249992836541.
********************
	Fold 5: 
		rmse is 3.1341139922360086.
********************
average rmse: 3.0971939664744577
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.9257332112205217.
********************
	Fold 2: 
		rmse is 3.7699970799729576.
********************
	Fold 3: 
		rmse is 3.1720860831233377.
********************
	Fold 4: 
		rmse is 2.8138371863518

		rmse is 8.124875683739159.
********************
	Fold 2: 
		rmse is 14.71523862195043.
********************
	Fold 3: 
		rmse is 16.659335967393133.
********************
	Fold 4: 
		rmse is 9.36378188763893.
********************
	Fold 5: 
		rmse is 6.91034261025457.
********************
average rmse: 11.154714954195246
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 8.938447179814746.
********************
	Fold 2: 
		rmse is 10.906595397210136.
********************
	Fold 3: 
		rmse is 10.414504973474378.
********************
	Fold 4: 
		rmse is 9.11546784075684.
********************
	Fold 5: 
		rmse is 7.753644135217644.
********************
average rmse: 9.42573190529475
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 22.027879222556784.
********************
	Fold 2: 
		rmse is 13.568283357256696.
********************
	Fold 3: 
		rmse is 7.679842031345018.
********************
	Fold 4: 
		rmse is 11.483778231769687.
***************

		rmse is 10.737879280606384.
********************
	Fold 2: 
		rmse is 8.813963764058673.
********************
	Fold 3: 
		rmse is 10.252624173772551.
********************
	Fold 4: 
		rmse is 6.756301737309425.
********************
	Fold 5: 
		rmse is 14.019773804249414.
********************
average rmse: 10.11610855199929
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 7.457097295118973.
********************
	Fold 2: 
		rmse is 14.397147221060173.
********************
	Fold 3: 
		rmse is 12.340313800183146.
********************
	Fold 4: 
		rmse is 17.29548244530621.
********************
	Fold 5: 
		rmse is 10.876378429436441.
********************
average rmse: 12.473283838220988
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 11.133246858546528.
********************
	Fold 2: 
		rmse is 11.91207686195574.
********************
	Fold 3: 
		rmse is 10.562141898598236.
********************
	Fold 4: 
		rmse is 11.455693827366455.
********

		rmse is 3.169463280571177.
********************
	Fold 2: 
		rmse is 2.398293306366966.
********************
	Fold 3: 
		rmse is 3.2330920288008347.
********************
	Fold 4: 
		rmse is 3.377260724938949.
********************
	Fold 5: 
		rmse is 3.083568825748331.
********************
average rmse: 3.052335633285252
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.5135749716687372.
********************
	Fold 2: 
		rmse is 3.07814345451843.
********************
	Fold 3: 
		rmse is 3.525054094262606.
********************
	Fold 4: 
		rmse is 2.95468371307201.
********************
	Fold 5: 
		rmse is 2.737606381646134.
********************
average rmse: 3.1618125230335834
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.7747675928748405.
********************
	Fold 2: 
		rmse is 2.8132263434190983.
********************
	Fold 3: 
		rmse is 3.6660114113411923.
********************
	Fold 4: 
		rmse is 2.5890831697413033.


		rmse is 13.478621343617347.
********************
	Fold 2: 
		rmse is 9.759715412089598.
********************
	Fold 3: 
		rmse is 14.203142069370541.
********************
	Fold 4: 
		rmse is 10.316126935772287.
********************
	Fold 5: 
		rmse is 11.847861086943732.
********************
average rmse: 11.921093369558701
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 17.983664548497888.
********************
	Fold 2: 
		rmse is 11.840756808734902.
********************
	Fold 3: 
		rmse is 9.406910552795967.
********************
	Fold 4: 
		rmse is 9.75906369827227.
********************
	Fold 5: 
		rmse is 11.448651051307642.
********************
average rmse: 12.087809331921735
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 12.591546168150986.
********************
	Fold 2: 
		rmse is 10.516455597310216.
********************
	Fold 3: 
		rmse is 5.4968116995870835.
********************
	Fold 4: 
		rmse is 11.191581226994176.
*****

In [6]:
print(wiki_result)
print(bli_result)

3.0465126109222616
12.120707702308929


In [7]:
gs_result

{'0.001_0.1': [3.3814885965135573, 15.118615173931278],
 '0.001_0.01': [3.859721729873808, 15.409556541286358],
 '0.001_0.001': [4.383395411270361, 14.30048998636131],
 '0.001_0.0005': [3.903898608353186, 15.584978349843624],
 '0.0005_0.1': [3.2991345778874255, 13.966054313087358],
 '0.0005_0.01': [3.514177606379706, 13.406637365047843],
 '0.0005_0.001': [3.4376507338508917, 13.788407359547225],
 '0.0005_0.0005': [3.45139545993868, 12.244646805142299],
 '0.0001_0.1': [3.1333172045233844, 12.086752425791683],
 '0.0001_0.01': [3.07091057171695, 11.126519810711907],
 '0.0001_0.001': [3.0668349759459987, 11.471938982839177],
 '0.0001_0.0005': [3.0465126109222616, 12.120707702308929]}