In [1]:
%matplotlib inline

In [2]:
import numpy as np

class MF():

    def __init__(self, R, K, alpha, beta, iterations, test_samples, X=None, Y=None, Z=None, src_si_len=0, \
                 tgt_si_len=0, lang_pair_si_len=0, src_index=None, tgt_index=None, model=None, num_running=0):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)                  : src-tgt language rating matrix
        - K (int)                      : number of latent dimensions
        - alpha (float)                : learning rate
        - beta (float)                 : regularization parameter
        - X (dict)                     : source language side information
        - Y (dict)                     : target language side information
        - Z (dict)                     : language pair side information
        - src_si_len(int)              : source language side information length
        - tgt_si_len(int)              : target language side information length
        - lang_pair_si_len(int)        : language pair side information length
        """

        self.R = np.array(R)
        self.Prediction = deepcopy(self.R)
        self.src_langs = R.index.tolist()
        self.tgt_langs = R.columns.tolist()
        self.num_src, self.num_tgt = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations
        self.X = X
        self.Y = Y
        self.Z = Z
        self.src_si_len = src_si_len
        self.tgt_si_len = tgt_si_len
        self.lang_pair_si_len = lang_pair_si_len
        self.test_samples = test_samples
        self.src_index=src_index 
        self.tgt_index=tgt_index
        self.model=model
        self.num_running = num_running
        self.score_dict = {'BLEU': "WIKI-MT", "Muse": "BLI-Muse", "Vecmap": "BLI-Vecmap"}
        self.traing_error_log=[]
        self.test_error_log=[]
        

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_src, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_tgt, self.K))

        # Initialize side information's parameter if necesary
        if self.X and self.src_si_len:
            self.A = np.random.normal(scale = 1./self.src_si_len, size=self.src_si_len)
        if self.Y and self.tgt_si_len:
            self.B = np.random.normal(scale = 1./self.tgt_si_len, size=self.tgt_si_len)
        if self.Z and self.lang_pair_si_len:
            self.C = np.random.normal(scale = 1./self.lang_pair_si_len, size=self.lang_pair_si_len)
        
        # Initialize the biases
        # the biases of users and items are initilized as 0
        # the bias of rating is initilized as mean value
        self.b_u = np.zeros(self.num_src)
        self.b_i = np.zeros(self.num_tgt)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples (where rating > 0)
        self.samples = []
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                if self.R[i,j] > 0:
                    cur_tuple = [i, j, self.R[i, j]]
                    src_lang = self.src_langs[i]
                    tgt_lang = self.tgt_langs[j]
                    if self.X:
                        if src_lang in self.X.keys():
                            cur_tuple.append(self.X[src_lang])
                        else:
                            raise KeyError
                    if self.Y:
                        if tgt_lang in self.Y.keys():
                            cur_tuple.append(self.Y[src_lang])
                        else:
                            raise KeyError
                    if self.Z:
                        if src_lang + "_" + tgt_lang in self.Z.keys():
                            cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                        else:
                            raise KeyError
                    self.samples.append(tuple(cur_tuple))

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            # shuffle training samples
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 100 == 0:
                self.traing_error_log.append((i, mse))
                test_mse = self.evaluate_testing(self.test_samples, self.src_index, self.tgt_index, self.model)
                self.test_error_log.append((i, test_mse))
#                 print("\t\tIteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
#         predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x][y] - self.Prediction[x][y], 2)
        return np.sqrt(error / len(xs))

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for sample in self.samples:
            i, j, r = sample[0], sample[1], sample[2]
            # Computer prediction and error
            prediction = self.get_rating(sample)
            self.Prediction[i][j] = prediction
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])
            
            # Update side information parameter if necessary
            cur_index = 3
            if self.X:
                x = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.A += self.alpha * (e * x - self.beta * self.A)
            if self.Y:
                y = np.array(sample[cur_index], dtype=np.float64)
                cur_index += 1
                self.B += self.alpha * (e * y - self.beta * self.B)
            if self.Z:
                z = np.array(sample[cur_index], dtype=np.float64)
                self.C += self.alpha * (e * z - self.beta * self.C)
                

    def get_rating(self, sample):
        """
        Get the predicted rating of sample
        """
        i, j, r = sample[0], sample[1], sample[2]
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        cur_index = 3
        if self.X:
            x = sample[cur_index]
            cur_index += 1
            prediction += self.A.dot(x.T)
        if self.Y:
            y = sample[cur_index]
            cur_index += 1
            prediction += self.B.dot(y.T)
        if self.Z:
            z = sample[cur_index]
            prediction += self.C.dot(z.T)
        return prediction
    
    def evaluate_testing(self, test_data, src_index_name, tgt_index_name, score_index_name):
        """
        Predict the score for testing data
        """ 
        rmse = 0.0
        for record in test_data.iterrows():
            record = record[1]
            src_lang = record[src_index_name]
            tgt_lang = record[tgt_index_name]
            src_lang_index = self.src_langs.index(src_lang)
            tgt_lang_index = self.tgt_langs.index(tgt_lang)
            score = record[score_index_name]
            cur_tuple = [src_lang_index, tgt_lang_index, score]
            if self.X:
                if src_lang in self.X.keys():
                    cur_tuple.append(self.X[src_lang])
                else:
                    raise KeyError
            if self.Y:
                if tgt_lang in self.Y.keys():
                    cur_tuple.append(self.Y[src_lang])
                else:
                    raise KeyError
            if self.Z:
                if src_lang + "_" + tgt_lang in self.Z.keys():
                    cur_tuple.append(self.Z[src_lang + "_" + tgt_lang])
                else:
                    raise KeyError
            prediction = self.get_rating(tuple(cur_tuple))
            rmse += (prediction - score) * (prediction - score)
        return np.sqrt(rmse / len(test_data))
        

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P, Q, A, B, and C
        """
        res = deepcopy(self.R)
        for i in range(self.num_src):
            for j in range(self.num_tgt):
                src_lang = self.src_langs[i]
                tgt_lang = self.tgt_langs[j]
                res[i][j] = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
                if self.X and src_lang in self.X.keys():
                    x = self.X[src_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Y and tgt_lang in self.Y.keys():
                    y = self.X[tgt_lang]
                    res[i][j] += self.A.dot(x.T)
                if self.Z and src_lang + "_" + tgt_lang in self.Z.keys():
                    z = self.X[src_lang + "_" + tgt_lang]
                    res[i][j] += self.A.dot(x.T)
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)
    
    def draw_error_curve(self, i):
        import matplotlib.pyplot as plt
        import numpy as np
        iters = []
        train_loss = []
        test_loss = []
        for item in self.traing_error_log:
            iters.append(item[0])
            train_loss.append(item[1])
        for item in self.test_error_log:
            test_loss.append(item[1])

        plt.plot(iters, train_loss, 'b', label='train loss')#'b'指：color='blue'
        plt.plot(iters, test_loss, 'r', label='test loss')#'r'指：color='red'

        plt.legend()  #显示上面的label
        plt.xlabel('Iteration')
        plt.ylabel('Loss')
        title = self.score_dict[self.model] + "_" + str(self.num_running+1) + "_Fold" + str(i+1)
        plt.title(title) 
#         plt.savefig("../result/without_si/" + self.score_dict[self.model] + "/fold" + str(i+1) + "/" + title + ".png")

        #plt.ylim(-1,1)#仅设置y轴坐标范围
        plt.show()

        

Represent the two tasks above as matrices (BLEU scores for Wiki-MT and Accuracy for BLI) 

In [3]:
import pandas as pd
import numpy as np
np.random.seed(2021)

In [4]:
from copy import deepcopy
import pandas as pd
import random as rd

def split_k_fold_data(file_dir, score_index_name, src_index_name, tgt_index_name, k=5):
    data = pd.read_csv(file_dir)
    
    # shuffle
    data = data.sample(frac=1)
    
    # generate score matrix
    src_langs = data[src_index_name].unique()
    tgt_langs = data[tgt_index_name].unique()
    score_matrix = pd.DataFrame(index = src_langs, columns = tgt_langs)
    
    # eliminate empty rows and columns
    data = data.dropna(axis=1, how="all")
    data = data.dropna(axis=0, how="all")
    
    # K fold split
    k_fold_data = {}
    models = list(score_index_name)
    lens= len(data)
    
    for i, model in enumerate(models):
        ex_per_fold = int(np.ceil(lens / k))
        for j in range(k):
            start = ex_per_fold * j
            end = ex_per_fold * (j + 1)
            if j == 0:
                k_fold_data[model] = {"train": [], "test": []}
            k_fold_data[model]["train"].append(pd.concat([data.iloc[:start, :], data.iloc[end:, :]], axis=0))
            k_fold_data[model]["test"].append(data.iloc[start:end, :])
    return k_fold_data, score_matrix

def generate_score_matrix(train_data, src_index_name, tgt_index_name, score_index_name, origin_score_matrix):    
    score_matrix = deepcopy(origin_score_matrix)
    
    for record in train_data.iterrows():
        record = record[1]
        src_lang = record[src_index_name]
        tgt_lang = record[tgt_index_name]
        score = record[score_index_name]
        score_matrix.loc[src_lang, tgt_lang] = score
#         score_matrix[src_lang][tgt_lang] = score
    score_matrix.fillna(0, inplace=True)
    return score_matrix

def get_rmse(valid_data, model, src_index_name, tgt_index_name, score_matrix, train_matrix):
    rmse = 0.0
    src_langs = train_matrix.index.tolist()
    tgt_langs = train_matrix.columns.tolist()
    for cur_valid_data in valid_data.iterrows():
        cur_valid_data = cur_valid_data[1]
        src_lang, tgt_lang, score = cur_valid_data[src_index_name], cur_valid_data[tgt_index_name], cur_valid_data[model]
        src_idx = src_langs.index(src_lang)
        tgt_idx = tgt_langs.index(tgt_lang)
        prediction = score_matrix[src_idx][tgt_idx]
        rmse += (prediction - score) * (prediction - score)
    return np.sqrt(rmse / len(valid_data))

def get_result(alpha, beta, data_dir, scores, src_index, tgt_index, k, num_running, src_lang_side_info=None, \
               tgt_lang_side_info=None, lang_pair_side_info=None, src_si_len=0, tgt_si_len=0, \
               lang_pair_si_len=0):
    all_running_rmse = 0.0
    for nr in range(num_running):
        print("num running: " + str(nr+1))
        data, langs_matrix = split_k_fold_data(data_dir, scores, src_index, tgt_index, k)
        
        src_langs = langs_matrix.index.tolist()
        tgt_langs = langs_matrix.columns.tolist()
        
        res = {}
        for model in scores:
            print("-"*40)
            print(model)
            total_rmse = 0.0
            for i in range(k):
                print("\tFold {}: ".format(i+1))
                train_data, test_data = data[model]["train"][i], data[model]["test"][i]
                train_matrix = generate_score_matrix(train_data, src_index, tgt_index, model, langs_matrix)
                mf = MF(train_matrix, K=2, alpha=alpha, beta=beta, iterations=2000, test_samples=test_data, X=src_lang_side_info, \
                        Y=tgt_lang_side_info, Z=lang_pair_side_info, src_si_len=src_si_len, tgt_si_len=tgt_si_len, \
                        lang_pair_si_len=lang_pair_si_len, src_index=src_index, tgt_index=tgt_index, model=model, \
                       num_running=nr)
                trainging_log = mf.train()
#                 predictions = mf.full_matrix()
#                 cur_rmse = get_rmse(test_data, model, src_index, tgt_index, predictions, train_matrix)
                cur_rmse = mf.evaluate_testing(test_data, src_index, tgt_index, model)
#                 mf.draw_error_curve(i)
                total_rmse += cur_rmse
                print("\t\trmse is {}.".format(cur_rmse))
                print("*" * 20)
                
            average_rmse = total_rmse / k
            print("average rmse: " + str(average_rmse))
            res[model] = average_rmse
        res_rmse = 0.0
        for key, value in res.items():
            res_rmse += value
        all_running_rmse += res_rmse / len(scores)
    return all_running_rmse / num_running

def get_language_pair_side_info(data_dir, side_info_features, src_lang_name, tgt_lang_name):
    data = pd.read_csv(data_dir)
    side_dict = {}
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        side_dict[src_lang + "_" + tgt_lang] = record[side_info_features].values
    return side_dict

def get_language_side_information(data_dir, src_lang_name, tgt_lang_name):
    
    data = pd.read_csv(data_dir)
    side_dict = {}
    src_langs = set()
    tgt_langs = set()
    
    for record in data.iterrows():
        record = record[1]
        src_lang = record[src_lang_name]
        tgt_lang = record[tgt_lang_name]
        src_langs.add(src_lang)
        tgt_langs.add(tgt_lang)
        
    src_langs = list(src_langs)
    tgt_langs = list(tgt_langs)
        
    return languages_to_features(src_langs), languages_to_features(tgt_langs)

def languages_to_features(languages_list):
    from sklearn.decomposition import PCA
    import lang2vec.lang2vec as l2v
    features_list = ["syntax_knn", "phonology_knn", "inventory_knn"]
    features = l2v.get_features(languages_list, features_list)
    
    features_matrix = []
    for language in languages_list:
        features_matrix.append(features[language])
        
    pca = PCA(n_components=6)   
    pca_features_matrix = pca.fit_transform(features_matrix)
    res = {}
    
    for i, language in enumerate(languages_list):
        res[languages_list[i]] = pca_features_matrix[i]
    return res

In [5]:
WIKI_MT_DIR = "../data/data_wiki_new.csv"
WIKI_SRC = "Source"
WIKI_TGT = "Target"
WIKI_SCORE = ["BLEU"]
WIKI_SIDE_FEATURES = ['geographic', 'genetic', 'inventory', 'syntactic', 'phonological', 'featural']

BLI_DIR = "../data/data_bli2_new.csv"
BLI_SRC = "Source Language Code"
BLI_TGT = "Target Language Code"
BLI_SCORE = ["Muse", "Vecmap"]
BLI_SIDE_FEATURES = ['genetic', 'syntactic', 'featural', 'phonological', 'inventory', 'geographic']
# The score metric is from "NLPerf/src/task_feats.py"

k = 5
num_running = 10

WIKI_SIDE_INFO_DICT = get_language_pair_side_info(WIKI_MT_DIR, WIKI_SIDE_FEATURES, WIKI_SRC, WIKI_TGT)
WIKI_SRC_SIDE_INFO_DICT, WIKI_TGT_SIDE_INFO_DICT = get_language_side_information(WIKI_MT_DIR, WIKI_SRC, WIKI_TGT)

BLI_SIDE_INFO_DICT = get_language_pair_side_info(BLI_DIR, BLI_SIDE_FEATURES, BLI_SRC, BLI_TGT)
BLI_SRC_SIDE_INFO_DICT, BLI_TGT_SIDE_INFO_DICT = get_language_side_information(BLI_DIR, BLI_SRC, BLI_TGT)

candidate_alpha = [0.001, 0.0005, 0.0001]
candidate_beta = [0.1, 0.01, 0.001, 0.0005]

gs_result = {}

for alpha in candidate_alpha:
    for beta in candidate_beta:
        wiki_result = get_result(alpha, beta, WIKI_MT_DIR, WIKI_SCORE, WIKI_SRC, WIKI_TGT, k, num_running)
        bli_result = get_result(alpha, beta, BLI_DIR, BLI_SCORE, BLI_SRC, BLI_TGT, k, num_running)
        gs_result[str(alpha) + "_" + str(beta)] = [wiki_result, bli_result]
        

# wiki_mt_score_matrix = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")
# wiki_mt_score_matrix, wiki_mt_valid = generate_score_matrix(WIKI_MT_DIR, "Source", "Target", "BLEU")xdz
# bli_score_matrix, bli_valid = generate_score_matrix(BLI_DIR, "Source Language Code", "Target Language Code", ["Muse", "Vecmap"])

num running: 1
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.750933687998991.
********************
	Fold 2: 
		rmse is 3.3181728173511673.
********************
	Fold 3: 
		rmse is 3.074334004495996.
********************
	Fold 4: 
		rmse is 3.7589244235696015.
********************
	Fold 5: 
		rmse is 2.4163097981941704.
********************
average rmse: 3.463734946321985
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.7352948662396446.
********************
	Fold 2: 
		rmse is 4.305299306516215.
********************
	Fold 3: 
		rmse is 3.6102318070424686.
********************
	Fold 4: 
		rmse is 2.940046259312515.
********************
	Fold 5: 
		rmse is 3.113381860619802.
********************
average rmse: 3.5408508199461295
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.265965376510646.
********************
	Fold 2: 
		rmse is 3.099193292395876.
********************
	Fold 3: 
		rmse is 2.97149

		rmse is 20.6637386447301.
********************
	Fold 2: 
		rmse is 7.980121868869213.
********************
	Fold 3: 
		rmse is 12.259136587250167.
********************
	Fold 4: 
		rmse is 6.542944835974371.
********************
	Fold 5: 
		rmse is 15.257787955286814.
********************
average rmse: 12.540745978422132
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 9.142519582958085.
********************
	Fold 2: 
		rmse is 11.428358105311812.
********************
	Fold 3: 
		rmse is 28.407384300560544.
********************
	Fold 4: 
		rmse is 13.930310714188824.
********************
	Fold 5: 
		rmse is 40.69197581433897.
********************
average rmse: 20.720109703471643
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 17.062832107379784.
********************
	Fold 2: 
		rmse is 13.852069984340408.
********************
	Fold 3: 
		rmse is 17.899820924937302.
********************
	Fold 4: 
		rmse is 13.451330666478215.
********

----------------------------------------
Muse
	Fold 1: 
		rmse is 18.784153080555868.
********************
	Fold 2: 
		rmse is 14.769674569025547.
********************
	Fold 3: 
		rmse is 24.48266837062792.
********************
	Fold 4: 
		rmse is 11.936321535408432.
********************
	Fold 5: 
		rmse is 13.173883173045045.
********************
average rmse: 16.629340145732563
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 17.345626223102677.
********************
	Fold 2: 
		rmse is 14.749509043156044.
********************
	Fold 3: 
		rmse is 10.14862029534501.
********************
	Fold 4: 
		rmse is 23.745915060847757.
********************
	Fold 5: 
		rmse is 10.984219465108893.
********************
average rmse: 15.394778017512076
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 14.10240016448207.
********************
	Fold 2: 
		rmse is 12.178632607045056.
********************
	Fold 3: 
		rmse is 37.47612744267349.
***********

		rmse is 2.946874176105301.
********************
	Fold 2: 
		rmse is 4.378072971978916.
********************
	Fold 3: 
		rmse is 3.6161036462030793.
********************
	Fold 4: 
		rmse is 3.1574401747789684.
********************
	Fold 5: 
		rmse is 3.7386561505603466.
********************
average rmse: 3.567429423925322
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.9246832884532643.
********************
	Fold 2: 
		rmse is 2.8749259508747524.
********************
	Fold 3: 
		rmse is 5.1276999013124485.
********************
	Fold 4: 
		rmse is 3.3481005241238178.
********************
	Fold 5: 
		rmse is 3.413545220915844.
********************
average rmse: 3.537790977136025
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.335719815607539.
********************
	Fold 2: 
		rmse is 4.092312884385578.
********************
	Fold 3: 
		rmse is 3.2487881232513067.
********************
	Fold 4: 
		rmse is 3.26839277088317

		rmse is 14.895832708099203.
********************
	Fold 2: 
		rmse is 10.246382318186878.
********************
	Fold 3: 
		rmse is 14.498569705758063.
********************
	Fold 4: 
		rmse is 13.823197906557255.
********************
	Fold 5: 
		rmse is 21.180189992813055.
********************
average rmse: 14.92883452628289
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 21.031651439816926.
********************
	Fold 2: 
		rmse is 22.683088178001544.
********************
	Fold 3: 
		rmse is 9.317478555656605.
********************
	Fold 4: 
		rmse is 26.227716790427838.
********************
	Fold 5: 
		rmse is 10.858850478272105.
********************
average rmse: 18.023757088435
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 9.035741141769872.
********************
	Fold 2: 
		rmse is 17.409044949406262.
********************
	Fold 3: 
		rmse is 28.53612829086906.
********************
	Fold 4: 
		rmse is 98.08722852016508.
**********

		rmse is 26.617198652390375.
********************
	Fold 2: 
		rmse is 9.678807864600087.
********************
	Fold 3: 
		rmse is 37.84954283806978.
********************
	Fold 4: 
		rmse is 11.717978225354832.
********************
	Fold 5: 
		rmse is 16.670389197340192.
********************
average rmse: 20.50678335555105
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 19.717325797600022.
********************
	Fold 2: 
		rmse is 28.89971651877453.
********************
	Fold 3: 
		rmse is 21.34419464949059.
********************
	Fold 4: 
		rmse is 17.24246496346286.
********************
	Fold 5: 
		rmse is 11.344961027926734.
********************
average rmse: 19.709732591450948
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 23.778876366285232.
********************
	Fold 2: 
		rmse is 17.42535930180821.
********************
	Fold 3: 
		rmse is 15.325242022193983.
********************
	Fold 4: 
		rmse is 12.807264989709562.
*********

		rmse is 3.179805853654881.
********************
	Fold 2: 
		rmse is 3.4038322052335497.
********************
	Fold 3: 
		rmse is 2.6381322343254787.
********************
	Fold 4: 
		rmse is 3.504055559449261.
********************
	Fold 5: 
		rmse is 3.964329140183398.
********************
average rmse: 3.3380309985693137
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.135962646682858.
********************
	Fold 2: 
		rmse is 4.611057680313274.
********************
	Fold 3: 
		rmse is 3.0430109774868095.
********************
	Fold 4: 
		rmse is 3.824260075384963.
********************
	Fold 5: 
		rmse is 3.523892243553847.
********************
average rmse: 3.6276367246843497
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.3276752675926167.
********************
	Fold 2: 
		rmse is 4.807403510686814.
********************
	Fold 3: 
		rmse is 3.3006455690190113.
********************
	Fold 4: 
		rmse is 3.004997633363802

		rmse is 16.90297287652342.
********************
	Fold 2: 
		rmse is 15.462004766060844.
********************
	Fold 3: 
		rmse is 11.31115983356043.
********************
	Fold 4: 
		rmse is 10.761852286651925.
********************
	Fold 5: 
		rmse is 6.8441590385219095.
********************
average rmse: 12.256429760263705
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 11.873410517779936.
********************
	Fold 2: 
		rmse is 14.671932028276164.
********************
	Fold 3: 
		rmse is 18.10636240357422.
********************
	Fold 4: 
		rmse is 38.008071266932376.
********************
	Fold 5: 
		rmse is 8.937822147318935.
********************
average rmse: 18.319519672776327
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 11.973547208474299.
********************
	Fold 2: 
		rmse is 12.649418117881062.
********************
	Fold 3: 
		rmse is 10.721054383571307.
********************
	Fold 4: 
		rmse is 12.729788359109694.
*****

		rmse is 10.776457644920697.
********************
	Fold 2: 
		rmse is 18.409289781284482.
********************
	Fold 3: 
		rmse is 10.14928105062244.
********************
	Fold 4: 
		rmse is 19.41469216066189.
********************
	Fold 5: 
		rmse is 21.681052700591632.
********************
average rmse: 16.086154667616228
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 24.08753264027681.
********************
	Fold 2: 
		rmse is 13.411768334557502.
********************
	Fold 3: 
		rmse is 11.292302867343745.
********************
	Fold 4: 
		rmse is 9.614109126127468.
********************
	Fold 5: 
		rmse is 12.283061207830128.
********************
average rmse: 14.137754835227131
num running: 6
----------------------------------------
Muse
	Fold 1: 
		rmse is 20.492529257900863.
********************
	Fold 2: 
		rmse is 9.16640132201312.
********************
	Fold 3: 
		rmse is 12.770085666159561.
********************
	Fold 4: 
		rmse is 14.622969272911854.
********

		rmse is 15.10492593917349.
********************
	Fold 2: 
		rmse is 2.9705969172107802.
********************
	Fold 3: 
		rmse is 3.7628292511658827.
********************
	Fold 4: 
		rmse is 2.730649192170727.
********************
	Fold 5: 
		rmse is 2.9996595167238875.
********************
average rmse: 5.513732163288954
num running: 1
----------------------------------------
Muse
	Fold 1: 
		rmse is 14.185189728072535.
********************
	Fold 2: 
		rmse is 10.61957311327993.
********************
	Fold 3: 
		rmse is 11.839499491896218.
********************
	Fold 4: 
		rmse is 14.19779976123723.
********************
	Fold 5: 
		rmse is 40.65517007102113.
********************
average rmse: 18.29944643310141
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 13.49506988972673.
********************
	Fold 2: 
		rmse is 10.100029055727083.
********************
	Fold 3: 
		rmse is 7.043198790415072.
********************
	Fold 4: 
		rmse is 13.92803728693451.
************

		rmse is 2.910431629780723.
********************
	Fold 2: 
		rmse is 3.79499531153555.
********************
	Fold 3: 
		rmse is 2.76333254508606.
********************
	Fold 4: 
		rmse is 3.407146125410069.
********************
	Fold 5: 
		rmse is 3.168835864278877.
********************
average rmse: 3.2089482952182555
num running: 2
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.254255507104774.
********************
	Fold 2: 
		rmse is 3.487836405094812.
********************
	Fold 3: 
		rmse is 3.1823923239267464.
********************
	Fold 4: 
		rmse is 3.0599662909280716.
********************
	Fold 5: 
		rmse is 4.060066075383585.
********************
average rmse: 3.4089033204875974
num running: 3
----------------------------------------
BLEU
	Fold 1: 
		rmse is 4.359830419304041.
********************
	Fold 2: 
		rmse is 4.758267260439743.
********************
	Fold 3: 
		rmse is 2.97248112155319.
********************
	Fold 4: 
		rmse is 3.8943308526501244.
***

		rmse is 15.266909315184877.
********************
	Fold 2: 
		rmse is 10.778886863110635.
********************
	Fold 3: 
		rmse is 7.692151799816224.
********************
	Fold 4: 
		rmse is 5.470877963532243.
********************
	Fold 5: 
		rmse is 28.16286410487137.
********************
average rmse: 13.474338009303072
num running: 7
----------------------------------------
Muse
	Fold 1: 
		rmse is 10.248199899963703.
********************
	Fold 2: 
		rmse is 20.109696328416582.
********************
	Fold 3: 
		rmse is 23.730763204535947.
********************
	Fold 4: 
		rmse is 9.705572156270158.
********************
	Fold 5: 
		rmse is 32.14064010431765.
********************
average rmse: 19.186974338700807
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 31.833974409592752.
********************
	Fold 2: 
		rmse is 11.75396579967446.
********************
	Fold 3: 
		rmse is 22.558787927052393.
********************
	Fold 4: 
		rmse is 6.146068547105691.
*********

		rmse is 10.569174883057945.
********************
	Fold 2: 
		rmse is 18.480523900872015.
********************
	Fold 3: 
		rmse is 20.909747030382515.
********************
	Fold 4: 
		rmse is 22.328556468901148.
********************
	Fold 5: 
		rmse is 17.42463511328124.
********************
average rmse: 17.94252747929897
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 7.258965929732922.
********************
	Fold 2: 
		rmse is 11.625723649813043.
********************
	Fold 3: 
		rmse is 10.21728243929534.
********************
	Fold 4: 
		rmse is 17.5215364139029.
********************
	Fold 5: 
		rmse is 9.46805065078025.
********************
average rmse: 11.21831181670489
num running: 3
----------------------------------------
Muse
	Fold 1: 
		rmse is 14.746640144005095.
********************
	Fold 2: 
		rmse is 14.683253808652688.
********************
	Fold 3: 
		rmse is 20.30231729707437.
********************
	Fold 4: 
		rmse is 11.216540438094993.
************

		rmse is 3.463297475529508.
********************
	Fold 2: 
		rmse is 2.9373219827811843.
********************
	Fold 3: 
		rmse is 3.1502780573690536.
********************
	Fold 4: 
		rmse is 3.117902636404801.
********************
	Fold 5: 
		rmse is 5.102048145299451.
********************
average rmse: 3.5541696594767997
num running: 5
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.056280862042364.
********************
	Fold 2: 
		rmse is 3.0282747564051813.
********************
	Fold 3: 
		rmse is 3.8740410103402114.
********************
	Fold 4: 
		rmse is 2.9696386467652642.
********************
	Fold 5: 
		rmse is 3.179830338308686.
********************
average rmse: 3.2216131227723417
num running: 6
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.9061671389835904.
********************
	Fold 2: 
		rmse is 3.217077305400338.
********************
	Fold 3: 
		rmse is 4.0073093482390085.
********************
	Fold 4: 
		rmse is 3.7544807055000

		rmse is 12.889374586524402.
********************
	Fold 2: 
		rmse is 12.947299921538852.
********************
	Fold 3: 
		rmse is 16.785979307284784.
********************
	Fold 4: 
		rmse is 11.400739886696316.
********************
	Fold 5: 
		rmse is 12.293206694375947.
********************
average rmse: 13.26332007928406
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 7.369278434189748.
********************
	Fold 2: 
		rmse is 11.21927582469089.
********************
	Fold 3: 
		rmse is 13.855029508050256.
********************
	Fold 4: 
		rmse is 16.462650121199058.
********************
	Fold 5: 
		rmse is 7.964097540684269.
********************
average rmse: 11.374066285762844
num running: 9
----------------------------------------
Muse
	Fold 1: 
		rmse is 12.44815179425741.
********************
	Fold 2: 
		rmse is 13.905897781079048.
********************
	Fold 3: 
		rmse is 16.117682369370144.
********************
	Fold 4: 
		rmse is 20.029846774430847.
*******

		rmse is 12.820383748993002.
********************
	Fold 2: 
		rmse is 12.11644991209075.
********************
	Fold 3: 
		rmse is 13.867627253864113.
********************
	Fold 4: 
		rmse is 10.908969057304962.
********************
	Fold 5: 
		rmse is 13.145519488227142.
********************
average rmse: 12.571789892095994
num running: 4
----------------------------------------
Muse
	Fold 1: 
		rmse is 15.570396853443402.
********************
	Fold 2: 
		rmse is 21.57451248269287.
********************
	Fold 3: 
		rmse is 19.70680727170586.
********************
	Fold 4: 
		rmse is 20.10152674519599.
********************
	Fold 5: 
		rmse is 16.55636425236171.
********************
average rmse: 18.701921521079964
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 8.784212059887363.
********************
	Fold 2: 
		rmse is 14.81957015790489.
********************
	Fold 3: 
		rmse is 12.279372443390262.
********************
	Fold 4: 
		rmse is 11.776815554032734.
*********

		rmse is 3.254214015024709.
********************
	Fold 2: 
		rmse is 2.2860554363375556.
********************
	Fold 3: 
		rmse is 3.683856427573302.
********************
	Fold 4: 
		rmse is 2.762576934661324.
********************
	Fold 5: 
		rmse is 3.8156753754224626.
********************
average rmse: 3.160475637803871
num running: 8
----------------------------------------
BLEU
	Fold 1: 
		rmse is 3.6207347697014822.
********************
	Fold 2: 
		rmse is 3.2202400776597675.
********************
	Fold 3: 
		rmse is 3.3399221958768868.
********************
	Fold 4: 
		rmse is 3.370021305374592.
********************
	Fold 5: 
		rmse is 4.621337500872938.
********************
average rmse: 3.634451169897133
num running: 9
----------------------------------------
BLEU
	Fold 1: 
		rmse is 2.7964868413450144.
********************
	Fold 2: 
		rmse is 3.8193036017756374.
********************
	Fold 3: 
		rmse is 3.605561543173505.
********************
	Fold 4: 
		rmse is 3.260308640541809

		rmse is 13.196844877927148.
********************
	Fold 2: 
		rmse is 9.2434053018879.
********************
	Fold 3: 
		rmse is 13.497224932417518.
********************
	Fold 4: 
		rmse is 15.76343677851374.
********************
	Fold 5: 
		rmse is 14.113386774216364.
********************
average rmse: 13.162859732992535
num running: 10
----------------------------------------
Muse
	Fold 1: 
		rmse is 14.826513580305008.
********************
	Fold 2: 
		rmse is 14.840893098896426.
********************
	Fold 3: 
		rmse is 25.413986557116623.
********************
	Fold 4: 
		rmse is 13.838864411253267.
********************
	Fold 5: 
		rmse is 11.623945099632676.
********************
average rmse: 16.1088405494408
----------------------------------------
Vecmap
	Fold 1: 
		rmse is 17.306935739767347.
********************
	Fold 2: 
		rmse is 9.515661967207562.
********************
	Fold 3: 
		rmse is 16.081705118284493.
********************
	Fold 4: 
		rmse is 12.756465808938346.
********

In [6]:
print(wiki_result)
print(bli_result)

3.3436179690440633
14.385418379018194


In [7]:
gs_result

{'0.001_0.1': [3.4171218879499414, 15.36479030351926],
 '0.001_0.01': [4.6893205442214825, 18.62911236890664],
 '0.001_0.001': [4.700044202012377, 18.860369544229],
 '0.001_0.0005': [4.188015246555515, 18.41793653091694],
 '0.0005_0.1': [3.2933106838380986, 15.589226411671614],
 '0.0005_0.01': [3.644092206778777, 16.183110780966953],
 '0.0005_0.001': [3.795399166317573, 17.35888196269147],
 '0.0005_0.0005': [3.768418674587734, 17.1951570902147],
 '0.0001_0.1': [3.2897075369769637, 14.592306806221533],
 '0.0001_0.01': [3.35197540209008, 14.13672426406568],
 '0.0001_0.001': [3.4118009003118237, 15.131422753833766],
 '0.0001_0.0005': [3.3436179690440633, 14.385418379018194]}