In [1]:
import pandas as pd
from collections import Counter
from rouge import Rouge
import sys
import numpy as np
import torch
from transformers import BertModel, BertTokenizerFast
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler 
import bleurt.score as bleurt_score
import jieba
import re
from gensim.parsing.preprocessing import remove_stopwords
# model
from sklearn.model_selection import GridSearchCV
#LightGBM
import lightgbm as lgb

import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split,KFold
from tqdm import tqdm_notebook as tqdm

In [2]:
get_metric_from_groupSize = lambda n: ('rouge-%d' % n)

def calculate_recall(ref, candidate, group_size=1):
    #print(ref)
    try: 
        #print(candidate)
        #Instantiate Rouge
        rouge_metric = get_metric_from_groupSize(group_size)
        rouge_stat = "r"
        rouge = Rouge(metrics=[rouge_metric], stats=[rouge_stat])
        outcome = rouge.get_scores(candidate, ref)
        return outcome[0][rouge_metric][rouge_stat]
    except:
        return None
          
def calculate_precision(ref, candidate, group_size=1):
    try:
        #Instantiate Rouge
        rouge_metric = get_metric_from_groupSize(group_size)
        rouge_stat = "p"
        rouge = Rouge(metrics=[rouge_metric], stats=[rouge_stat])

        outcome = rouge.get_scores(candidate, ref)
        return outcome[0][rouge_metric][rouge_stat]
    except:
        return None    
def calculate_f1(ref, candidate, group_size=1):
    try:
        #Instantiate Rouge
        rouge_metric = get_metric_from_groupSize(group_size)
        rouge_stat = "f"
        rouge = Rouge(metrics=[rouge_metric], stats=[rouge_stat])

        outcome = rouge.get_scores(candidate, ref)
        return outcome[0][rouge_metric][rouge_stat]
    except:
        return None    

In [25]:
def BLEU(ref, candidate, group_size): 
    ref = str(ref).split()
    candidate = str(candidate).split()
    if(group_size > 1):
        ref = formGroups(ref, group_size)
        candidate = formGroups(candidate, group_size)

    # compute word frequencies for the references and the candidate
    ref_counts = Counter(ref)
    candidate_counts = Counter(candidate)

    covered = 0
    total = 0
    
    # compute the coverage for each word
    for word, count in candidate_counts.items():
        covered += min(count, ref_counts[word])
        total += count
    
    return covered / len(candidate_counts)

def formGroups(words, group_size):

    if(group_size > len(words)):
        return words

    #Form groups
    to_return = []
    i = 0
    while (i + group_size - 1) < len(words):
        to_return.append(' '.join(words[i:i + group_size]))
        i+=1

    return to_return

In [4]:
def extractWordsIntoList(sentence):
    return jieba.lcut(sentence)

def extractWordsAndSeparateByCharacter(sentence, separator=" "):
    return separator.join(extractWordsIntoList(sentence))

def test():
    randomStrings = ["小明硕士毕业于中国科学院计算所","我来到北京清华大学","小明硕士毕业于中国科学院计算所，后在日本京都大学深造"]
    
    for string in randomStrings:
        output = extractWordsAndSeparateByCharacter(string)
        print("\nBefore: %s" % string)
        print("After: %s\n" % output)

In [27]:
#BLEU
def calculate_bleu(dataframe, reference_col='reference', translation_col='translation', word_group_size=1):
    series_data = [BLEU(row[reference_col], row[translation_col], group_size= word_group_size) for idx, row in dataframe.iterrows()]
    return pd.Series(data=series_data, index=dataframe.index) 

#ROUGE
def calculate_rouge_recall(dataframe, reference_col='reference', translation_col='translation', word_group_size=1):
    series_data = [calculate_recall(row[reference_col], row[translation_col], word_group_size) for idx, row in dataframe.iterrows()]
    return pd.Series(data=series_data, index=dataframe.index) 

def calculate_rouge_precision(dataframe, reference_col='reference', translation_col='translation', word_group_size=1):
    series_data = [calculate_precision(row[reference_col], row[translation_col], word_group_size) for idx, row in dataframe.iterrows()]
    return pd.Series(data=series_data, index=dataframe.index) 

def calculate_rouge_f1(dataframe, reference_col='reference', translation_col='translation', word_group_size=1):
    series_data = [calculate_f1(row[reference_col], row[translation_col], word_group_size) for idx, row in dataframe.iterrows()]
    return pd.Series(data=series_data, index=dataframe.index) 

#AUXILIARY
def getCSV(path):
    return pd.read_csv(path)

def non_text_removal(text):
    text = re.sub('[^a-zA-Z]', ' ', text).lower()
    if not text.strip():
        text = "Non text"
    #text = remove_stopwords(text)
    return text

def preProcessDataframe(dataframe, filePath, reference_col='reference', translation_col='translation'):
    if 'en-zh' in filePath:
        print("Detected en-zh... Pre-Processing Chinese...")
        series_data = [extractWordsAndSeparateByCharacter(row[translation_col]) for idx, row in dataframe.iterrows()]
        dataframe[translation_col] = pd.Series(data=series_data, index=dataframe.index)
        series_data = [extractWordsAndSeparateByCharacter(row[reference_col]) for idx, row in dataframe.iterrows()]
        dataframe[reference_col] = pd.Series(data=series_data, index=dataframe.index)
    elif "ru-en" in filePath:
        print("Detected ru-en...Skip preprocessing...")
        # only convert non-string to string
        dataframe['reference'] =  pd.Series([str(row[1]) for row in dataframe['reference'].iteritems()])
        dataframe['translation'] =  pd.Series([str(row[1]) for row in dataframe['translation'].iteritems()])   
        # no preprocessing for these pairs as the score decrease
        return dataframe     
    elif "en-fi" in filePath:
        print("Detected ru-en...Skip preprocessing...")
        # only convert non-string to string
        dataframe['reference'] =  pd.Series([str(row[1]) for row in dataframe['reference'].iteritems()])
        dataframe['translation'] =  pd.Series([str(row[1]) for row in dataframe['translation'].iteritems()])
        # no preprocessing for these pairs as the score decrease
        return dataframe    
    else:
        print("Preprocessing...")
        dataframe['reference'] =  pd.Series([non_text_removal(row[1]) for row in dataframe['reference'].iteritems()])
        dataframe['translation'] =  pd.Series([non_text_removal(row[1]) for row in dataframe['translation'].iteritems()])   
    return dataframe

def initBLEURT(csv):
    checkpoint = "bleurt\\test_checkpoint"
    scorer = bleurt_score.BleurtScorer(checkpoint)
    references = csv.reference
    candidates = csv.translation
    print("BLEURT calculating...")
    scores_bluert = [scorer.score(references=[row["reference"]],candidates= [row["translation"]])[0] for idx, row in csv.iterrows()]
    return scores_bluert

#MAIN
def main(filePath):
    #Get file path from arguments
    #filePath = sys.argv[1]
    
    #Import CSV
    csv = getCSV(filePath)
    csv = preProcessDataframe(csv, filePath)

    #BLEU calculation
    csv['bleu_w1'] = calculate_bleu(csv)
    csv['bleu_w2'] = calculate_bleu(csv, word_group_size=2)
    print("BLEU calculated...")

    #ROUGE calculation
    csv['rouge_recall_w1'] = calculate_rouge_recall(csv)
    csv['rouge_precision_w1'] = calculate_rouge_precision(csv)
    csv['rouge_f1_w1'] = calculate_rouge_f1(csv)
    print("ROUGE 1-gram calculated...")

    csv['rouge_precision_w2'] = calculate_rouge_precision(csv, word_group_size=2)
    csv['rouge_recall_w2'] = calculate_rouge_recall(csv, word_group_size=2)
    csv['rouge_f1_w2'] = calculate_rouge_f1(csv, word_group_size=2)
    print("ROUGE 2-gram calculated...")   
    csv['BLEURT'] = initBLEURT(csv)

    #PRINT
    #print(csv)
    return csv

def final_results(scores,z_score):
    mse = np.array([])
    pcorr = np.array([])
    kendallcorr = np.array([])
    metrics = scores.columns
    for col in scores.columns:
        mse = np.append(mse,mean_squared_error(z_score,scores[col]))
        pcorr = np.append(pcorr,z_score.corr(scores[col],method='pearson'))
        kendallcorr = np.append(kendallcorr,z_score.corr(scores[col],method='kendall'))
    return pd.DataFrame({'mse':mse,'pcorr':pcorr,'kendallcorr':kendallcorr},index = metrics)

In [29]:
path = "corpus"
corpus = "\\en-fi"
filePath = path + corpus + "\\scores.csv"

In [30]:
data = main(filePath)

Detected ru-en...Skip preprocessing...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
INFO:tensorflow:Reading checkpoint bleurt\test_checkpoint.
INFO:tensorflow:Config file found, reading.
INFO:tensorflow:Will load checkpoint dbleurt_tiny
INFO:tensorflow:Loads full paths and checks that files exists.
INFO:tensorflow:... name:dbleurt_tiny
INFO:tensorflow:... vocab_file:vocab.txt
INFO:tensorflow:... bert_config_file:bert_config.json
INFO:tensorflow:... do_lower_case:True
INFO:tensorflow:... max_seq_length:512
INFO:tensorflow:Creating BLEURT scorer.
INFO:tensorflow:Creating WordPiece tokenizer.
INFO:tensorflow:WordPiece tokenizer instantiated.
INFO:tensorflow:Creating Eager Mode predictor.
INFO:tensorflow:Loading model.
INFO:tensorflow:BLEURT initialized.
BLEURT calculating...


In [32]:
data.head()

Unnamed: 0,source,reference,translation,z-score,avg-score,annotators,bleu_w1,bleu_w2,rouge_recall_w1,rouge_precision_w1,rouge_f1_w1,rouge_precision_w2,rouge_recall_w2,rouge_f1_w2,BLEURT
0,"You can turn yourself into a pineapple, a dog ...","Voit muuttaa itsesi ananasta, koirasta tai Roy...","Voit muuttaa itsesi ananakseksi, koiraksi tai ...",-0.286195,34.2,5,0.5,0.272727,0.545455,0.5,0.521739,0.272727,0.3,0.285714,0.316181
1,Also shot were three men: two 29-year-olds and...,Myös ammuttiin kolme miestä: kaksi 29-vuotiait...,Myös kolmea miestä ammuttiin: kahta 29-vuotias...,0.547076,58.4,5,0.222222,0.0,0.222222,0.222222,0.222222,0.0,0.0,0.0,0.431411
2,The information is stored at the cash register...,Tiedot tallennetaan kassakoneisiin joka tapauk...,Tiedot kuitenkin tallentuvat kassoilla joka ta...,1.122476,74.6,5,0.4,0.111111,0.444444,0.4,0.421053,0.111111,0.125,0.117647,0.244749
3,Xinhua says that there were traces of hydrochl...,"Xinhua kertoo, että Xinyin näytteestä oli sunn...","Xinhua kertoo, että Xinyin sunnuntaina antamas...",0.383095,53.6,5,0.5,0.272727,0.75,0.5,0.6,0.272727,0.428571,0.333333,0.465652
4,"MacDonald, who was brought on board CBC's comm...",Voitaisiin kuulla CBD: n kommenttitiimin toimi...,"MacDonaldin, joka tuli CBC:n selostajatiimiin ...",-0.493065,32.25,4,0.2,0.071429,0.1875,0.2,0.193548,0.071429,0.066667,0.068966,-0.088476


# Evaluation the metrics score

In [33]:
scaler = StandardScaler()
scores = data.loc[:,'bleu_w1':]
scores = pd.DataFrame(scaler.fit_transform(scores),columns=scores.columns, index = scores.index)
scores.fillna(0,inplace=True)

In [34]:
final_results(scores,data['z-score'])

Unnamed: 0,mse,pcorr,kendallcorr
bleu_w1,0.898179,0.512997,0.341524
bleu_w2,1.065216,0.418989,0.293123
rouge_recall_w1,0.92588,0.497407,0.329099
rouge_precision_w1,0.881087,0.522616,0.348401
rouge_f1_w1,0.888932,0.518201,0.341341
rouge_precision_w2,1.05588,0.424243,0.296875
rouge_recall_w2,1.081815,0.409647,0.288378
rouge_f1_w2,1.062589,0.420468,0.292348
BLEURT,0.870491,0.52858,0.335507


In [17]:
def getScoreMetrics(corpus,corr_dict,scores_dict):
    print("Processing" + corpus)
    path = "corpus\\"
    filePath = path + corpus + "\\scores.csv"    
    data = main(filePath)
    # safe the score to csv file
    #data.to_csv("scores\\" + corpus + ".csv")
    scaler = StandardScaler()
    scores = data.loc[:,'bleu_w1':]
    scores = pd.DataFrame(scaler.fit_transform(scores),columns=scores.columns, index = scores.index)
    scores.fillna(0,inplace=True)
    corr_dict[corpus] = final_results(scores,data['z-score'])
    scores_dict[corpus]=final_results(scores,data['z-score'])
    return corr_dict, scores_dict

In [22]:
corpus_list = ['cs-en','de-en','en-fi','en-zh','ru-en','zh-en']
scores_dict = {}
corr_dict = {}
for corpus in corpus_list:
    corr_dict, scores_dict = getScoreMetrics(corpus,corr_dict,scores_dict)

BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...


# Regression Model

In [26]:
# read data
corpus = "en-fi"
filePath = "scores\\" + corpus  +".csv"
data = pd.read_csv(filePath)

In [27]:
scaler = StandardScaler()
scores = data.loc[:,'bleu_w1':]
scores = pd.DataFrame(scaler.fit_transform(scores),columns=scores.columns, index = scores.index)

In [28]:
final_results(scores,data['z-score'])

Unnamed: 0,mse,pcorr,kendallcorr
bleu_w1,0.861219,0.533798,0.354543
bleu_w2,0.992208,0.460078,0.316021
rouge_recall_w1,0.913888,0.504156,0.328229
rouge_precision_w1,0.85212,0.538919,0.357439
rouge_f1_w1,0.861046,0.533895,0.348998
rouge_precision_w2,1.006834,0.451846,0.310283
rouge_recall_w2,1.045857,0.429884,0.295437
rouge_f1_w2,1.016713,0.446286,0.303983
BLEURT,0.902396,0.510624,0.327299


In [24]:
def kfold_CV_pipe(x_train, y_train, model, n_splits = 5):
    all_scores = []
    # set up k-fold
    kf = KFold(n_splits=5)
    oof_prediction = np.zeros((len(x_train),))
    test_preds = 0
    for  _, (trn_idx, val_idx) in tqdm(enumerate(kf.split(x_train, y_train))):
        # set up the splitted data
        train       , val        = x_train.loc[trn_idx,:], x_train.loc[val_idx,:]
        train_target, val_target = y_train[trn_idx], y_train[val_idx]      
        #print(train,val)
        # encode     
        # model fitting
        model.fit(train, train_target)
        # get predicted values for oof data and whole test set
        temp_oof = model.predict(val)
        # get predicted values for whole data set aggregate from each fold iter
        oof_prediction[val_idx] = temp_oof    
        fold_score = pd.Series(val_target).corr(pd.Series(temp_oof,index = val_idx),method='pearson')
        print(fold_score)
        all_scores.append(fold_score)               
    return  oof_prediction, all_scores

In [192]:
params = {'n_estimators': 120,
          'max_depth': 6,
          'min_samples_split': 20,
          'learning_rate': 0.01}

In [193]:
scores.dropna(inplace=True)
data.dropna(inplace=True)
scores.reset_index(drop=True,inplace=True)
data.reset_index(drop=True,inplace=True)

In [194]:
model = ensemble.GradientBoostingRegressor(**params)
oof_prediction, all_scores = kfold_CV_pipe(scores,data['z-score'],model)
print("OOF correlation score")
data['z-score'].corr(pd.Series(oof_prediction,index = data.index),method='pearson')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for  _, (trn_idx, val_idx) in tqdm(enumerate(kf.split(x_train, y_train))):


0it [00:00, ?it/s]

0.38131197113153414
0.42251975962889443
0.4348521002403214
0.46239969926539337
0.414271929193322
OOF correlation score


0.4323567327869856

# Grid search

In [187]:
grid_search_params ={'learning_rate': [0.01,0.03,0.05,0.09],
                      'subsample'    : [0.9, 0.8,0.7],
                      'n_estimators' : [100,200,300,800],
                      'max_depth'    : [4,6,8,10],
                     
                     }
X = scores; y = data['z-score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
GBR = ensemble.GradientBoostingRegressor(random_state=2)
grid_GBR = GridSearchCV(estimator=GBR, param_grid = grid_search_params, cv = 3, n_jobs=-1)
grid_GBR.fit(X_train, y_train)
#Now we are using print statements to print the results. It will give the values of hyperparameters as a result.

print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",grid_GBR.best_estimator_)
print("\n The best score across ALL searched params:\n",grid_GBR.best_score_)
print("\n The best parameters across ALL searched params:\n",grid_GBR.best_params_)

 Results from Grid Search 

 The best estimator across ALL searched params:
 GradientBoostingRegressor(learning_rate=0.01, max_depth=4, n_estimators=300,
                          random_state=2, subsample=0.8)

 The best score across ALL searched params:
 0.33710772103374964

 The best parameters across ALL searched params:
 {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 300, 'subsample': 0.8}


In [188]:
best_params = grid_GBR.best_params_
model = ensemble.GradientBoostingRegressor(**best_params)
oof_prediction, all_scores = kfold_CV_pipe(scores,data['z-score'],model)
print("OOF correlation score")
data['z-score'].corr(pd.Series(oof_prediction,index = data.index),method='pearson')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for  _, (trn_idx, val_idx) in tqdm(enumerate(kf.split(x_train, y_train))):


0it [00:00, ?it/s]

0.38064915507518426
0.4230069849581893
0.7018342029347516
0.5301830749654544
0.4307576208205121
OOF correlation score


0.5246008306730199

# LightGBM

In [35]:
grid_search_params ={'learning_rate': [0.005,0.01,0.03,0.05,0.09],
                      'subsample'    : [0.9, 0.8,0.7],
                      'n_estimators' : [100,200,300,800,1200],
                      'max_depth'    : [3,4,5,6,8], 
                     }
X = scores; y = data['z-score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
LightGBM = lgb.LGBMRegressor(boosting_type='gbdt', metric= 'rmse', random_state=2)
grid_GBR = GridSearchCV(estimator=LightGBM, param_grid = grid_search_params, cv = 3, n_jobs=-1)
grid_GBR.fit(X_train, y_train)
#Now we are using print statements to print the results. It will give the values of hyperparameters as a result.

print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",grid_GBR.best_estimator_)
print("\n The best score across ALL searched params:\n",grid_GBR.best_score_)
print("\n The best parameters across ALL searched params:\n",grid_GBR.best_params_)

 Results from Grid Search 

 The best estimator across ALL searched params:
 LGBMRegressor(learning_rate=0.03, max_depth=3, metric='rmse', n_estimators=300,
              random_state=2, subsample=0.9)

 The best score across ALL searched params:
 0.34283245841679566

 The best parameters across ALL searched params:
 {'learning_rate': 0.03, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.9}


In [36]:
best_params = grid_GBR.best_params_
model = ensemble.GradientBoostingRegressor(**best_params)
oof_prediction, all_scores = kfold_CV_pipe(scores,data['z-score'],model)
print("OOF correlation score")
data['z-score'].corr(pd.Series(oof_prediction,index = data.index),method='pearson')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for  _, (trn_idx, val_idx) in tqdm(enumerate(kf.split(x_train, y_train))):


0it [00:00, ?it/s]

0.39188266306002856
0.4201159817793041
0.7157482094631457
0.5393351632656764
0.4303508632519454
OOF correlation score


0.5374586649686021

## This is for saving the best model for each language pairs:
- cs-en: 
    - GBR {'learning_rate': 0.03, 'max_depth': 4, 'n_estimators': 200, 'subsample': 0.8} -> 0.526041
    - Best PCorr - BLEURT -> 0.465266
- de-en:
    - GBR {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 300, 'subsample': 0.7} -> 0.4038856
    - Best PCorr - BLEURT -> 0.37235
- en-fi:
    - LightGBM {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 300, 'subsample': 0.7} -> 0.537458
    - Best PCorr - BLEURT -> 0.52858
- en-zh:
    - LightGBM {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.9}-> 0.445767
    - Best PCorr - rouge_f1_w1	-> 	0.436083
- ru-en:
    - LightGBM {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 800, 'subsample': 0.9} -> 0.411640
    - Best PCorr - BLEURT-> 0.389512
- zh-en:
    - LightGBM {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.9} -> 0.390421
    - Best PCorr - BLEURT-> 0.343382	

# Apply on test set

In [6]:
def generateSupervisedData(train_data, test_data):
    scaler = StandardScaler()
    # scaling train data
    train_scores = train_data.loc[:,'bleu_w1':]
    train_scores = pd.DataFrame(scaler.fit_transform(train_scores),columns=train_scores.columns, index = train_scores.index)
    train_scores.fillna(0,inplace=True)
    # transforming test data
    test_scores = test_data.loc[:,'bleu_w1':]
    test_scores = pd.DataFrame(scaler.transform(test_scores),columns=test_scores.columns, index = test_scores.index)
    test_scores.fillna(0,inplace=True)
    return train_scores, test_scores
def pipeline(corpus, model_dict):
    train_path = "corpus\\" 
    test_path = "testset\\"   
    print(f"------------ Corpus [{corpus}] -------------")
    print("**Preprocessing and scoring train data...")
    train_data = main(train_path + corpus + "\\scores.csv")
    print("**Preprocessing and scoring test data...")
    test_data = main(test_path + corpus + "\\scores.csv")
    train_scores, test_scores = generateSupervisedData(train_data, test_data)
    model = model_dict[corpus]
    print("**Training regression model")
    model.fit(train_scores, train_data['z-score'])
    prediction = pd.Series(model.predict(test_scores),name = "metric", index = test_scores.index)
    test_df = getCSV(test_path + corpus + "\\scores.csv")
    final_df = pd.concat([test_df, prediction], axis = 1)
    return final_df

In [28]:
model_dict = {
    'cs-en':ensemble.GradientBoostingRegressor(learning_rate= 0.03, max_depth= 4, n_estimators= 200, subsample=0.8,random_state = 2),
    'de-en':ensemble.GradientBoostingRegressor(learning_rate= 0.01, max_depth= 4, n_estimators= 300, subsample=0.7,random_state = 2),
    'en-fi':lgb.LGBMRegressor(boosting_type='gbdt', learning_rate= 0.01, max_depth= 4, n_estimators= 300, subsample= 0.7,metric= 'rmse', random_state=2),
    'en-zh':lgb.LGBMRegressor(boosting_type='gbdt', learning_rate= 0.01, max_depth= 3, n_estimators= 300, subsample= 0.9,metric= 'rmse', random_state=2),
    'ru-en':lgb.LGBMRegressor(boosting_type='gbdt', learning_rate= 0.01, max_depth= 3, n_estimators= 800, subsample= 0.9,metric= 'rmse', random_state=2),
    'zh-en':lgb.LGBMRegressor(boosting_type='gbdt', learning_rate= 0.05, max_depth= 3, n_estimators= 200, subsample= 0.9,metric= 'rmse', random_state=2)
}
corpus_list_0=['cs-en','de-en','en-fi','en-zh']
corpus_list = ['ru-en','zh-en']
for corpus in corpus_list:
    predicted_df = pipeline(corpus, model_dict)
    predicted_df.to_csv("prediction\\" + corpus +"\\scores.csv",index=False)
    print(f"------------ Finished [{corpus}] -------------")

------------ Corpus [ru-en] -------------
**Preprocessing and scoring train data...
Detected ru-en...Skip preprocessing...
BLEU calculated...
ROUGE 1-gram calculated...
ROUGE 2-gram calculated...
INFO:tensorflow:Reading checkpoint bleurt\test_checkpoint.
INFO:tensorflow:Config file found, reading.
INFO:tensorflow:Will load checkpoint dbleurt_tiny
INFO:tensorflow:Loads full paths and checks that files exists.
INFO:tensorflow:... name:dbleurt_tiny
INFO:tensorflow:... vocab_file:vocab.txt
INFO:tensorflow:... bert_config_file:bert_config.json
INFO:tensorflow:... do_lower_case:True
INFO:tensorflow:... max_seq_length:512
INFO:tensorflow:Creating BLEURT scorer.
INFO:tensorflow:Creating WordPiece tokenizer.
INFO:tensorflow:WordPiece tokenizer instantiated.
INFO:tensorflow:Creating Eager Mode predictor.
INFO:tensorflow:Loading model.
INFO:tensorflow:BLEURT initialized.
BLEURT calculating...
**Preprocessing and scoring test data...
Detected ru-en...Skip preprocessing...
BLEU calculated...
ROUGE 