In [1]:
from comet_ml.api import API
import pandas as pd


def get_metric_data(experiment, metric):
    """Return the first two logged values of ``metric`` as floats.

    Args:
        experiment: Object exposing ``get_metrics(name)``, which must return a
            sequence of dicts carrying a ``"metricValue"`` entry.
        metric: Name of the metric to look up.

    Returns:
        Tuple ``(hyp_1, hyp_2)`` built from the first and second returned
        entries. The order is whatever ``experiment.get_metrics`` yields —
        presumably hypothesis 1 then hypothesis 2; verify against the logger.

    Raises:
        IndexError: If fewer than two values were returned for ``metric``.
            The message names the metric so a missing or partial log is easy
            to trace.
    """
    metrics = experiment.get_metrics(metric)
    if len(metrics) < 2:
        # Same exception type as plain indexing would raise, but with context.
        raise IndexError(
            "metric {!r} has {} logged value(s); at least 2 are required".format(
                metric, len(metrics)
            )
        )
    hyp_1 = float(metrics[0]["metricValue"])
    hyp_2 = float(metrics[1]["metricValue"])
    return hyp_1, hyp_2


def build_hypothesis_data(experiment_name, beam, hypothesis, sari_val, bleu_val, sentence_bleu_val, f1_add_val, f1_keep_val, p_del_val):
    """Pack one result row (experiment / beam / hypothesis plus its scores) into a Series.

    The returned ``pd.Series`` is labelled, in order, ``Experiment``, ``Beam``,
    ``Hypothesis``, ``SARI``, ``BLEU``, ``Sentence_BLEU``, ``F1_Add``,
    ``F1_Keep``, ``P_Del`` and ``Sum``, where ``Sum`` is ``sari_val + bleu_val``.
    """
    record = {
        "Experiment": experiment_name,
        "Beam": beam,
        "Hypothesis": hypothesis,
        "SARI": sari_val,
        "BLEU": bleu_val,
        "Sentence_BLEU": sentence_bleu_val,
        "F1_Add": f1_add_val,
        "F1_Keep": f1_keep_val,
        "P_Del": p_del_val,
    }
    # Combined score kept as the last field of the row.
    record["Sum"] = sari_val + bleu_val
    return pd.Series(record)


def get_metrics(experiment, experiment_name):
    """Collect the per-beam score rows of one experiment, split by hypothesis.

    For every beam size in ``(1, 2, 4, 6, 12)`` the six metrics
    ``sari_score_<beam>``, ``bleu_score_nltk_<beam>``,
    ``avg_sentence_bleu_scores_<beam>``, ``f1_add_<beam>``, ``f1_keep_<beam>``
    and ``p_del_<beam>`` are read through ``get_metric_data``.

    Returns:
        Tuple ``(hypotheses_1, hypotheses_2)``: two lists holding one
        ``build_hypothesis_data`` row per beam size, for hypothesis 1 and
        hypothesis 2 respectively.
    """
    # Order matches the positional score arguments of build_hypothesis_data.
    metric_prefixes = (
        "sari_score_",
        "bleu_score_nltk_",
        "avg_sentence_bleu_scores_",
        "f1_add_",
        "f1_keep_",
        "p_del_",
    )

    first_rows, second_rows = [], []
    for beam in (1, 2, 4, 6, 12):
        value_pairs = [get_metric_data(experiment, prefix + str(beam)) for prefix in metric_prefixes]
        first_rows.append(
            build_hypothesis_data(experiment_name, beam, 1, *[pair[0] for pair in value_pairs])
        )
        second_rows.append(
            build_hypothesis_data(experiment_name, beam, 2, *[pair[1] for pair in value_pairs])
        )

    return first_rows, second_rows


def prepare_project(experiment_name):
    """Download the metric tables of every experiment listed for one project.

    Args:
        experiment_name: Forwarded to ``comet_api.get_experiments`` together
            with the global ``workspace``. The notebook passes project names
            such as ``"bart-mws-eval"`` here, despite the parameter's name.

    Returns:
        A two-element list ``[overview_project, metric_data]``: the
        concatenation of all per-hypothesis frames, and the list of those
        frames (hypothesis 1 then hypothesis 2 for each experiment).

    Note:
        ``pd.concat`` raises ``ValueError`` when no experiments are returned,
        because there is nothing to concatenate.
    """
    metric_data = []
    for listed in comet_api.get_experiments(workspace, experiment_name):
        experiment = comet_api.get_experiment_by_id(listed.id)
        # get_metrics yields (hypothesis-1 rows, hypothesis-2 rows), in that order.
        for hypothesis_rows in get_metrics(experiment, experiment.name):
            metric_data.append(pd.DataFrame(hypothesis_rows))

    return [pd.concat(metric_data), metric_data]

In [2]:
# The Comet API key is deliberately NOT stored in the notebook. Without an
# explicit ``api_key`` the client resolves it from Comet's own configuration
# (the COMET_API_KEY environment variable or a .comet.config file).
# NOTE(review): the key that used to be committed on this line is exposed in
# the file history and should be revoked and regenerated.
comet_api = API()
workspace = "abeggluk"

In [3]:
# Comet projects to fetch; each is printed before its download starts so
# progress is visible while prepare_project runs.
projects_eval_names_mws = ["bart-mws-eval", "transformer-mws-eval-beam"]

projects_eval_mws = []
for project_name in projects_eval_names_mws:
    print(project_name)
    project_tables = prepare_project(project_name)
    projects_eval_mws.append(project_tables)

bart-mws-eval
transformer-mws-eval-beam


In [4]:
# Same fetch as the mws cell, for the Newsela projects. Each name is printed
# before its download starts.
projects_eval_names_newsela = ["bart-newsela-eval", "transformer-newsela-eval-beam"]

projects_eval_newsela = []
for project_name in projects_eval_names_newsela:
    print(project_name)
    project_tables = prepare_project(project_name)
    projects_eval_newsela.append(project_tables)

bart-newsela-eval
transformer-newsela-eval-beam


In [5]:
from IPython.display import display

# Show every float with four decimals (and a thousands separator) from here on.
pd.set_option("display.float_format", '{:,.4f}'.format)

# Each project entry is [overview, per-hypothesis frames]; only the frames are shown.
for _overview, hypothesis_frames in projects_eval_mws:
    for frame in hypothesis_frames:
        display(frame)

Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large,1,1,0.2193,0.5032,0.479,0.0001,0.6413,0.0166,0.7226
1,bart-large,2,1,0.2457,0.5193,0.474,0.0052,0.6451,0.0868,0.765
2,bart-large,4,1,0.2571,0.526,0.4706,0.003,0.6436,0.1246,0.7831
3,bart-large,6,1,0.2659,0.5264,0.4703,0.012,0.6427,0.1429,0.7923
4,bart-large,12,1,0.2617,0.5241,0.4691,0.0074,0.6416,0.1362,0.7859


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large,1,2,0.2193,0.5032,0.479,0.0001,0.6413,0.0166,0.7226
1,bart-large,2,2,0.2791,0.4986,0.4303,0.0218,0.6176,0.1978,0.7777
2,bart-large,4,2,0.2872,0.5075,0.4336,0.0208,0.6209,0.22,0.7947
3,bart-large,6,2,0.2819,0.5033,0.4265,0.0179,0.6145,0.2132,0.7851
4,bart-large,12,2,0.2921,0.5109,0.4377,0.0268,0.6218,0.2276,0.803


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large-cnn,1,1,0.2389,0.2397,0.2358,0.0207,0.6335,0.0626,0.4787
1,bart-large-cnn,2,1,0.2422,0.2506,0.2462,0.0234,0.6384,0.0647,0.4927
2,bart-large-cnn,4,1,0.2494,0.2477,0.244,0.0225,0.6347,0.0911,0.4971
3,bart-large-cnn,6,1,0.2497,0.2476,0.2445,0.0208,0.634,0.0942,0.4973
4,bart-large-cnn,12,1,0.2502,0.2519,0.2477,0.0195,0.6357,0.0955,0.5022


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large-cnn,1,2,0.2389,0.2397,0.2358,0.0207,0.6335,0.0626,0.4787
1,bart-large-cnn,2,2,0.244,0.2494,0.2442,0.0243,0.6379,0.0698,0.4934
2,bart-large-cnn,4,2,0.2527,0.2475,0.2436,0.024,0.6348,0.0994,0.5002
3,bart-large-cnn,6,2,0.2514,0.2463,0.2426,0.0213,0.6328,0.0999,0.4977
4,bart-large-cnn,12,2,0.2496,0.2505,0.2461,0.0203,0.6344,0.0941,0.5


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large r+q,1,1,0.278,0.4956,0.444,0.0254,0.6326,0.1761,0.7737
1,bart-large r+q,2,1,0.2746,0.5,0.438,0.0252,0.6252,0.1732,0.7746
2,bart-large r+q,4,1,0.2783,0.492,0.4203,0.0258,0.6096,0.1995,0.7703
3,bart-large r+q,6,1,0.2846,0.4913,0.4183,0.0283,0.61,0.2154,0.7759
4,bart-large r+q,12,1,0.2808,0.4946,0.4208,0.0243,0.6118,0.2063,0.7754


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large r+q,1,2,0.278,0.4956,0.444,0.0254,0.6326,0.1761,0.7737
1,bart-large r+q,2,2,0.3027,0.4543,0.3929,0.0351,0.6065,0.2663,0.7569
2,bart-large r+q,4,2,0.3008,0.4381,0.3698,0.0343,0.5833,0.2847,0.7389
3,bart-large r+q,6,2,0.3055,0.4389,0.3676,0.0339,0.5819,0.3007,0.7444
4,bart-large r+q,12,2,0.3043,0.4424,0.3747,0.0362,0.5836,0.2931,0.7467


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large scheduler,1,1,0.2309,0.5064,0.4762,0.0054,0.6422,0.045,0.7373
1,bart-large scheduler,2,1,0.2597,0.5134,0.4441,0.0153,0.6257,0.138,0.773
2,bart-large scheduler,4,1,0.2746,0.526,0.444,0.0177,0.6233,0.1829,0.8006
3,bart-large scheduler,6,1,0.2725,0.5204,0.441,0.0212,0.6197,0.1767,0.7929
4,bart-large scheduler,12,1,0.2769,0.5244,0.4419,0.0182,0.6218,0.1907,0.8013


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large scheduler,1,2,0.2309,0.5064,0.4762,0.0054,0.6422,0.045,0.7373
1,bart-large scheduler,2,2,0.3017,0.4928,0.3962,0.033,0.5955,0.2764,0.7945
2,bart-large scheduler,4,2,0.3201,0.5111,0.4132,0.0421,0.6172,0.301,0.8312
3,bart-large scheduler,6,2,0.319,0.5075,0.4079,0.0414,0.6121,0.3034,0.8264
4,bart-large scheduler,12,2,0.3213,0.5091,0.4181,0.0424,0.6177,0.3037,0.8303


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _0_2,1,1,0.2888,0.3652,0.3376,0.0065,0.5551,0.305,0.654
1,mws _0_2,2,1,0.2897,0.3461,0.3243,0.0049,0.5659,0.2983,0.6358
2,mws _0_2,4,1,0.2904,0.3528,0.3298,0.005,0.5697,0.2964,0.6432
3,mws _0_2,6,1,0.293,0.3589,0.3338,0.0043,0.5752,0.2994,0.6518
4,mws _0_2,12,1,0.2925,0.3592,0.335,0.0056,0.5736,0.2983,0.6517


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _0_2,1,2,0.2888,0.3652,0.3376,0.0065,0.5551,0.305,0.654
1,mws _0_2,2,2,0.2963,0.333,0.2984,0.0096,0.5411,0.3383,0.6293
2,mws _0_2,4,2,0.2896,0.3465,0.3219,0.007,0.5621,0.2997,0.636
3,mws _0_2,6,2,0.2937,0.3534,0.3267,0.0061,0.5706,0.3043,0.6471
4,mws _0_2,12,2,0.293,0.3552,0.3295,0.005,0.5714,0.3025,0.6481


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _0_3,1,1,0.2801,0.3719,0.345,0.0071,0.5579,0.2753,0.652
1,mws _0_3,2,1,0.2835,0.3579,0.333,0.006,0.5663,0.2783,0.6415
2,mws _0_3,4,1,0.2814,0.3628,0.3362,0.0055,0.5673,0.2712,0.6442
3,mws _0_3,6,1,0.2829,0.365,0.3387,0.005,0.5715,0.2721,0.6479
4,mws _0_3,12,1,0.2834,0.3643,0.3369,0.0054,0.5695,0.2754,0.6478


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _0_3,1,2,0.2801,0.3719,0.345,0.0071,0.5579,0.2753,0.652
1,mws _0_3,2,2,0.2948,0.3419,0.306,0.0112,0.5411,0.3321,0.6367
2,mws _0_3,4,2,0.2849,0.3541,0.3193,0.0055,0.5513,0.2978,0.639
3,mws _0_3,6,2,0.2845,0.3576,0.3261,0.0068,0.5577,0.2891,0.6421
4,mws _0_3,12,2,0.2874,0.3623,0.3326,0.006,0.5657,0.2905,0.6497


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_2,1,1,0.242,0.4916,0.4444,0.0103,0.6136,0.1022,0.7337
1,mws _2_2,2,1,0.2367,0.4456,0.4213,0.0044,0.6344,0.0711,0.6823
2,mws _2_2,4,1,0.2318,0.4437,0.4236,0.004,0.6383,0.0532,0.6755
3,mws _2_2,6,1,0.2307,0.4417,0.4241,0.0039,0.6411,0.0472,0.6724
4,mws _2_2,12,1,0.2264,0.4385,0.4213,0.0037,0.6385,0.0371,0.6649


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_2,1,2,0.242,0.4916,0.4444,0.0103,0.6136,0.1022,0.7337
1,mws _2_2,2,2,0.2813,0.4208,0.3688,0.0147,0.5904,0.2389,0.7021
2,mws _2_2,4,2,0.2638,0.4194,0.3856,0.01,0.6086,0.1728,0.6832
3,mws _2_2,6,2,0.2565,0.4197,0.391,0.0103,0.6127,0.1466,0.6763
4,mws _2_2,12,2,0.2506,0.4202,0.3937,0.0094,0.6205,0.1219,0.6708


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_3,1,1,0.234,0.5002,0.4633,0.006,0.6279,0.0681,0.7342
1,mws _2_3,2,1,0.2287,0.4385,0.4168,0.0024,0.6314,0.0524,0.6672
2,mws _2_3,4,1,0.2312,0.4408,0.4219,0.003,0.6382,0.0525,0.672
3,mws _2_3,6,1,0.2297,0.4378,0.4213,0.0032,0.6377,0.0483,0.6675
4,mws _2_3,12,1,0.2298,0.4342,0.4183,0.006,0.6396,0.0438,0.664


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_3,1,2,0.234,0.5002,0.4633,0.006,0.6279,0.0681,0.7342
1,mws _2_3,2,2,0.2932,0.4293,0.3707,0.0194,0.5993,0.2609,0.7226
2,mws _2_3,4,2,0.2693,0.4253,0.3921,0.0145,0.6158,0.1777,0.6946
3,mws _2_3,6,2,0.2596,0.422,0.395,0.0132,0.6214,0.1443,0.6817
4,mws _2_3,12,2,0.2493,0.4214,0.3975,0.0105,0.6227,0.1147,0.6707


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,msw _2_3 r+q _1,1,1,0.2404,0.5027,0.4636,0.0083,0.6306,0.0822,0.743
1,msw _2_3 r+q _1,2,1,0.2338,0.4434,0.425,0.004,0.6394,0.0579,0.6772
2,msw _2_3 r+q _1,4,1,0.2314,0.437,0.4189,0.0044,0.6386,0.0511,0.6683
3,msw _2_3 r+q _1,6,1,0.2289,0.4351,0.4193,0.0046,0.6405,0.0417,0.664
4,msw _2_3 r+q _1,12,1,0.2279,0.4278,0.4133,0.0056,0.6384,0.0396,0.6556


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,msw _2_3 r+q _1,1,2,0.2404,0.5027,0.4636,0.0083,0.6306,0.0822,0.743
1,msw _2_3 r+q _1,2,2,0.2881,0.4318,0.377,0.0206,0.5994,0.2442,0.7199
2,msw _2_3 r+q _1,4,2,0.2672,0.4336,0.4024,0.0152,0.6245,0.162,0.7008
3,msw _2_3 r+q _1,6,2,0.2553,0.4285,0.4029,0.0127,0.625,0.1283,0.6839
4,msw _2_3 r+q _1,12,2,0.2453,0.4247,0.4056,0.0098,0.6297,0.0963,0.67


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,msw _2_3 r+q _2,1,1,0.2269,0.503,0.4769,0.0048,0.6375,0.0385,0.7299
1,msw _2_3 r+q _2,2,1,0.2261,0.4509,0.4312,0.0027,0.6399,0.0358,0.677
2,msw _2_3 r+q _2,4,1,0.2258,0.4511,0.4317,0.0027,0.6404,0.0343,0.677
3,msw _2_3 r+q _2,6,1,0.2258,0.4511,0.4317,0.0027,0.6404,0.0343,0.677
4,msw _2_3 r+q _2,12,1,0.226,0.4516,0.4321,0.0027,0.6411,0.0343,0.6776


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,msw _2_3 r+q _2,1,2,0.2269,0.503,0.4769,0.0048,0.6375,0.0385,0.7299
1,msw _2_3 r+q _2,2,2,0.2769,0.425,0.3951,0.0134,0.6255,0.1918,0.7019
2,msw _2_3 r+q _2,4,2,0.2661,0.4244,0.3999,0.0112,0.627,0.16,0.6904
3,msw _2_3 r+q _2,6,2,0.2655,0.4273,0.4039,0.0101,0.6304,0.156,0.6928
4,msw _2_3 r+q _2,12,2,0.2583,0.4283,0.4063,0.0093,0.63,0.1357,0.6867


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_3 reward,1,1,0.239,0.5019,0.465,0.0069,0.635,0.0752,0.741
1,mws _2_3 reward,2,1,0.2315,0.4454,0.4266,0.0047,0.6418,0.0481,0.6769
2,mws _2_3 reward,4,1,0.2313,0.4413,0.424,0.0043,0.6434,0.0462,0.6726
3,mws _2_3 reward,6,1,0.2317,0.4396,0.423,0.0046,0.6445,0.0461,0.6714
4,mws _2_3 reward,12,1,0.2328,0.4341,0.4192,0.006,0.6434,0.049,0.6669


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,mws _2_3 reward,1,2,0.239,0.5019,0.465,0.0069,0.635,0.0752,0.741
1,mws _2_3 reward,2,2,0.2747,0.4255,0.3812,0.0148,0.6039,0.2056,0.7003
2,mws _2_3 reward,4,2,0.2625,0.4304,0.406,0.0174,0.6303,0.1397,0.6929
3,mws _2_3 reward,6,2,0.2579,0.4292,0.4072,0.0199,0.6314,0.1223,0.6871
4,mws _2_3 reward,12,2,0.2491,0.4247,0.4086,0.0175,0.6337,0.0961,0.6738


In [6]:
# Two frames (hypothesis 1 and 2) are stored per experiment, so half the frame
# count is the number of experiments in each project.
for project_name, (_overview, hypothesis_frames) in zip(projects_eval_names_mws, projects_eval_mws):
    print(project_name, len(hypothesis_frames) // 2)

bart-mws-eval 4
transformer-mws-eval-beam 7


In [7]:
# Each project entry is [overview, per-hypothesis frames]; only the frames are shown.
for _overview, hypothesis_frames in projects_eval_newsela:
    for frame in hypothesis_frames:
        display(frame)

Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large,1,1,0.2901,0.2661,0.254,0.0485,0.4118,0.4101,0.5563
1,newsela bart-large,2,1,0.2851,0.2651,0.254,0.044,0.4095,0.4017,0.5502
2,newsela bart-large,4,1,0.2948,0.268,0.2569,0.0478,0.4109,0.4258,0.5628
3,newsela bart-large,6,1,0.3028,0.2722,0.2591,0.0521,0.4114,0.445,0.575
4,newsela bart-large,12,1,0.3053,0.2731,0.2582,0.0536,0.41,0.4522,0.5783


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large,1,2,0.2901,0.2661,0.254,0.0485,0.4118,0.4101,0.5563
1,newsela bart-large,2,2,0.3273,0.2525,0.2387,0.0568,0.4068,0.5182,0.5797
2,newsela bart-large,4,2,0.3249,0.2566,0.2428,0.0564,0.4066,0.5116,0.5815
3,newsela bart-large,6,2,0.3277,0.2582,0.2446,0.0552,0.4078,0.52,0.5858
4,newsela bart-large,12,2,0.3295,0.2587,0.2428,0.0546,0.4058,0.5283,0.5883


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large-cnn,1,1,0.2686,0.117,0.1139,0.0399,0.405,0.3608,0.3856
1,newsela bart-large-cnn,2,1,0.2392,0.1201,0.117,0.0393,0.3909,0.2873,0.3593
2,newsela bart-large-cnn,4,1,0.2314,0.117,0.1143,0.0354,0.3867,0.2721,0.3484
3,newsela bart-large-cnn,6,1,0.228,0.1198,0.1171,0.0334,0.3858,0.265,0.3479
4,newsela bart-large-cnn,12,1,0.2249,0.1174,0.1148,0.0323,0.3843,0.258,0.3423


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large-cnn,1,2,0.2686,0.117,0.1139,0.0399,0.405,0.3608,0.3856
1,newsela bart-large-cnn,2,2,0.2392,0.1201,0.117,0.0393,0.3909,0.2873,0.3593
2,newsela bart-large-cnn,4,2,0.2312,0.1179,0.1151,0.0348,0.3867,0.2723,0.3491
3,newsela bart-large-cnn,6,2,0.228,0.1198,0.1171,0.0334,0.3858,0.265,0.3479
4,newsela bart-large-cnn,12,2,0.2248,0.1174,0.1148,0.0321,0.3844,0.258,0.3422


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large 2 scheduler,1,1,0.2003,0.2391,0.2453,0.0121,0.3915,0.1972,0.4394
1,newsela bart-large 2 scheduler,2,1,0.2241,0.2394,0.2406,0.017,0.3865,0.2687,0.4634
2,newsela bart-large 2 scheduler,4,1,0.237,0.243,0.2428,0.0208,0.3886,0.3016,0.48
3,newsela bart-large 2 scheduler,6,1,0.2451,0.2476,0.2453,0.0209,0.3934,0.3211,0.4927
4,newsela bart-large 2 scheduler,12,1,0.2439,0.2461,0.2443,0.0202,0.3918,0.3196,0.49


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large 2 scheduler,1,2,0.2003,0.2391,0.2453,0.0121,0.3915,0.1972,0.4394
1,newsela bart-large 2 scheduler,2,2,0.2851,0.2327,0.221,0.0319,0.3765,0.4471,0.5179
2,newsela bart-large 2 scheduler,4,2,0.2864,0.2311,0.2237,0.0328,0.3787,0.4478,0.5175
3,newsela bart-large 2 scheduler,6,2,0.283,0.2304,0.2245,0.0327,0.379,0.4373,0.5134
4,newsela bart-large 2 scheduler,12,2,0.2882,0.2352,0.2292,0.0364,0.3838,0.4446,0.5234


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large-cnn 2 scheduler,1,1,0.2094,0.1175,0.1143,0.0345,0.393,0.2009,0.327
1,newsela bart-large-cnn 2 scheduler,2,1,0.1727,0.1223,0.1193,0.0341,0.3791,0.1049,0.295
2,newsela bart-large-cnn 2 scheduler,4,1,0.1728,0.1182,0.1153,0.0309,0.3767,0.1109,0.291
3,newsela bart-large-cnn 2 scheduler,6,1,0.1722,0.1204,0.1173,0.0277,0.376,0.113,0.2926
4,newsela bart-large-cnn 2 scheduler,12,1,0.1719,0.118,0.115,0.0283,0.376,0.1114,0.29


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large-cnn 2 scheduler,1,2,0.2094,0.1175,0.1143,0.0345,0.393,0.2009,0.327
1,newsela bart-large-cnn 2 scheduler,2,2,0.1727,0.1223,0.1193,0.0341,0.3791,0.1049,0.295
2,newsela bart-large-cnn 2 scheduler,4,2,0.1729,0.1192,0.1162,0.0303,0.3767,0.1118,0.2921
3,newsela bart-large-cnn 2 scheduler,6,2,0.1722,0.1204,0.1173,0.0277,0.376,0.113,0.2926
4,newsela bart-large-cnn 2 scheduler,12,2,0.1718,0.118,0.115,0.0282,0.376,0.1112,0.2899


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large 3 scheduler 5e-7,1,1,0.2489,0.2606,0.2567,0.0314,0.4109,0.3046,0.5095
1,newsela bart-large 3 scheduler 5e-7,2,1,0.2536,0.2576,0.2507,0.0306,0.4001,0.3302,0.5113
2,newsela bart-large 3 scheduler 5e-7,4,1,0.2664,0.2643,0.2562,0.0338,0.4061,0.3592,0.5306
3,newsela bart-large 3 scheduler 5e-7,6,1,0.2709,0.2658,0.2568,0.0358,0.4065,0.3703,0.5366
4,newsela bart-large 3 scheduler 5e-7,12,1,0.278,0.2706,0.2593,0.039,0.4104,0.3847,0.5486


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela bart-large 3 scheduler 5e-7,1,2,0.2489,0.2606,0.2567,0.0314,0.4109,0.3046,0.5095
1,newsela bart-large 3 scheduler 5e-7,2,2,0.311,0.252,0.2418,0.0528,0.4047,0.4755,0.563
2,newsela bart-large 3 scheduler 5e-7,4,2,0.3175,0.2561,0.2442,0.0545,0.407,0.4909,0.5736
3,newsela bart-large 3 scheduler 5e-7,6,2,0.3241,0.2645,0.251,0.0599,0.4129,0.4995,0.5886
4,newsela bart-large 3 scheduler 5e-7,12,2,0.3223,0.2608,0.248,0.0571,0.4072,0.5025,0.5831


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large r+q (5th epoch),1,1,0.3254,0.2657,0.2476,0.0531,0.4156,0.5076,0.5911
1,bart-large r+q (5th epoch),2,1,0.331,0.2657,0.2453,0.0545,0.4145,0.5239,0.5967
2,bart-large r+q (5th epoch),4,1,0.3358,0.2643,0.2421,0.0544,0.4114,0.5415,0.6001
3,bart-large r+q (5th epoch),6,1,0.3404,0.2671,0.243,0.0555,0.4145,0.551,0.6075
4,bart-large r+q (5th epoch),12,1,0.345,0.265,0.2413,0.0557,0.4127,0.5668,0.61


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bart-large r+q (5th epoch),1,2,0.3254,0.2657,0.2476,0.0531,0.4156,0.5076,0.5911
1,bart-large r+q (5th epoch),2,2,0.3457,0.2405,0.2174,0.05,0.3938,0.5932,0.5861
2,bart-large r+q (5th epoch),4,2,0.3514,0.2406,0.2198,0.0511,0.3955,0.6077,0.592
3,bart-large r+q (5th epoch),6,2,0.3494,0.2362,0.2154,0.0493,0.3908,0.6081,0.5856
4,bart-large r+q (5th epoch),12,2,0.3544,0.2376,0.2162,0.0517,0.3907,0.6207,0.592


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bert-large r+sari (5th epoch),1,1,0.3262,0.2603,0.2432,0.0548,0.4116,0.5121,0.5864
1,bert-large r+sari (5th epoch),2,1,0.3346,0.2668,0.2445,0.0564,0.415,0.5324,0.6014
2,bert-large r+sari (5th epoch),4,1,0.3383,0.2651,0.2421,0.0559,0.4112,0.5479,0.6034
3,bert-large r+sari (5th epoch),6,1,0.3417,0.2657,0.242,0.0564,0.4127,0.556,0.6075
4,bert-large r+sari (5th epoch),12,1,0.343,0.2597,0.2364,0.0544,0.4088,0.5656,0.6027


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,bert-large r+sari (5th epoch),1,2,0.3262,0.2603,0.2432,0.0548,0.4116,0.5121,0.5864
1,bert-large r+sari (5th epoch),2,2,0.3427,0.2363,0.2148,0.0506,0.3886,0.5888,0.5789
2,bert-large r+sari (5th epoch),4,2,0.3484,0.2407,0.2172,0.0524,0.3917,0.601,0.589
3,bert-large r+sari (5th epoch),6,2,0.352,0.2405,0.2182,0.0514,0.3954,0.6092,0.5925
4,bert-large r+sari (5th epoch),12,2,0.3537,0.2328,0.2116,0.0515,0.3888,0.6207,0.5864


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _4,1,1,0.3222,0.0322,0.0524,0.0087,0.1673,0.7907,0.3544
1,newsela _4,2,1,0.3223,0.031,0.049,0.008,0.1683,0.7908,0.3533
2,newsela _4,4,1,0.321,0.03,0.0477,0.0088,0.1661,0.788,0.351
3,newsela _4,6,1,0.3224,0.031,0.0477,0.0085,0.1701,0.7885,0.3534
4,newsela _4,12,1,0.3226,0.0307,0.0469,0.0085,0.1727,0.7867,0.3534


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _4,1,2,0.3222,0.0322,0.0524,0.0087,0.1673,0.7907,0.3544
1,newsela _4,2,2,0.3214,0.0304,0.0492,0.0079,0.1651,0.7911,0.3518
2,newsela _4,4,2,0.3204,0.0297,0.0482,0.0082,0.1637,0.7894,0.3501
3,newsela _4,6,2,0.3215,0.0309,0.0486,0.0085,0.1668,0.7894,0.3524
4,newsela _4,12,2,0.3225,0.031,0.0475,0.0086,0.1699,0.7891,0.3535


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6,1,1,0.3386,0.1389,0.128,0.0154,0.2929,0.7075,0.4775
1,newsela _6,2,1,0.3369,0.126,0.121,0.0122,0.3071,0.6915,0.4629
2,newsela _6,4,1,0.3363,0.1265,0.1228,0.0114,0.3144,0.683,0.4627
3,newsela _6,6,1,0.3344,0.1258,0.1225,0.0111,0.3155,0.6766,0.4602
4,newsela _6,12,1,0.331,0.1209,0.1196,0.0111,0.3153,0.6668,0.4519


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6,1,2,0.3386,0.1389,0.128,0.0154,0.2929,0.7075,0.4775
1,newsela _6,2,2,0.3398,0.1249,0.1195,0.0128,0.3005,0.7062,0.4647
2,newsela _6,4,2,0.3376,0.1269,0.1218,0.0119,0.3093,0.6916,0.4645
3,newsela _6,6,2,0.3379,0.1271,0.1235,0.0119,0.315,0.6869,0.465
4,newsela _6,12,2,0.3337,0.124,0.1209,0.0113,0.3153,0.6744,0.4576


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 reward 1st epoch,1,1,0.3383,0.1353,0.1256,0.0146,0.2868,0.7136,0.4736
1,newsela _6 reward 1st epoch,2,1,0.3388,0.1234,0.12,0.0129,0.3056,0.6979,0.4621
2,newsela _6 reward 1st epoch,4,1,0.3378,0.1265,0.1231,0.0129,0.311,0.6895,0.4642
3,newsela _6 reward 1st epoch,6,1,0.3383,0.1267,0.124,0.0131,0.3142,0.6874,0.465
4,newsela _6 reward 1st epoch,12,1,0.3375,0.125,0.1236,0.0135,0.3198,0.6791,0.4625


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 reward 1st epoch,1,2,0.3383,0.1353,0.1256,0.0146,0.2868,0.7136,0.4736
1,newsela _6 reward 1st epoch,2,2,0.3388,0.1194,0.1163,0.012,0.2965,0.7078,0.4582
2,newsela _6 reward 1st epoch,4,2,0.3384,0.1271,0.1224,0.0119,0.306,0.6973,0.4655
3,newsela _6 reward 1st epoch,6,2,0.3387,0.1274,0.1237,0.0123,0.31,0.6939,0.4661
4,newsela _6 reward 1st epoch,12,2,0.3363,0.1272,0.1243,0.0119,0.3155,0.6817,0.4635


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 reward 4th epoch,1,1,0.3372,0.1509,0.1403,0.015,0.3122,0.6843,0.488
1,newsela _6 reward 4th epoch,2,1,0.3351,0.1278,0.1257,0.0129,0.3146,0.6778,0.4629
2,newsela _6 reward 4th epoch,4,1,0.335,0.1281,0.1258,0.0134,0.3167,0.6748,0.4631
3,newsela _6 reward 4th epoch,6,1,0.3338,0.1277,0.1261,0.0129,0.3183,0.6703,0.4616
4,newsela _6 reward 4th epoch,12,1,0.3339,0.1253,0.1256,0.0121,0.3215,0.6681,0.4593


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 reward 4th epoch,1,2,0.3372,0.1509,0.1403,0.015,0.3122,0.6843,0.488
1,newsela _6 reward 4th epoch,2,2,0.3363,0.1233,0.1208,0.0129,0.3062,0.6899,0.4596
2,newsela _6 reward 4th epoch,4,2,0.3379,0.1276,0.125,0.0124,0.3178,0.6835,0.4655
3,newsela _6 reward 4th epoch,6,2,0.3356,0.128,0.1256,0.0129,0.3175,0.6763,0.4635
4,newsela _6 reward 4th epoch,12,2,0.334,0.1238,0.1234,0.0124,0.3196,0.6699,0.4578


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+q,1,1,0.34,0.1542,0.1415,0.0161,0.3097,0.6943,0.4942
1,newsela _6 r+q,2,1,0.339,0.1332,0.1289,0.013,0.3178,0.6862,0.4722
2,newsela _6 r+q,4,1,0.3379,0.133,0.1298,0.0127,0.3218,0.6793,0.4709
3,newsela _6 r+q,6,1,0.3384,0.1346,0.1313,0.0125,0.3254,0.6774,0.473
4,newsela _6 r+q,12,1,0.3384,0.1338,0.1318,0.0127,0.3296,0.6729,0.4722


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+q,1,2,0.34,0.1542,0.1415,0.0161,0.3097,0.6943,0.4942
1,newsela _6 r+q,2,2,0.3405,0.1293,0.1253,0.0132,0.3102,0.6981,0.4698
2,newsela _6 r+q,4,2,0.3375,0.1332,0.1285,0.0138,0.3155,0.6832,0.4706
3,newsela _6 r+q,6,2,0.3377,0.1336,0.1299,0.0131,0.3203,0.6795,0.4713
4,newsela _6 r+q,12,2,0.3364,0.1302,0.1284,0.0124,0.3218,0.675,0.4666


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+sari 5th epoch,1,1,0.3394,0.154,0.1425,0.0156,0.3155,0.6872,0.4934
1,newsela _6 r+sari 5th epoch,2,1,0.3367,0.1296,0.127,0.0132,0.3169,0.68,0.4663
2,newsela _6 r+sari 5th epoch,4,1,0.3359,0.1305,0.1275,0.0135,0.3189,0.6753,0.4664
3,newsela _6 r+sari 5th epoch,6,1,0.3348,0.1299,0.128,0.013,0.3208,0.6706,0.4647
4,newsela _6 r+sari 5th epoch,12,1,0.3342,0.1274,0.1267,0.0127,0.3212,0.6689,0.4616


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+sari 5th epoch,1,2,0.3394,0.154,0.1425,0.0156,0.3155,0.6872,0.4934
1,newsela _6 r+sari 5th epoch,2,2,0.3378,0.123,0.1207,0.0127,0.3066,0.6942,0.4608
2,newsela _6 r+sari 5th epoch,4,2,0.3389,0.1284,0.1255,0.0129,0.3157,0.6879,0.4673
3,newsela _6 r+sari 5th epoch,6,2,0.3373,0.1272,0.1255,0.0124,0.3208,0.6787,0.4645
4,newsela _6 r+sari 5th epoch,12,2,0.3353,0.125,0.1251,0.0129,0.3193,0.6737,0.4603


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+sari best,1,1,0.3395,0.1532,0.1415,0.0163,0.3122,0.6899,0.4927
1,newsela _6 r+sari best,2,1,0.3367,0.1314,0.128,0.0126,0.3176,0.6798,0.4681
2,newsela _6 r+sari best,4,1,0.3377,0.1335,0.1307,0.0128,0.3248,0.6755,0.4712
3,newsela _6 r+sari best,6,1,0.3371,0.1328,0.1299,0.0125,0.3243,0.6744,0.4698
4,newsela _6 r+sari best,12,1,0.3369,0.1315,0.1304,0.0122,0.3281,0.6703,0.4684


Unnamed: 0,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,newsela _6 r+sari best,1,2,0.3395,0.1532,0.1415,0.0163,0.3122,0.6899,0.4927
1,newsela _6 r+sari best,2,2,0.3377,0.1276,0.1233,0.0129,0.3081,0.6922,0.4653
2,newsela _6 r+sari best,4,2,0.3376,0.1307,0.1272,0.0129,0.3159,0.6838,0.4683
3,newsela _6 r+sari best,6,2,0.3382,0.1336,0.1299,0.012,0.3224,0.6801,0.4717
4,newsela _6 r+sari best,12,2,0.3364,0.1306,0.129,0.0117,0.325,0.6724,0.467


In [8]:
# Two frames (hypothesis 1 and 2) are stored per experiment, so half the frame
# count is the number of experiments in each project.
for project_name, (_overview, hypothesis_frames) in zip(projects_eval_names_newsela, projects_eval_newsela):
    print(project_name, len(hypothesis_frames) // 2)

bart-newsela-eval 7
transformer-newsela-eval-beam 7


In [9]:
# Hand-picked row for every per-hypothesis frame, one tuple per project.
# Entry k of a tuple is the row position taken from frame k; positions 0-4
# correspond to beam sizes 1, 2, 4, 6, 12. Frames alternate hypothesis 1 /
# hypothesis 2, so the picks do too.
# NOTE(review): the selection criterion is not recorded in the notebook.
mws_row_choices = (
    (3, 4, 4, 2, 3, 1, 4, 2),                    # projects_eval_mws[0]
    (4, 4, 0, 4, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1),  # projects_eval_mws[1]
)

best_beam_projects_eval_mws = pd.DataFrame([
    projects_eval_mws[project_pos][1][frame_pos].iloc[row_pos]
    for project_pos, row_positions in enumerate(mws_row_choices)
    for frame_pos, row_pos in enumerate(row_positions)
])

In [10]:
# Hand-picked row for every per-hypothesis frame, one tuple per project.
# Entry k of a tuple is the row position taken from frame k; positions 0-4
# correspond to beam sizes 1, 2, 4, 6, 12. Frames alternate hypothesis 1 /
# hypothesis 2, so the picks do too.
# NOTE(review): the selection criterion is not recorded in the notebook.
newsela_row_choices = (
    (4, 4, 0, 0, 3, 4, 0, 0, 4, 3, 4, 4, 3, 3),  # projects_eval_newsela[0]
    (4, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1),  # projects_eval_newsela[1]
)

best_beam_projects_eval_newsela = pd.DataFrame([
    projects_eval_newsela[project_pos][1][frame_pos].iloc[row_pos]
    for project_pos, row_positions in enumerate(newsela_row_choices)
    for frame_pos, row_pos in enumerate(row_positions)
])

In [11]:
# Move the old row labels into a column. The second reset_index below adds the
# position within this combined frame as well.
# NOTE: this rebinds best_beam_projects_eval_mws, so re-running the cell
# without re-running the cell that built it stacks another index column.
best_beam_projects_eval_mws = best_beam_projects_eval_mws.reset_index()

mws_row_total = len(best_beam_projects_eval_mws)

# Rows were selected alternately for hypothesis 1 and 2, so even positions go
# to the first table and odd positions to the second.
best_beam_projects_eval_mws_1 = pd.DataFrame(
    [best_beam_projects_eval_mws.iloc[pos] for pos in range(0, mws_row_total, 2)]
).reset_index()
best_beam_projects_eval_mws_2 = pd.DataFrame(
    [best_beam_projects_eval_mws.iloc[pos] for pos in range(1, mws_row_total, 2)]
).reset_index()

In [12]:
# Hypothesis-1 table first, then the hypothesis-2 table.
display(best_beam_projects_eval_mws_1, best_beam_projects_eval_mws_2)

Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,0,3,bart-large,6,1,0.2659,0.5264,0.4703,0.012,0.6427,0.1429,0.7923
1,2,4,bart-large-cnn,12,1,0.2502,0.2519,0.2477,0.0195,0.6357,0.0955,0.5022
2,4,3,bart-large r+q,6,1,0.2846,0.4913,0.4183,0.0283,0.61,0.2154,0.7759
3,6,4,bart-large scheduler,12,1,0.2769,0.5244,0.4419,0.0182,0.6218,0.1907,0.8013
4,8,4,mws _0_2,12,1,0.2925,0.3592,0.335,0.0056,0.5736,0.2983,0.6517
5,10,0,mws _0_3,1,1,0.2801,0.3719,0.345,0.0071,0.5579,0.2753,0.652
6,12,0,mws _2_2,1,1,0.242,0.4916,0.4444,0.0103,0.6136,0.1022,0.7337
7,14,0,mws _2_3,1,1,0.234,0.5002,0.4633,0.006,0.6279,0.0681,0.7342
8,16,0,msw _2_3 r+q _1,1,1,0.2404,0.5027,0.4636,0.0083,0.6306,0.0822,0.743
9,18,0,msw _2_3 r+q _2,1,1,0.2269,0.503,0.4769,0.0048,0.6375,0.0385,0.7299


Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,1,4,bart-large,12,2,0.2921,0.5109,0.4377,0.0268,0.6218,0.2276,0.803
1,3,2,bart-large-cnn,4,2,0.2527,0.2475,0.2436,0.024,0.6348,0.0994,0.5002
2,5,1,bart-large r+q,2,2,0.3027,0.4543,0.3929,0.0351,0.6065,0.2663,0.7569
3,7,2,bart-large scheduler,4,2,0.3201,0.5111,0.4132,0.0421,0.6172,0.301,0.8312
4,9,4,mws _0_2,12,2,0.293,0.3552,0.3295,0.005,0.5714,0.3025,0.6481
5,11,4,mws _0_3,12,2,0.2874,0.3623,0.3326,0.006,0.5657,0.2905,0.6497
6,13,1,mws _2_2,2,2,0.2813,0.4208,0.3688,0.0147,0.5904,0.2389,0.7021
7,15,1,mws _2_3,2,2,0.2932,0.4293,0.3707,0.0194,0.5993,0.2609,0.7226
8,17,1,msw _2_3 r+q _1,2,2,0.2881,0.4318,0.377,0.0206,0.5994,0.2442,0.7199
9,19,1,msw _2_3 r+q _2,2,2,0.2769,0.425,0.3951,0.0134,0.6255,0.1918,0.7019


In [13]:
# Move the old row labels into a column. The second reset_index below adds the
# position within this combined frame as well.
# NOTE: this rebinds best_beam_projects_eval_newsela, so re-running the cell
# without re-running the cell that built it stacks another index column.
best_beam_projects_eval_newsela = best_beam_projects_eval_newsela.reset_index()

newsela_row_total = len(best_beam_projects_eval_newsela)

# Rows were selected alternately for hypothesis 1 and 2, so even positions go
# to the first table and odd positions to the second.
best_beam_projects_eval_newsela_1 = pd.DataFrame(
    [best_beam_projects_eval_newsela.iloc[pos] for pos in range(0, newsela_row_total, 2)]
).reset_index()
best_beam_projects_eval_newsela_2 = pd.DataFrame(
    [best_beam_projects_eval_newsela.iloc[pos] for pos in range(1, newsela_row_total, 2)]
).reset_index()

In [14]:
# Hypothesis-1 table first, then the hypothesis-2 table.
display(best_beam_projects_eval_newsela_1, best_beam_projects_eval_newsela_2)

Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,0,4,newsela bart-large,12,1,0.3053,0.2731,0.2582,0.0536,0.41,0.4522,0.5783
1,2,0,newsela bart-large-cnn,1,1,0.2686,0.117,0.1139,0.0399,0.405,0.3608,0.3856
2,4,3,newsela bart-large 2 scheduler,6,1,0.2451,0.2476,0.2453,0.0209,0.3934,0.3211,0.4927
3,6,0,newsela bart-large-cnn 2 scheduler,1,1,0.2094,0.1175,0.1143,0.0345,0.393,0.2009,0.327
4,8,4,newsela bart-large 3 scheduler 5e-7,12,1,0.278,0.2706,0.2593,0.039,0.4104,0.3847,0.5486
5,10,4,bart-large r+q (5th epoch),12,1,0.345,0.265,0.2413,0.0557,0.4127,0.5668,0.61
6,12,3,bert-large r+sari (5th epoch),6,1,0.3417,0.2657,0.242,0.0564,0.4127,0.556,0.6075
7,14,4,newsela _4,12,1,0.3226,0.0307,0.0469,0.0085,0.1727,0.7867,0.3534
8,16,0,newsela _6,1,1,0.3386,0.1389,0.128,0.0154,0.2929,0.7075,0.4775
9,18,0,newsela _6 reward 1st epoch,1,1,0.3383,0.1353,0.1256,0.0146,0.2868,0.7136,0.4736


Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,1,4,newsela bart-large,12,2,0.3295,0.2587,0.2428,0.0546,0.4058,0.5283,0.5883
1,3,0,newsela bart-large-cnn,1,2,0.2686,0.117,0.1139,0.0399,0.405,0.3608,0.3856
2,5,4,newsela bart-large 2 scheduler,12,2,0.2882,0.2352,0.2292,0.0364,0.3838,0.4446,0.5234
3,7,0,newsela bart-large-cnn 2 scheduler,1,2,0.2094,0.1175,0.1143,0.0345,0.393,0.2009,0.327
4,9,3,newsela bart-large 3 scheduler 5e-7,6,2,0.3241,0.2645,0.251,0.0599,0.4129,0.4995,0.5886
5,11,4,bart-large r+q (5th epoch),12,2,0.3544,0.2376,0.2162,0.0517,0.3907,0.6207,0.592
6,13,3,bert-large r+sari (5th epoch),6,2,0.352,0.2405,0.2182,0.0514,0.3954,0.6092,0.5925
7,15,1,newsela _4,2,2,0.3214,0.0304,0.0492,0.0079,0.1651,0.7911,0.3518
8,17,1,newsela _6,2,2,0.3398,0.1249,0.1195,0.0128,0.3005,0.7062,0.4647
9,19,1,newsela _6 reward 1st epoch,2,2,0.3388,0.1194,0.1163,0.012,0.2965,0.7078,0.4582


In [15]:
# For each chosen experiment position, take its hypothesis-1 row followed by
# its hypothesis-2 row. The next cell keeps one of the two per experiment.
eval_data_mws = pd.DataFrame([
    hypothesis_table.iloc[experiment_pos]
    for experiment_pos in (3, 1, 2, 4, 7, 8, 10)
    for hypothesis_table in (best_beam_projects_eval_mws_1, best_beam_projects_eval_mws_2)
])

In [16]:
# level_0 is the row position inside best_beam_projects_eval_mws; keep only
# the listed positions.
mws_kept_positions = [7, 2, 5, 9, 15, 17, 21]
eval_data_mws = eval_data_mws.loc[eval_data_mws["level_0"].isin(mws_kept_positions)]
display(eval_data_mws)

Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
3,7,2,bart-large scheduler,4,2,0.3201,0.5111,0.4132,0.0421,0.6172,0.301,0.8312
1,2,4,bart-large-cnn,12,1,0.2502,0.2519,0.2477,0.0195,0.6357,0.0955,0.5022
2,5,1,bart-large r+q,2,2,0.3027,0.4543,0.3929,0.0351,0.6065,0.2663,0.7569
4,9,4,mws _0_2,12,2,0.293,0.3552,0.3295,0.005,0.5714,0.3025,0.6481
7,15,1,mws _2_3,2,2,0.2932,0.4293,0.3707,0.0194,0.5993,0.2609,0.7226
8,17,1,msw _2_3 r+q _1,2,2,0.2881,0.4318,0.377,0.0206,0.5994,0.2442,0.7199
10,21,1,mws _2_3 reward,2,2,0.2747,0.4255,0.3812,0.0148,0.6039,0.2056,0.7003


In [17]:
# For each chosen experiment position, take its hypothesis-1 row followed by
# its hypothesis-2 row. The next cell keeps one of the two per experiment.
eval_data_newsela = pd.DataFrame([
    hypothesis_table.iloc[experiment_pos]
    for experiment_pos in (0, 1, 5, 6, 8, 9, 11, 13)
    for hypothesis_table in (best_beam_projects_eval_newsela_1, best_beam_projects_eval_newsela_2)
])

In [18]:
# level_0 is the row position inside best_beam_projects_eval_newsela; keep
# only the listed positions.
newsela_kept_positions = [1, 2, 11, 13, 16, 18, 22, 26]
eval_data_newsela = eval_data_newsela.loc[eval_data_newsela["level_0"].isin(newsela_kept_positions)]
display(eval_data_newsela)

Unnamed: 0,level_0,index,Experiment,Beam,Hypothesis,SARI,BLEU,Sentence_BLEU,F1_Add,F1_Keep,P_Del,Sum
0,1,4,newsela bart-large,12,2,0.3295,0.2587,0.2428,0.0546,0.4058,0.5283,0.5883
1,2,0,newsela bart-large-cnn,1,1,0.2686,0.117,0.1139,0.0399,0.405,0.3608,0.3856
5,11,4,bart-large r+q (5th epoch),12,2,0.3544,0.2376,0.2162,0.0517,0.3907,0.6207,0.592
6,13,3,bert-large r+sari (5th epoch),6,2,0.352,0.2405,0.2182,0.0514,0.3954,0.6092,0.5925
8,16,0,newsela _6,1,1,0.3386,0.1389,0.128,0.0154,0.2929,0.7075,0.4775
9,18,0,newsela _6 reward 1st epoch,1,1,0.3383,0.1353,0.1256,0.0146,0.2868,0.7136,0.4736
11,22,0,newsela _6 r+q,1,1,0.34,0.1542,0.1415,0.0161,0.3097,0.6943,0.4942
13,26,0,newsela _6 r+sari best,1,1,0.3395,0.1532,0.1415,0.0163,0.3122,0.6899,0.4927
