### 1. Settings

In [1]:
#####################################
##########  DEPENDECIES ############
#####################################

import os
import pickle
import sys
sys.path.append("../")

from tqdm import tqdm # type: ignore
from datetime import date

import evaluate

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import utils.prep as pr
import utils.eval as ev
import utils.inference as infer
from utils.sampling import create_splits, prep_cv_validation
from utils.training import cv_cluster_set, cv_training_epochs_sets, test_cluster_set
from utils.training import results_dict_todf, cv_step_2, full_step_2, test_training_epochs_sets
from utils.inference import meta_predict, create_ensemble_map, ensemble_compute

tqdm.pandas()
import warnings
warnings.filterwarnings("ignore")

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import torch

#####################################
############  CONSTANTS #############
#####################################

RS = 42
BATCH_SIZE = 16
DECODER_LENGTH = 25
ENCODER_LENGTH = 25
MODEL_NAME = "Salesforce/codet5-base-multi-sum"

FULL_TRAIN_ARGS = {
    "BATCH_SIZE": BATCH_SIZE,
    "DECODER_LENGTH": DECODER_LENGTH,
    "ENCODER_LENGTH": ENCODER_LENGTH,
    "SEQ_TRAINER_ARGS": {
        "overwrite_output_dir": True,
        "num_train_epochs": [0, 1, 2, 5, 10],
        "do_train": True,
        "do_eval": True,
        "per_device_train_batch_size": 4,
        "per_device_eval_batch_size": 4,
        "learning_rate": 1e-5,
        "warmup_steps": 500,
        "weight_decay": 0.1,
        "label_smoothing_factor": 0.1,
        "predict_with_generate": True,
        "logging_steps": 100,
        "save_total_limit": 1,
        "save_strategy": "no",
        "logging_strategy": "epoch",
        "evaluation_strategy": "epoch",
        "load_best_model_at_end": False,
        "output_dir" : 'reports/results',
        "logging_dir" : "reports/logs",
    },
}

experiment_config = {
    "DATA_STR" : "20240721",
    "RS" : RS,
    "DRIFT_TYPE" : "no_drift",
    "NFOLD" : 3,
    "FULL_TRAIN_ARGS" : FULL_TRAIN_ARGS,
    "MODEL_NAME" : MODEL_NAME,
    "CLUSTER_EPOCHS" : 5,
    "CLUSTER_SET_ID" : [1, 4, 3, [0, 1, 4,]],
    "TRAIN_SIZE" : 7000,
    "TEST_SIZE" : 2500,
}
experiment_config["ANALYSIS_POSTFIX"] = f"mined_{experiment_config['DRIFT_TYPE']}_{str(date.today())}"
experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["output_dir"] += "/" + experiment_config["ANALYSIS_POSTFIX"] 
experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["logging_dir"] += "/" + experiment_config["ANALYSIS_POSTFIX"] 

if not os.path.exists(experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["logging_dir"]):
    os.mkdir(experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["logging_dir"])

if not os.path.exists(experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["output_dir"]):
    os.mkdir(experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["output_dir"])

tokenizer = AutoTokenizer.from_pretrained(experiment_config["MODEL_NAME"], skip_special_tokens=False)
model = AutoModelForSeq2SeqLM.from_pretrained(experiment_config["MODEL_NAME"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rouge = evaluate.load('rouge')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/RDC/zinovyee.hub/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### 2. Conala data. Preprocessing. 

In [2]:
sampling_dict = create_splits(experiment_config=experiment_config, tokenizer=tokenizer, train_size=experiment_config["TRAIN_SIZE"], test_size=experiment_config["TEST_SIZE"], cluster_id=4)
train_dataset, test_data, test_df, train_df = sampling_dict["train_data"], sampling_dict["test_data"], sampling_dict["test_df"], sampling_dict["train_df"]

splits, questions_list = prep_cv_validation(train_dataset=train_dataset, 
                            experiment_config=experiment_config)

Train Data:  (7000, 11)
Test Data:  (2500, 11)
Train Data: Cluster cluster
2    3888
3    2114
4     596
1     234
0     168
Name: count, dtype: int64
Test Data: Cluster cluster
2    1358
3     777
4     220
1      74
0      71
Name: count, dtype: int64


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

Fold 0
Fold 1
Fold 2


In [3]:
fold_results = cv_training_epochs_sets(experiment_config=experiment_config,
                            splits=splits,
                            questions_list=questions_list,
                            train_dataset=train_dataset,
                            tokenizer=tokenizer)

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_fold_epoch_set.pickle', 'wb') as handle:
    pickle.dump(fold_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

TRAINING EPOCH SET 0
TRAINING EPOCHS 0
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TRAINING EPOCH SET 1
TRAINING EPOCHS 1
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.1015,4.602828,0.2776,0.0758,0.2451,0.2453,13.7481,0.0261,1.0,1.099,25042,22787


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5601,4.520059,0.2873,0.079,0.2523,0.2525,13.8483,0.0282,1.0,1.1067,25219,22787


TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2923,4.469131,0.2879,0.0778,0.2511,0.2512,14.0159,0.0282,1.0,1.1277,25696,22787
2,4.4199,4.43292,0.2871,0.0794,0.2513,0.2513,14.1084,0.0283,1.0,1.1276,25695,22787
3,4.3169,4.424791,0.2896,0.0801,0.252,0.252,13.9739,0.0294,1.0,1.1135,25374,22787


TRAINING EPOCH SET 10
TRAINING EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.8284,4.470607,0.2842,0.0767,0.2492,0.2493,13.8346,0.0282,1.0,1.1033,25142,22787
2,4.0217,4.452669,0.2841,0.0776,0.2485,0.2486,14.1504,0.0286,1.0,1.1239,25610,22787
3,4.0457,4.421396,0.2854,0.0791,0.2506,0.2507,13.6761,0.0275,1.0,1.0778,24560,22787
4,4.0376,4.426142,0.2874,0.0798,0.2513,0.2512,13.8668,0.0287,1.0,1.1001,25069,22787
5,3.9849,4.431852,0.2877,0.0792,0.2514,0.2515,13.8749,0.0286,1.0,1.0982,25024,22787


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

TRAINING EPOCH SET 0
TRAINING EPOCHS 0
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TRAINING EPOCH SET 1
TRAINING EPOCHS 1
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0938,4.611434,0.2702,0.0715,0.2384,0.2383,14.2152,0.0234,1.0,1.1695,26106,22323


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5448,4.532459,0.277,0.0745,0.2426,0.2423,14.1993,0.0261,1.0,1.1651,26009,22323


TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2728,4.486893,0.2777,0.0748,0.2432,0.243,14.2152,0.0263,1.0,1.1637,25977,22323
2,4.4081,4.443185,0.2822,0.0789,0.2479,0.2476,13.9734,0.0294,1.0,1.1349,25334,22323
3,4.3051,4.439881,0.281,0.0791,0.2464,0.2463,13.9563,0.0291,1.0,1.1322,25273,22323


TRAINING EPOCH SET 10
TRAINING EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.8039,4.497812,0.2766,0.0744,0.2426,0.2423,13.874,0.0273,1.0,1.1226,25059,22323
2,4.0074,4.461789,0.2786,0.0762,0.2453,0.2452,13.9991,0.027,1.0,1.1333,25298,22323
3,4.0333,4.438277,0.2777,0.0753,0.2437,0.2436,13.7861,0.0268,1.0,1.115,24891,22323
4,4.0234,4.447116,0.2772,0.0752,0.2433,0.2433,13.8508,0.0262,1.0,1.1134,24854,22323
5,3.9861,4.449587,0.2791,0.0761,0.2455,0.2454,13.8517,0.0271,1.0,1.1174,24944,22323


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

TRAINING EPOCH SET 0
TRAINING EPOCHS 0
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TRAINING EPOCH SET 1
TRAINING EPOCHS 1
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0786,4.62724,0.2679,0.0728,0.238,0.2382,13.8127,0.0269,1.0,1.1156,25152,22545


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5367,4.547226,0.2753,0.0778,0.244,0.244,13.7767,0.0301,1.0,1.1098,25020,22545


TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2645,4.498985,0.2779,0.078,0.2451,0.2453,13.712,0.029,1.0,1.1011,24824,22545
2,4.4049,4.458007,0.2808,0.0775,0.2455,0.2455,13.7188,0.0279,1.0,1.1041,24892,22545
3,4.2948,4.456551,0.2824,0.0779,0.2476,0.2475,13.7471,0.0281,1.0,1.0998,24794,22545


TRAINING EPOCH SET 10
TRAINING EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7974,4.505895,0.2769,0.0748,0.2437,0.2439,13.4411,0.0278,1.0,1.0674,24064,22545
2,4.007,4.479992,0.2765,0.0727,0.2408,0.241,13.5332,0.0275,1.0,1.0816,24385,22545
3,4.026,4.457977,0.2795,0.0762,0.2455,0.246,13.6674,0.0285,1.0,1.0872,24511,22545
4,4.0183,4.457972,0.2813,0.0773,0.2479,0.2483,13.691,0.0296,1.0,1.0847,24454,22545
5,3.9706,4.463252,0.2788,0.0748,0.2455,0.2457,13.6104,0.0288,1.0,1.0798,24344,22545


In [7]:
with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_fold_epoch_set.pickle', 'rb') as handle:
   fold_results = pickle.load(handle)
   
for cluster_idx in experiment_config["CLUSTER_SET_ID"]:
    fold_results = cv_cluster_set(experiment_config=experiment_config,
                                            splits=splits,
                                            questions_list=questions_list,
                                            train_dataset=train_dataset,
                                            tokenizer=tokenizer,
                                            fold_results=fold_results,
                                            cluster_id=cluster_idx)

cv_df = results_dict_todf(fold_results)

########## SAVE THE FILE

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_step1.pickle', 'wb') as handle:
    pickle.dump(cv_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [1] training size (160, 14)
TRAINING CLUSTER SET [1] FOR EPOCHS 5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,6.1481,5.903144,0.128,0.0167,0.1146,0.1149,9.4567,0.0049,0.6824,0.7235,16487,22787
2,6.0342,5.810681,0.13,0.0173,0.1169,0.1171,9.4045,0.0055,0.6732,0.7165,16326,22787
3,5.8749,5.682106,0.1326,0.0188,0.1187,0.1189,9.3993,0.006,0.6727,0.7161,16317,22787
4,5.6631,5.549994,0.1358,0.02,0.1211,0.1212,9.4529,0.006,0.6786,0.7206,16420,22787
5,5.5069,5.425623,0.1413,0.0215,0.1257,0.1258,9.4666,0.0065,0.6784,0.7205,16417,22787


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [1] training size (149, 14)
TRAINING CLUSTER SET [1] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [1] training size (159, 14)
TRAINING CLUSTER SET [1] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [4] training size (399, 14)
TRAINING CLUSTER SET [4] FOR EPOCHS 5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.8802,5.745393,0.1346,0.0188,0.1198,0.1202,9.6277,0.0058,0.706,0.7418,16903,22787
2,5.5346,5.407768,0.1514,0.0238,0.1344,0.1348,9.8989,0.0079,0.7408,0.7692,17528,22787
3,5.1834,5.133616,0.1702,0.0301,0.1523,0.1526,10.356,0.0111,0.8004,0.8179,18638,22787
4,4.886,4.976168,0.205,0.0446,0.1853,0.1853,11.3496,0.0161,0.8878,0.8936,20363,22787
5,4.6781,4.872675,0.2121,0.0495,0.1907,0.1907,11.8286,0.0188,0.9398,0.9415,21454,22787


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [4] training size (389, 14)
TRAINING CLUSTER SET [4] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [4] training size (404, 14)
TRAINING CLUSTER SET [4] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (1395, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.4832,5.007508,0.1821,0.0359,0.164,0.1641,10.8136,0.0124,0.8505,0.8606,19611,22787
2,4.7815,4.706634,0.2402,0.0603,0.2149,0.2148,12.3226,0.0224,0.972,0.9724,22157,22787
3,4.4926,4.646046,0.2518,0.0646,0.2224,0.2224,12.7892,0.0232,1.0,1.0212,23269,22787
4,4.3556,4.627163,0.2544,0.0646,0.2237,0.2238,12.7836,0.0246,1.0,1.0233,23317,22787
5,4.288,4.622749,0.257,0.0663,0.2264,0.2262,12.901,0.025,1.0,1.0295,23460,22787


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (1399, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (1434, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 1, 4] training size (686, 14)
TRAINING CLUSTER SET [0, 1, 4] FOR EPOCHS 5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.8608,5.507529,0.1421,0.0218,0.1261,0.1264,9.7922,0.0069,0.7295,0.7603,17324,22787
2,5.3047,5.023569,0.1914,0.038,0.1727,0.1727,10.8663,0.0137,0.8445,0.8554,19493,22787
3,4.9169,4.816772,0.2356,0.0593,0.213,0.2129,12.7785,0.0221,1.0,1.0161,23153,22787
4,4.686,4.748352,0.2393,0.0594,0.2154,0.2154,12.8573,0.0223,1.0,1.0226,23302,22787
5,4.5293,4.736448,0.2402,0.0589,0.2157,0.2157,12.7588,0.0222,1.0,1.0122,23065,22787


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 1, 4] training size (640, 14)
TRAINING CLUSTER SET [0, 1, 4] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 1, 4] training size (670, 14)
TRAINING CLUSTER SET [0, 1, 4] FOR EPOCHS 5
LOADING MODEL ./tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [8]:
print("Mean")
print(cv_df.groupby(["model_set"])["rouge"].mean())

print("STD")
print(cv_df.groupby("model_set")["rouge"].std())

Mean
model_set
0                    0.128471
1                    0.272787
2                    0.280769
5                    0.285255
10                   0.282760
cluster_[0, 1, 4]    0.241250
cluster_[1]          0.141068
cluster_[3]          0.259440
cluster_[4]          0.213261
Name: rouge, dtype: float64
STD
model_set
0                    0.135276
1                    0.159897
2                    0.161486
5                    0.162626
10                   0.163305
cluster_[0, 1, 4]    0.161949
cluster_[1]          0.140849
cluster_[3]          0.167286
cluster_[4]          0.159754
Name: rouge, dtype: float64


### Step 2. Learn performance

In [3]:
########## LOAD CV RESULTS

import pickle
with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_step1.pickle', 'rb') as handle:
    cv_df = pickle.load(handle)

########## RUN STEP 2 ON CV

cv_df, model_results = cv_step_2(experiment_config=experiment_config, cv_df=cv_df, t_models=["lgbm"])

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/s2_model_results.pickle', 'wb') as handle:
    pickle.dump(model_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_results.pickle', 'wb') as handle:
    pickle.dump(cv_df, handle, protocol=pickle.HIGHEST_PROTOCOL)


0
lgbm
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 8.063856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13044
[LightGBM] [Info] Number of data points in the train set: 41994, number of used features: 1032
[LightGBM] [Info] Start training from score 0.232153
1
lgbm
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 8.241766 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13013
[LightGBM] [Info] Number of data points in the train set: 42003, number of used features: 1025
[LightGBM] [Info] Start training from score 0.235097
2
lgbm
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 8.215009 seconds.
You can set `force_row_wise=true` to remove the overhead.

In [4]:
### TO SAVE THE VECTORIZER AND STEP 2 MODELS

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_results.pickle', 'rb') as handle:
    cv_df = pickle.load(handle)


print("Mean")
print(cv_df.groupby(["model_set"])["rouge"].mean())

print("STD")
print(cv_df.groupby("model_set")["rouge"].std())


full_step_2(cv_df=cv_df, 
            experiment_config=experiment_config, t_models=["lgbm"])

Mean
model_set
0                    0.128471
1                    0.272787
2                    0.280769
5                    0.285255
10                   0.282760
cluster_[0, 1, 4]    0.241250
cluster_[1]          0.141068
cluster_[3]          0.259440
cluster_[4]          0.213261
ensemble             0.284587
Name: rouge, dtype: float64
STD
model_set
0                    0.135276
1                    0.159897
2                    0.161486
5                    0.162626
10                   0.163305
cluster_[0, 1, 4]    0.161949
cluster_[1]          0.140849
cluster_[3]          0.167286
cluster_[4]          0.159754
ensemble             0.162828
Name: rouge, dtype: float64
lgbm
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 11.044841 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19499
[LightGBM] [Info] Number of data points in the t

# TEST

In [5]:
# sampling_dict = create_splits(experiment_config=experiment_config, tokenizer=tokenizer, test=True, train_size=100, test_size=25, cluster_id=4)
# train_dataset, test_data, test_df, train_df = sampling_dict["train_data"], sampling_dict["test_data"], sampling_dict["test_df"], sampling_dict["train_df"]

# splits, questions_list = prep_cv_validation(train_dataset=train_dataset, 
#                             experiment_config=experiment_config)

In [6]:
with open(f"reports/results/{experiment_config['ANALYSIS_POSTFIX']}/cv_results.pickle", "rb") as handle:
    cv_resutls = pickle.load(handle)

base_models_list = list(cv_resutls.model_set.unique())
base_models_list.pop(-1)

'ensemble'

In [7]:
meta_preds_df = meta_predict(experiment_config=experiment_config, 
                    test_df=test_df,
                    base_models_names=base_models_list,
                    t_models=["lgbm"])

########## SAVE THE FILE

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_step2.pickle', 'wb') as handle:
    pickle.dump(meta_preds_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

lgbm


In [10]:
with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_step2.pickle', 'rb') as handle:
    meta_preds_df = pickle.load(handle)
    
meta_preds_df.groupby("model_set").lgbm_preds.mean()

model_set
0                    0.136463
1                    0.271276
2                    0.277438
5                    0.280957
10                   0.278436
cluster_[0, 1, 4]    0.247229
cluster_[1]          0.148851
cluster_[3]          0.262917
cluster_[4]          0.220177
Name: lgbm_preds, dtype: float64

In [11]:
optimal_ensemble_map, ensemble_val_estim = create_ensemble_map(meta_preds_df=meta_preds_df, 
                                                                t_model_name="lgbm")

In [12]:
test_result_df = test_training_epochs_sets(experiment_config=experiment_config,
                            test_df=test_df,
                            test_data=test_data,
                            train_data=train_dataset,
                            tokenizer=tokenizer)

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_epoch_set.pickle', 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

TRAINING EPOCH SET 0
TRAINING EPOCHS 0
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TRAINING EPOCH SET 1
TRAINING EPOCHS 1
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.9613,4.534938,0.2751,0.0759,0.2434,0.2437,13.9928,0.0287,1.0,1.1449,27495,24015


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.4911,4.460569,0.2837,0.079,0.2495,0.2497,13.924,0.0301,1.0,1.1343,27240,24015


TRAINING EPOCH SET 5
TRAINING EPOCHS 3
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2319,4.40979,0.286,0.079,0.2497,0.25,13.8172,0.0294,1.0,1.128,27089,24015
2,4.3573,4.379532,0.2846,0.0797,0.249,0.249,13.7624,0.0307,1.0,1.1086,26623,24015
3,4.2661,4.378836,0.2854,0.0786,0.2492,0.2493,13.8968,0.0301,1.0,1.1187,26865,24015


TRAINING EPOCH SET 10
TRAINING EPOCHS 5
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7694,4.415681,0.2806,0.0745,0.2441,0.2442,13.644,0.028,1.0,1.1039,26510,24015
2,3.9875,4.397045,0.285,0.0794,0.2492,0.2493,13.6024,0.0309,1.0,1.0917,26218,24015
3,4.0117,4.377162,0.2827,0.0773,0.2479,0.2478,13.8908,0.0299,1.0,1.1158,26796,24015
4,3.9963,4.380507,0.2867,0.0772,0.2499,0.25,13.526,0.0289,1.0,1.0812,25966,24015
5,3.9532,4.387558,0.2868,0.0778,0.2503,0.2503,13.7144,0.0287,1.0,1.0969,26341,24015


In [13]:
train_df.cluster.value_counts()

cluster
2    3888
3    2114
4     596
1     234
0     168
Name: count, dtype: int64

In [14]:
with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_epoch_set.pickle', 'rb') as handle:
   test_result_df = pickle.load(handle)

test_result_df = test_result_df.rename(columns={"epoch_set": "model_set"})

for cluster_idx in experiment_config["CLUSTER_SET_ID"]:
    test_result_df = test_cluster_set(experiment_config=experiment_config,
                                    test_df=test_df,
                                    test_data=test_data,
                                    tokenizer=tokenizer,
                                    results_df=test_result_df,
                                    cluster_id=cluster_idx,
                                    train_df=train_df)

########## SAVE THE FILE

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_results.pickle', 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Cluster [1] training size (234, 14)
TRAINING CLUSTER SET [1] FOR EPOCHS5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,6.0691,5.868877,0.1261,0.016,0.1145,0.1147,9.592,0.0051,0.7129,0.7471,17942,24015
2,5.8927,5.698154,0.1286,0.017,0.117,0.1171,9.544,0.0055,0.7061,0.7419,17816,24015
3,5.6648,5.505741,0.1329,0.0189,0.1208,0.121,9.454,0.0063,0.6949,0.7332,17607,24015
4,5.3918,5.328917,0.1422,0.0226,0.1289,0.1293,9.3876,0.0081,0.6814,0.7227,17356,24015
5,5.1854,5.18564,0.1585,0.0271,0.1435,0.1438,9.6784,0.0105,0.7177,0.7509,18034,24015


Cluster [4] training size (596, 14)
TRAINING CLUSTER SET [4] FOR EPOCHS5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.8318,5.581477,0.1372,0.0188,0.1247,0.125,9.9312,0.0066,0.7636,0.7876,18914,24015
2,5.2983,5.132429,0.1702,0.0311,0.1544,0.1545,10.5376,0.0136,0.8395,0.8511,20439,24015
3,4.9126,4.89009,0.2066,0.0457,0.1874,0.1874,12.4732,0.0182,1.0,1.0289,24710,24015
4,4.6536,4.791064,0.2271,0.0552,0.2038,0.204,12.47,0.0243,1.0,1.0099,24252,24015
5,4.4779,4.777442,0.2271,0.0552,0.2035,0.2037,12.2128,0.0234,0.9806,0.9808,23554,24015


Cluster [3] training size (2114, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.2759,4.786207,0.2281,0.0555,0.2058,0.2059,12.39,0.023,0.9948,0.9948,23890,24015
2,4.5997,4.616581,0.2529,0.0654,0.2244,0.2244,12.9776,0.0262,1.0,1.0562,25364,24015
3,4.3923,4.577627,0.261,0.0689,0.2295,0.2295,13.2772,0.0271,1.0,1.0813,25968,24015
4,4.2763,4.55772,0.2651,0.0708,0.2329,0.2331,13.3348,0.0278,1.0,1.085,26056,24015
5,4.2147,4.557394,0.2632,0.0694,0.2309,0.2309,13.2048,0.0276,1.0,1.071,25720,24015


Cluster [0, 1, 4] training size (998, 14)
TRAINING CLUSTER SET [0, 1, 4] FOR EPOCHS5
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.7281,5.257356,0.154,0.0253,0.1394,0.1395,9.9176,0.0093,0.7633,0.7873,18907,24015
2,5.0783,4.824218,0.2284,0.0553,0.2059,0.2058,13.0332,0.0244,1.0,1.0584,25418,24015
3,4.7363,4.69628,0.2417,0.0623,0.2166,0.2167,13.0516,0.0279,1.0,1.0537,25305,24015
4,4.5628,4.666374,0.2515,0.0634,0.2228,0.2229,12.946,0.0287,1.0,1.043,25047,24015
5,4.4592,4.660231,0.2534,0.0653,0.2253,0.2255,12.9312,0.0289,1.0,1.042,25024,24015


In [15]:
### ENSEMBLE COMPUTE
test_result_df = ensemble_compute(test_result_df=test_result_df,
                                  optimal_ensemble_map=optimal_ensemble_map)

########## ROUGE PER SETTING

print("Mean")
print(test_result_df.groupby("model_set")["rouge"].mean())

print("STD")
print(test_result_df.groupby("model_set")["rouge"].std())

print(test_result_df.opt_es_id.value_counts())

with open(f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_results_full.pickle', 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Mean
model_set
0                    0.128119
1                    0.276487
2                    0.284958
5                    0.288171
10                   0.290581
cluster_[0, 1, 4]    0.253683
cluster_[1]          0.158525
cluster_[3]          0.265418
cluster_[4]          0.228561
ensemble             0.288960
Name: rouge, dtype: float64
STD
model_set
0                    0.137439
1                    0.165795
2                    0.167190
5                    0.168888
10                   0.166432
cluster_[0, 1, 4]    0.167487
cluster_[1]          0.150029
cluster_[3]          0.168580
cluster_[4]          0.166655
ensemble             0.168918
Name: rouge, dtype: float64
opt_es_id
5              21410
10              2130
2                840
1                390
cluster_[3]      130
cluster_[4]      100
Name: count, dtype: int64


In [16]:
test_result_df.opt_es_id.value_counts()

opt_es_id
5              21410
10              2130
2                840
1                390
cluster_[3]      130
cluster_[4]      100
Name: count, dtype: int64