### 1. Settings

In [1]:
#####################################
##########  DEPENDENCIES ###########
#####################################

import os
import pickle
import sys
sys.path.append("../")

from tqdm import tqdm # type: ignore
from datetime import date

import evaluate

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from utils.sampling import create_splits, prep_cv_validation
from utils.training import cv_cluster_set, cv_training_epochs_sets, test_cluster_set
from utils.training import results_dict_todf, cv_step_2, full_step_2, test_training_epochs_sets
from utils.inference import meta_predict, create_ensemble_map, ensemble_compute

tqdm.pandas()
import warnings
warnings.filterwarnings("ignore")

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

#####################################
############  CONSTANTS #############
#####################################

# Scalar hyper-parameters shared by the whole experiment.
RS = 42  # presumably the random seed consumed by the utils helpers — confirm
BATCH_SIZE = 16
DECODER_LENGTH = 30
ENCODER_LENGTH = 30
MODEL_NAME = "Salesforce/codet5-base-multi-sum"

# Training arguments handed to the utils.training helpers.
# NOTE(review): SEQ_TRAINER_ARGS looks like keyword arguments for a Hugging Face
# seq2seq trainer, except "num_train_epochs", which is a list here — confirm how
# the helpers unpack it.
FULL_TRAIN_ARGS = {
    "BATCH_SIZE": BATCH_SIZE,
    "DECODER_LENGTH": DECODER_LENGTH,
    "ENCODER_LENGTH": ENCODER_LENGTH,
    "SEQ_TRAINER_ARGS": {
        "overwrite_output_dir": True,
        # Epoch "sets": the training logs below show sets 0/1/2/5/10 being
        # reached incrementally (0, +1, +1, +3, +5 epochs), each set resuming
        # from the previously saved model.
        "num_train_epochs": [0, 1, 2, 5, 10],
        "do_train": True,
        "do_eval": True,
        "per_device_train_batch_size": 4,
        "per_device_eval_batch_size": 4,
        "learning_rate": 1e-5,
        "warmup_steps": 500,
        "weight_decay": 0.1,
        "label_smoothing_factor": 0.1,
        "predict_with_generate": True,
        "logging_steps": 100,
        "save_total_limit": 1,
        "save_strategy": "no",
        "logging_strategy": "epoch",
        "evaluation_strategy": "epoch",
        "load_best_model_at_end": False,
        # Base directories; the run-specific postfix is appended further down.
        "output_dir": "reports/results",
        "logging_dir": "reports/logs",
    },
}

experiment_config = {
    "DATA_STR": "20240908",
    "RS": RS,
    "DRIFT_TYPE": "no_drift",
    "NFOLD": 3,
    "FULL_TRAIN_ARGS": FULL_TRAIN_ARGS,
    "MODEL_NAME": MODEL_NAME,
    "CLUSTER_EPOCHS": 3,
    # Two single clusters plus their union; each entry is trained as one
    # "cluster set" by the cluster-set cells further down.
    "CLUSTER_SET_ID": [0, 3, [0, 3]],
    "TRAIN_SIZE": 7000,
    "TEST_SIZE": 2500,
}

# The run identifier embeds today's date, so re-running on another day writes
# to (and later cells read from) a different results directory.
experiment_config["ANALYSIS_POSTFIX"] = f"mined_{experiment_config['DRIFT_TYPE']}_{str(date.today())}"
experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["output_dir"] += "/" + experiment_config["ANALYSIS_POSTFIX"]
experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"]["logging_dir"] += "/" + experiment_config["ANALYSIS_POSTFIX"]

# Create the run directories together with any missing parents. os.mkdir would
# raise FileNotFoundError on a fresh checkout where "reports/results" or
# "reports/logs" does not exist yet; exist_ok=True keeps re-runs idempotent.
for _dir_key in ("logging_dir", "output_dir"):
    os.makedirs(experiment_config["FULL_TRAIN_ARGS"]["SEQ_TRAINER_ARGS"][_dir_key], exist_ok=True)

# Load the pretrained checkpoint named in the experiment config, pick the
# compute device, and load the ROUGE metric.
# NOTE(review): skip_special_tokens is normally a decode-time option; confirm it
# has the intended effect when passed to from_pretrained.
_checkpoint = experiment_config["MODEL_NAME"]
tokenizer = AutoTokenizer.from_pretrained(_checkpoint, skip_special_tokens=False)
model = AutoModelForSeq2SeqLM.from_pretrained(_checkpoint)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
rouge = evaluate.load('rouge')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/RDC/zinovyee.hub/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### 2. CoNaLa data preprocessing

In [2]:
# Draw the train/test samples. create_splits returns a dict holding both the
# tokenized datasets and the matching DataFrames.
# NOTE(review): cluster_id=4 while the printed cluster labels are 0-3 — this may
# be a cluster count rather than an id; confirm against create_splits.
sampling_dict = create_splits(
    experiment_config=experiment_config,
    tokenizer=tokenizer,
    train_size=experiment_config["TRAIN_SIZE"],
    test_size=experiment_config["TEST_SIZE"],
    cluster_id=4,
)
train_dataset = sampling_dict["train_data"]
test_data = sampling_dict["test_data"]
test_df = sampling_dict["test_df"]
train_df = sampling_dict["train_df"]

# Prepare the cross-validation folds over the training set (the output prints
# one "Fold k" line per fold).
splits, questions_list = prep_cv_validation(
    experiment_config=experiment_config,
    train_dataset=train_dataset,
)

Train Data:  (7000, 11)
Test Data:  (2500, 11)
Train Data: Cluster cluster
1    3280
0    1352
2    1325
3    1043
Name: count, dtype: int64
Test Data: Cluster cluster
1    1156
2     490
0     441
3     413
Name: count, dtype: int64


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

Fold 0
Fold 1
Fold 2


In [3]:
# Step 1a: train and evaluate every epoch set on each CV fold.
fold_results = cv_training_epochs_sets(
    experiment_config=experiment_config,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    splits=splits,
    questions_list=questions_list,
)

# Persist the per-fold results so the following cell can resume from disk.
_cv_epoch_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_fold_epoch_set.pickle'
with open(_cv_epoch_path, 'wb') as handle:
    pickle.dump(fold_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

Fold 0
/usr/net/zinovyee.hub/IRTG/MLSC/MLSC_DD/ensemble_learning


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Fold train:  (4666, 14)
Fold val:  (2334, 14)
TRAINING EPOCH SET 0
TRAINING EPOCHS 0
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 1
TRAINING EPOCHS 1


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0747,4.589807,0.2686,0.0737,0.2389,0.239,13.6555,0.0283,1.0,1.1182,24827,22202


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5285,4.510561,0.2762,0.0761,0.2433,0.2433,13.6474,0.03,1.0,1.1138,24729,22202


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2559,4.459424,0.2804,0.0771,0.2476,0.2477,13.3599,0.0302,1.0,1.083,24045,22202
2,4.3943,4.424267,0.2824,0.078,0.2484,0.2485,13.7014,0.0294,1.0,1.1122,24694,22202
3,4.2869,4.4197,0.281,0.0756,0.2462,0.2462,13.6624,0.029,1.0,1.1086,24614,22202


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 10
TRAINING EPOCHS 5
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7873,4.469879,0.2789,0.0744,0.2447,0.2446,13.1787,0.0276,1.0,1.0539,23399,22202
2,3.9979,4.438773,0.2798,0.0758,0.2449,0.2449,13.6422,0.0285,1.0,1.101,24444,22202
3,4.0181,4.419032,0.2804,0.076,0.2461,0.246,13.6024,0.0289,1.0,1.0947,24304,22202
4,4.01,4.424268,0.2836,0.0767,0.2481,0.2482,13.5454,0.0296,1.0,1.088,24156,22202
5,3.9639,4.428289,0.2818,0.0754,0.2455,0.2456,13.5428,0.0291,1.0,1.0879,24154,22202


Generating Summaries
Computing Performance
Saving the model
Fold 1
/usr/net/zinovyee.hub/IRTG/MLSC/MLSC_DD/ensemble_learning


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Fold train:  (4667, 14)
Fold val:  (2333, 14)
TRAINING EPOCH SET 0
TRAINING EPOCHS 0
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 1
TRAINING EPOCHS 1


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0752,4.591667,0.2701,0.0724,0.2383,0.2385,13.8024,0.0246,1.0,1.1276,25272,22412


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 2
TRAINING EPOCHS 1


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL ./models/tmp/
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5314,4.509459,0.2778,0.0754,0.2447,0.245,13.7231,0.0263,1.0,1.1166,25025,22412


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2637,4.457325,0.2784,0.0756,0.2461,0.2463,13.6485,0.0265,1.0,1.1069,24808,22412
2,4.3941,4.420239,0.2846,0.0791,0.2515,0.2518,13.9173,0.0282,1.0,1.1279,25278,22412
3,4.2908,4.416649,0.2844,0.0777,0.2506,0.2508,13.7167,0.0285,1.0,1.1078,24829,22412


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 10
TRAINING EPOCHS 5


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL ./models/tmp/
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.8003,4.465541,0.2763,0.0735,0.2434,0.2435,13.4068,0.0257,1.0,1.077,24137,22412
2,3.9954,4.435143,0.2813,0.0763,0.247,0.2474,13.6824,0.0261,1.0,1.1019,24696,22412
3,4.0192,4.413756,0.2827,0.0772,0.2492,0.2493,13.4016,0.0288,1.0,1.0677,23929,22412
4,4.0095,4.421983,0.2817,0.0766,0.2478,0.2479,13.5276,0.0276,1.0,1.0796,24197,22412
5,3.9653,4.424644,0.2833,0.0766,0.2482,0.2484,13.4951,0.0282,1.0,1.0789,24180,22412


Generating Summaries
Computing Performance
Saving the model
Fold 2
/usr/net/zinovyee.hub/IRTG/MLSC/MLSC_DD/ensemble_learning


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Fold train:  (4667, 14)
Fold val:  (2333, 14)
TRAINING EPOCH SET 0
TRAINING EPOCHS 0


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda
Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 1
TRAINING EPOCHS 1


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0678,4.593658,0.2688,0.0729,0.2386,0.239,13.7703,0.0252,1.0,1.1183,25079,22427


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.5176,4.513836,0.2781,0.0773,0.2449,0.2454,13.7805,0.0283,1.0,1.1183,25080,22427


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 5
TRAINING EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2458,4.464506,0.2785,0.0776,0.2458,0.2462,13.4248,0.0281,1.0,1.0827,24282,22427
2,4.388,4.427325,0.28,0.079,0.2457,0.2462,13.7767,0.0276,1.0,1.1148,25001,22427
3,4.281,4.424661,0.2823,0.0804,0.2486,0.2491,13.6335,0.0294,1.0,1.0967,24595,22427


Generating Summaries
Computing Performance
Saving the model
TRAINING EPOCH SET 10
TRAINING EPOCHS 5
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7771,4.466114,0.2758,0.0773,0.2423,0.2425,13.1856,0.0292,1.0,1.0512,23575,22427
2,3.9885,4.446638,0.2791,0.0776,0.2445,0.2449,13.6438,0.0285,1.0,1.0958,24576,22427
3,4.0098,4.422835,0.2799,0.079,0.2464,0.2466,13.6485,0.0287,1.0,1.0925,24501,22427
4,4.0024,4.429433,0.2821,0.0799,0.2473,0.2477,13.5096,0.0287,1.0,1.0781,24179,22427
5,3.9603,4.433535,0.2812,0.0796,0.2463,0.2467,13.6614,0.0278,1.0,1.0943,24541,22427


Generating Summaries
Computing Performance
Saving the model


In [4]:
# Step 1b: resume from the epoch-set CV results saved by the previous cell.
_cv_epoch_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_fold_epoch_set.pickle'
with open(_cv_epoch_path, 'rb') as handle:
    fold_results = pickle.load(handle)

# Train one model per configured cluster set; every call receives the
# accumulated results and returns the updated structure.
for cluster_idx in experiment_config["CLUSTER_SET_ID"]:
    fold_results = cv_cluster_set(
        experiment_config=experiment_config,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        splits=splits,
        questions_list=questions_list,
        fold_results=fold_results,
        cluster_id=cluster_idx,
    )

# Convert the combined fold results (epoch sets + cluster sets) into a DataFrame.
cv_df = results_dict_todf(fold_results)

# Save the step-1 CV table.
_cv_step1_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_step1.pickle'
with open(_cv_step1_path, 'wb') as handle:
    pickle.dump(cv_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0] training size (890, 14)
TRAINING CLUSTER SET [0] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.9572,5.393718,0.1374,0.0217,0.1256,0.1259,8.1362,0.0065,0.5168,0.6024,13374,22202
2,5.2278,5.047328,0.1756,0.0279,0.1616,0.1619,7.3003,0.0077,0.4172,0.5336,11846,22202
3,4.8324,4.980356,0.1962,0.0349,0.1782,0.1785,7.6491,0.0083,0.4606,0.5633,12507,22202


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0] training size (907, 14)
TRAINING CLUSTER SET [0] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL ./models/tmp/
cuda
Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0] training size (907, 14)
TRAINING CLUSTER SET [0] FOR EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (716, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.8618,5.399143,0.1431,0.0219,0.1289,0.1292,10.7961,0.0083,0.8711,0.8787,19509,22202
2,5.2446,4.970186,0.2331,0.0579,0.2108,0.2109,15.5291,0.018,1.0,1.3068,29013,22202
3,4.8625,4.818551,0.2478,0.0641,0.2205,0.2205,16.1298,0.0213,1.0,1.3724,30469,22202


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (685, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [3] training size (685, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL ./models/tmp/
cuda
Fold 0


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 3] training size (1606, 14)
TRAINING CLUSTER SET [0, 3] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.6456,4.89024,0.2048,0.0442,0.1865,0.1867,12.7896,0.0192,1.0,1.059,23511,22202
2,4.928,4.637165,0.256,0.0666,0.227,0.2272,13.7082,0.0239,1.0,1.1341,25179,22202
3,4.6964,4.606119,0.2676,0.0717,0.2366,0.2368,14.162,0.0249,1.0,1.1817,26237,22202


Fold 1


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 3] training size (1592, 14)
TRAINING CLUSTER SET [0, 3] FOR EPOCHS 3
LOADING MODEL ./models/tmp/
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fold 2


Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Cluster [0, 3] training size (1592, 14)
TRAINING CLUSTER SET [0, 3] FOR EPOCHS 3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL ./models/tmp/
cuda


In [5]:
# Per-model-set ROUGE on the CV validation folds: mean, then standard deviation.
_cv_rouge_by_model = cv_df.groupby("model_set")["rouge"]

print("Mean")
print(_cv_rouge_by_model.mean())

print("STD")
print(_cv_rouge_by_model.std())

Mean
model_set
0                 0.130373
1                 0.269827
2                 0.279207
5                 0.284077
10                0.284135
cluster_[0, 3]    0.270036
cluster_[0]       0.198179
cluster_[3]       0.249134
Name: rouge, dtype: float64
STD
model_set
0                 0.137666
1                 0.160884
2                 0.162966
5                 0.164087
10                0.164493
cluster_[0, 3]    0.160769
cluster_[0]       0.161238
cluster_[3]       0.150450
Name: rouge, dtype: float64


### Step 2. Learn performance

In [6]:
# ---- Load the step-1 CV results ----
# `pickle` is already imported in the settings cell; the re-import is kept as in
# the original cell.
import pickle

_cv_step1_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_step1.pickle'
with open(_cv_step1_path, 'rb') as handle:
    cv_df = pickle.load(handle)

# ---- Run step 2 (LightGBM meta-model) on the CV folds ----
cv_df, model_results = cv_step_2(
    experiment_config=experiment_config,
    cv_df=cv_df,
    t_models=["lgbm"],
)

# ---- Persist the meta-model results, then the updated CV table ----
_s2_models_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/s2_model_results.pickle'
with open(_s2_models_path, 'wb') as handle:
    pickle.dump(model_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

_cv_results_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_results.pickle'
with open(_cv_results_path, 'wb') as handle:
    pickle.dump(cv_df, handle, protocol=pickle.HIGHEST_PROTOCOL)


0
lgbm
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024430 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 13018
[LightGBM] [Info] Number of data points in the train set: 37328, number of used features: 1024
[LightGBM] [Info] Start training from score 0.245488
1
lgbm
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.056369 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12883
[LightGBM] [Info] Number of data points in the train set: 37336, number of used features: 1038
[LightGBM] [Info] Start training from score 0.245127
2
lgbm
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12904
[LightGBM] [Info] Number of 

In [7]:
### TO SAVE THE VECTORIZER AND STEP 2 MODELS

# Reload the CV table written by the step-2 cell (it now also carries the
# "ensemble" rows, as the printed summary shows).
_cv_results_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/cv_results.pickle'
with open(_cv_results_path, 'rb') as handle:
    cv_df = pickle.load(handle)

# Per-model-set ROUGE summary: mean, then standard deviation.
_cv_rouge_by_model = cv_df.groupby("model_set")["rouge"]

print("Mean")
print(_cv_rouge_by_model.mean())

print("STD")
print(_cv_rouge_by_model.std())

# Fit step 2 on the complete CV table.
full_step_2(
    experiment_config=experiment_config,
    cv_df=cv_df,
    t_models=["lgbm"],
)

Mean
model_set
0                 0.130373
1                 0.269827
2                 0.279207
5                 0.284077
10                0.284135
cluster_[0, 3]    0.270036
cluster_[0]       0.198179
cluster_[3]       0.249134
ensemble          0.284177
Name: rouge, dtype: float64
STD
model_set
0                 0.137666
1                 0.160884
2                 0.162966
5                 0.164087
10                0.164493
cluster_[0, 3]    0.160769
cluster_[0]       0.161238
cluster_[3]       0.150450
ensemble          0.164489
Name: rouge, dtype: float64
lgbm
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.047879 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19308
[LightGBM] [Info] Number of data points in the train set: 56000, number of used features: 1384
[LightGBM] [Info] Start training from score 0.245621


# TEST

In [8]:
# sampling_dict = create_splits(experiment_config=experiment_config, tokenizer=tokenizer, test=True, train_size=100, test_size=25, cluster_id=4)
# train_dataset, test_data, test_df, train_df = sampling_dict["train_data"], sampling_dict["test_data"], sampling_dict["test_df"], sampling_dict["train_df"]

# splits, questions_list = prep_cv_validation(train_dataset=train_dataset, 
#                             experiment_config=experiment_config)

In [9]:
# Reload the CV results to recover the names of the base models that were
# trained in step 1.
with open(f"reports/results/{experiment_config['ANALYSIS_POSTFIX']}/cv_results.pickle", "rb") as handle:
    cv_results = pickle.load(handle)
cv_resutls = cv_results  # alias for the original (misspelled) name, kept for compatibility

# Drop the derived "ensemble" entry by value instead of by position: pop(-1)
# removed whatever happened to be last, so it would silently discard a real base
# model if "ensemble" were missing or not the final unique value. The order of
# the remaining names is unchanged.
base_models_list = [name for name in cv_results.model_set.unique() if name != "ensemble"]

'ensemble'

In [10]:
# Predict, with the step-2 LightGBM meta-model, a score for every base model on
# the test questions.
meta_preds_df = meta_predict(
    experiment_config=experiment_config,
    test_df=test_df,
    base_models_names=base_models_list,
    t_models=["lgbm"],
)

# Save the meta predictions.
_test_step2_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_step2.pickle'
with open(_test_step2_path, 'wb') as handle:
    pickle.dump(meta_preds_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

lgbm


In [11]:
# Reload the saved meta predictions and show the mean predicted score per
# base model.
_test_step2_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_step2.pickle'
with open(_test_step2_path, 'rb') as handle:
    meta_preds_df = pickle.load(handle)

meta_preds_df.groupby("model_set")["lgbm_preds"].mean()

model_set
0                 0.138374
1                 0.272835
2                 0.277185
5                 0.279695
10                0.279916
cluster_[0, 3]    0.272846
cluster_[0]       0.206853
cluster_[3]       0.256394
Name: lgbm_preds, dtype: float64

In [12]:
# Build the ensemble map from the LightGBM meta predictions; the second return
# value is kept as ensemble_val_estim.
optimal_ensemble_map, ensemble_val_estim = create_ensemble_map(
    t_model_name="lgbm",
    meta_preds_df=meta_preds_df,
)

In [13]:
# Train every epoch set on the full training data and evaluate on the test set.
test_result_df = test_training_epochs_sets(
    experiment_config=experiment_config,
    train_data=train_dataset,
    test_data=test_data,
    test_df=test_df,
    tokenizer=tokenizer,
)

# Persist the epoch-set test results for the cluster-set cell below.
_test_epoch_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_epoch_set.pickle'
with open(_test_epoch_path, 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

TRAINING EPOCH SET 0
TRAINING EPOCHS 0
Salesforce/codet5-base-multi-sum


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


cuda
TRAINING EPOCH SET 1
TRAINING EPOCHS 1
Salesforce/codet5-base-multi-sum


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.9381,4.557556,0.275,0.0743,0.2415,0.2415,13.7476,0.0275,1.0,1.0988,26838,24424


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
./models/1_epoch_set


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.4666,4.481162,0.281,0.0762,0.2454,0.2456,13.786,0.0289,1.0,1.0965,26780,24424


TRAINING EPOCH SET 5
TRAINING EPOCHS 3
./models/2_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2079,4.426007,0.2828,0.0756,0.2459,0.2459,13.7856,0.0269,1.0,1.102,26916,24424
2,4.3393,4.395939,0.2821,0.0773,0.2479,0.2482,13.7628,0.0297,1.0,1.0865,26536,24424
3,4.2454,4.392785,0.2846,0.078,0.249,0.2492,13.7148,0.0305,1.0,1.0814,26413,24424


TRAINING EPOCH SET 10
TRAINING EPOCHS 5
./models/5_epoch_set


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7465,4.42303,0.2821,0.0732,0.2457,0.2459,13.6852,0.0259,1.0,1.0869,26546,24424
2,3.9684,4.407227,0.2838,0.0787,0.2493,0.2494,13.7388,0.029,1.0,1.0778,26324,24424
3,3.9917,4.390144,0.2827,0.0752,0.247,0.2473,13.7424,0.0276,1.0,1.0777,26321,24424
4,3.9798,4.39422,0.279,0.0742,0.2431,0.2432,13.652,0.0265,1.0,1.07,26134,24424
5,3.9315,4.400657,0.2807,0.0756,0.246,0.2461,13.696,0.028,1.0,1.0695,26122,24424


In [14]:
# Cluster sizes in the full training sample.
train_df["cluster"].value_counts()

cluster
1    3280
0    1352
2    1325
3    1043
Name: count, dtype: int64

In [15]:
# Resume from the epoch-set test results saved above.
_test_epoch_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_epoch_set.pickle'
with open(_test_epoch_path, 'rb') as handle:
    test_result_df = pickle.load(handle)

# Rename the key column to "model_set", the column the later summary cells
# group by.
test_result_df = test_result_df.rename(columns={"epoch_set": "model_set"})

# Train and evaluate one model per configured cluster set; every call receives
# the accumulated frame and returns the updated one.
for cluster_idx in experiment_config["CLUSTER_SET_ID"]:
    test_result_df = test_cluster_set(
        experiment_config=experiment_config,
        train_df=train_df,
        test_df=test_df,
        test_data=test_data,
        tokenizer=tokenizer,
        results_df=test_result_df,
        cluster_id=cluster_idx,
    )

# Save the combined test results.
_test_results_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_results.pickle'
with open(_test_results_path, 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Cluster [0] training size (1352, 14)
TRAINING CLUSTER SET [0] FOR EPOCHS3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.7559,5.176994,0.1507,0.0187,0.1388,0.1393,7.1204,0.0046,0.3727,0.5033,12292,24424
2,4.9565,4.982159,0.1925,0.032,0.1715,0.1719,7.6392,0.0048,0.44,0.5492,13413,24424
3,4.6605,4.959134,0.2,0.0339,0.1765,0.177,7.756,0.0063,0.4515,0.557,13605,24424


Cluster [3] training size (1043, 14)
TRAINING CLUSTER SET [3] FOR EPOCHS3
LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.7093,5.176757,0.1784,0.0338,0.1617,0.1617,12.9892,0.0127,1.0,1.0581,25843,24424
2,4.9903,4.840027,0.2438,0.061,0.2183,0.2183,16.1668,0.0218,1.0,1.3447,32842,24424
3,4.6829,4.773511,0.2512,0.0652,0.2239,0.2243,16.4084,0.0227,1.0,1.3667,33381,24424


Cluster [0, 3] training size (2395, 14)
TRAINING CLUSTER SET [0, 3] FOR EPOCHS3


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.4378,4.739243,0.2297,0.056,0.2073,0.2071,13.4572,0.0187,1.0,1.0856,26515,24424
2,4.7791,4.603551,0.2613,0.0665,0.2299,0.23,14.8464,0.0214,1.0,1.2172,29729,24424
3,4.6175,4.581701,0.2657,0.0689,0.2326,0.2327,14.5524,0.0225,1.0,1.1939,29160,24424


In [16]:
# Apply the ensemble map to the test results.
test_result_df = ensemble_compute(
    optimal_ensemble_map=optimal_ensemble_map,
    test_result_df=test_result_df,
)

# ROUGE per setting: mean, then standard deviation.
_test_rouge_by_model = test_result_df.groupby("model_set")["rouge"]

print("Mean")
print(_test_rouge_by_model.mean())

print("STD")
print(_test_rouge_by_model.std())

# Row counts per opt_es_id value.
print(test_result_df["opt_es_id"].value_counts())

# Save the final table, including the ensemble rows.
_test_full_path = f'reports/results/{experiment_config["ANALYSIS_POSTFIX"]}/test_results_full.pickle'
with open(_test_full_path, 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

Mean
model_set
0                 0.123299
1                 0.276431
2                 0.282285
5                 0.286905
10                0.284072
cluster_[0, 3]    0.266360
cluster_[0]       0.202202
cluster_[3]       0.251622
ensemble          0.286127
Name: rouge, dtype: float64
STD
model_set
0                 0.133633
1                 0.162703
2                 0.162382
5                 0.165110
10                0.164032
cluster_[0, 3]    0.157822
cluster_[0]       0.158643
cluster_[3]       0.150236
ensemble          0.165175
Name: rouge, dtype: float64
opt_es_id
10    17541
5      3717
2       639
1       603
Name: count, dtype: int64


In [17]:
# Row counts per opt_es_id value (same figures as printed by the previous cell).
test_result_df["opt_es_id"].value_counts()

opt_es_id
10    17541
5      3717
2       639
1       603
Name: count, dtype: int64