### 1. Settings

In [1]:
#####################################
##########  DEPENDENCIES ############
#####################################

import os
import pickle
import numpy as np
from tqdm import tqdm # type: ignore
import pandas as pd
import copy

from datasets import load_dataset, DatasetDict, Dataset
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import KFold # type: ignore
import evaluate

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import utils.prep as pr
import utils.eval as ev
import utils.inference as infer

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from scipy.sparse import hstack

tqdm.pandas()

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
#####################################
############  CONSTANTS #############
#####################################
# Seed passed as random_state to every pandas sampling call in this notebook.
RS = 42

MODEL = "CodeT5"
TRAIN_N = 330
BATCH_SIZE = 15
# Length limits handed to infer.generate_summary in the training cells.
DECODER_LENGTH = 20
ENCODER_LENGTH = 15

# Single source of truth for a run; each training cell deep-copies it and
# overrides "num_train_epochs" with an integer before building the trainer args.
FULL_TRAIN_ARGS = dict(
    TRAIN_N=TRAIN_N,
    BATCH_SIZE=BATCH_SIZE,
    DECODER_LENGTH=DECODER_LENGTH,
    ENCODER_LENGTH=ENCODER_LENGTH,
    MODEL=MODEL,
    SEQ_TRAINER_ARGS=dict(
        overwrite_output_dir=True,
        # Cumulative epoch counts at which a model snapshot is produced; the
        # epoch-set cell iterates over this list (sorted).
        num_train_epochs=[0, 1, 2, 3, 4, 7, 9],
        do_train=True,
        do_eval=True,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        learning_rate=5e-5,
        warmup_steps=100,
        weight_decay=0.1,
        label_smoothing_factor=0.1,
        predict_with_generate=True,
        logging_steps=100,
        save_total_limit=1,
        save_strategy="no",
        logging_strategy="epoch",
        evaluation_strategy="epoch",
        load_best_model_at_end=False,
        output_dir='reports/results',
        logging_dir='reports/logs',
    ),
)

# Pretrained checkpoint every experiment starts from.
model_name = "Salesforce/codet5-base-multi-sum"
# NOTE(review): skip_special_tokens is usually a decode()-time option; confirm
# that passing it to from_pretrained has the intended effect.
tokenizer = AutoTokenizer.from_pretrained(model_name, skip_special_tokens=False)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     /home/RDC/zinovyee.hub/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### 2. CoNaLa data: preprocessing and sampling as in the paper (further, random sampling)

In [2]:
# Load the clustered CoNaLa export; the "time_batch" column is discarded.
dataset = pd.read_csv("../data/processed/conala/20240327/conala_clustered.csv").drop(columns="time_batch")

# Test set = 85% of cluster 4 plus 156 rows drawn from all other clusters
# (344 + 156 = 500 rows in the recorded run).
in_cluster_4 = dataset["cluster"] == 4
test_4_examples = dataset[in_cluster_4].sample(frac=0.85, random_state=RS)
print("Cluster 4 obsevations: ", test_4_examples.shape)
test_non4_examples = dataset[~in_cluster_4].sample(n=156, random_state=RS)
print("Cluster not 4 obsevations: ", test_non4_examples.shape)

# Everything that was not sampled into the test set becomes training data.
test_dataset = pd.concat([test_4_examples, test_non4_examples])
train_dataset = dataset.drop(index=test_dataset.index)
print("Train Data: ", train_dataset.shape)
print("Test Data: ", test_dataset.shape)

# Shuffle both splits reproducibly and hand them to HF datasets.
train_dataset = Dataset.from_pandas(
    train_dataset.sample(frac=1, random_state=RS).reset_index(drop=True)
)
test_dataset = Dataset.from_pandas(
    test_dataset.sample(frac=1, random_state=RS).reset_index(drop=True)
)

# Project helper: filters and tokenizes (see the Filter/Map progress output).
train_data = pr.preprocess_dataset(train_dataset, tokenizer=tokenizer)
test_data = pr.preprocess_dataset(test_dataset, tokenizer=tokenizer)

# Pandas view of the preprocessed test split; "id" is the row position and is
# the key used later to pick one model per example.
test_df = pd.DataFrame(test_data).assign(id=lambda frame: frame.index)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rouge = evaluate.load('rouge')

Cluster 4 obsevations:  (344, 6)
Cluster not 4 obsevations:  (156, 6)
Train Data:  (2379, 6)
Test Data:  (500, 6)


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 140631.82 examples/s]
Filter: 100%|██████████| 2379/2379 [00:00<00:00, 61298.99 examples/s]
Map: 100%|██████████| 2379/2379 [00:01<00:00, 2120.28 examples/s]
Filter: 100%|██████████| 500/500 [00:00<00:00, 84294.06 examples/s]
Filter: 100%|██████████| 500/500 [00:00<00:00, 46328.50 examples/s]
Map: 100%|██████████| 499/499 [00:00<00:00, 2546.48 examples/s]


In [3]:
# Number of CUDA devices visible to torch (CUDA_VISIBLE_DEVICES is set to "0" in the settings cell).
torch.cuda.device_count()

1

In [4]:
def pred_perf(X, model):
    """Predict a per-row score with a pickled regressor, floored at zero.

    Args:
        X: Feature matrix passed unchanged to the regressor's ``predict``.
        model: Name of the regressor; selects the file
            ``./models/reg_<model>_drift.pkl``.

    Returns:
        The regressor's predictions with every negative value replaced by 0.
    """
    regressor_path = f'./models/reg_{model}_drift.pkl'
    # NOTE: unpickling executes arbitrary code; only load trusted artifacts.
    with open(regressor_path, 'rb') as regressor_file:
        regressor = pickle.load(regressor_file)

    # Negative outputs are clamped to 0 (presumably because the predicted
    # quantity is a non-negative metric -- confirm against the training notebook).
    return np.clip(regressor.predict(X), 0, None)

In [16]:
### Step 1. PREDICT PERFORMANCE

# Score every (test example, candidate model) pair in a single batch.

t_models = ["svm", "catboost"]

# Identifiers of the candidate summarisation models (epoch and cluster variants).
candidate_model_ids = [
    'epoch_set_0', 'epoch_set_1', 'epoch_set_2', 'epoch_set_3',
    'epoch_set_4', 'epoch_set_7', 'epoch_set_9', 'cluster_set_0',
    'cluster_set_1', 'cluster_set_2', 'cluster_set_3', 'cluster_set_4',
    'cluster_set_5', 'cluster_set_6',
]

# The fitted vectorizer does not depend on the model id, so it is loaded once
# rather than on every loop iteration.
# NOTE: unpickling executes arbitrary code; only load trusted artifacts.
with open("./models/vectorizer_drift.pkl", "rb") as file:
    vectorizer = pickle.load(file)

# One copy of the test frame per candidate model, tagged with that model's id,
# stacked with a single concat instead of growing the frame inside a loop.
meta_preds_df = pd.concat(
    [test_df.assign(model_id=model_id) for model_id in candidate_model_ids]
)

# Features = one-hot model id (columns in sorted model_id order) + TF-IDF of the input.
# NOTE(review): this column layout must match the one used when the regressors
# were fitted -- confirm against the training notebook.
X_test_tfidf = vectorizer.transform(meta_preds_df.loc[:, "input_sequence"])
X_test_column_sparse = pd.get_dummies(meta_preds_df.loc[:, "model_id"], sparse=True).sparse.to_coo().tocsr()
X_test = hstack([X_test_column_sparse, X_test_tfidf])

# Never filled in the visible cells; kept in case a cell outside this view reads it.
models_preds = []

# The loop variable is deliberately NOT called `model`, so the global seq2seq
# `model` object is not overwritten by a string.
for regressor_name in t_models:
    print(regressor_name)
    meta_preds_df[f"{regressor_name}_preds"] = pred_perf(X_test, regressor_name)

# Replace the repeated per-copy index with a unique RangeIndex.
meta_preds_df = meta_preds_df.reset_index(drop=True)

svm
catboost


In [17]:
# Average predicted score per candidate model (catboost regressor).
meta_preds_df.groupby("model_id")["catboost_preds"].mean()

model_id
cluster_set_0    0.425801
cluster_set_1    0.404294
cluster_set_2    0.407196
cluster_set_3    0.433571
cluster_set_4    0.316861
cluster_set_5    0.352937
cluster_set_6    0.405486
epoch_set_0      0.317432
epoch_set_1      0.467290
epoch_set_2      0.469011
epoch_set_3      0.467313
epoch_set_4      0.461308
epoch_set_7      0.467185
epoch_set_9      0.462042
Name: catboost_preds, dtype: float64

In [19]:
# For every test example ("id"), find the row whose catboost-predicted score is highest.
# idxmax() returns index LABELS, so the rows are selected with .loc; the former
# .iloc lookup was only correct while the index happened to be a fresh RangeIndex.
models_index = meta_preds_df.groupby("id")["catboost_preds"].idxmax()
optimal_ensemble = meta_preds_df.loc[models_index, ["id", "model_id"]]

# test-example id -> model_id predicted to perform best on that example
optimal_ensemble_map = dict(zip(optimal_ensemble.id, optimal_ensemble.model_id))

In [42]:
# Train one model per cumulative epoch count ("epoch set"), generate predictions
# for the test examples with each, and score them with per-example ROUGE-1.
results = {}
latest_run_epoch = 0
epoch_frames = []  # one scored copy of test_df per epoch set

# Loop-invariant: the trainer writes below reports/, so create it once up front.
os.makedirs('reports/', exist_ok=True)

for epoch_set in sorted(FULL_TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"]):

    set_df = test_df.copy()
    print(f"TRAINING EPOCH SET {epoch_set}")

    TRAIN_ARGS = copy.deepcopy(FULL_TRAIN_ARGS)
    MODEL_PATH = f"./models/{epoch_set}_epoch_set"
    PREV_MODEL_PATH = f"./models/{latest_run_epoch}_epoch_set"

    results[epoch_set] = {}

    # Continue from the previously saved snapshot whenever one with at least one
    # trained epoch exists; otherwise start from the pretrained checkpoint.
    # For the configured schedule this matches the former `epoch_set > 1` test,
    # and it also works for schedules that do not contain 0 or 1.
    continue_from_snapshot = latest_run_epoch > 0
    if continue_from_snapshot:
        # Only the epochs missing since the last snapshot are trained.
        TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"] = epoch_set - latest_run_epoch
    else:
        TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"] = epoch_set

    print(f'TRAINING EPOCHS {TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"]}')

    if continue_from_snapshot:
        model = AutoModelForSeq2SeqLM.from_pretrained(PREV_MODEL_PATH)
        print(f"LOADING MODEL {PREV_MODEL_PATH}")
    else:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        print(f"LOADING MODEL {model_name}")

    print(device)
    model.to(device)

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    compute_metrics = ev.compute_metric_with_params(tokenizer)

    # NOTE(review): a new trainer is built for every segment, so the optimizer
    # state and LR schedule (including warmup_steps) presumably restart each
    # time; an N-epoch snapshot may differ from an uninterrupted N-epoch run.
    training_args = Seq2SeqTrainingArguments(
        **TRAIN_ARGS["SEQ_TRAINER_ARGS"],
    )

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_data,
        eval_dataset=test_data,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # Epoch set 0 is the untouched pretrained model: evaluate only.
    if epoch_set != 0:
        trainer.train()

    text = list(test_df["input_sequence"].values)
    summaries = infer.generate_summary(text, model, tokenizer, TRAIN_ARGS["ENCODER_LENGTH"], TRAIN_ARGS["DECODER_LENGTH"])

    set_df["model_id"] = "epoch_set_" + str(epoch_set)
    # The second element of generate_summary's result is stored as the prediction.
    set_df["prediction"] = summaries[1]
    # use_aggregator=False keeps one ROUGE-1 score per example.
    set_df["rouge"] = rouge.compute(
        predictions=set_df["prediction"],
        references=set_df["output_sequence"],
        use_stemmer=True,
        use_aggregator=False,
        rouge_types=["rouge1"],
    )["rouge1"]

    epoch_frames.append(set_df)

    ########## SAVE EPOCH SET MODEL
    # makedirs also creates ./models itself when it does not exist yet.
    os.makedirs(MODEL_PATH, exist_ok=True)
    trainer.save_model(MODEL_PATH)

    latest_run_epoch = epoch_set

# Stack all per-epoch-set frames once instead of concatenating inside the loop.
test_result_df = pd.concat(epoch_frames)

########## SAVE THE FILE

with open('test_results_df_drift.pickle', 'wb') as handle:
    pickle.dump(test_result_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

TRAINING EPOCH SET 0
TRAINING EPOCHS 0


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  return dynamo.is_compiling()


TRAINING EPOCH SET 1
TRAINING EPOCHS 1




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.54,3.491663,0.43,0.1963,0.395,0.3945,13.6072,0.1888,0.9275,0.93,5410,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET 2
TRAINING EPOCHS 1
LOADING MODEL ./models/1_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,2.781,3.457503,0.4418,0.2064,0.4021,0.4023,14.012,0.1994,0.9623,0.963,5602,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET 3
TRAINING EPOCHS 1
LOADING MODEL ./models/2_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,2.2958,3.527485,0.4417,0.2064,0.4024,0.4022,14.0461,0.1989,0.9636,0.9642,5609,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET 4
TRAINING EPOCHS 1
LOADING MODEL ./models/3_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,2.0136,3.640005,0.4451,0.2091,0.4051,0.4046,14.3387,0.2069,0.9928,0.9928,5775,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET 7
TRAINING EPOCHS 3
LOADING MODEL ./models/4_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,1.8765,3.49629,0.4361,0.2059,0.3993,0.3993,13.5631,0.1967,0.9072,0.9113,5301,5817
2,2.5989,3.477729,0.4447,0.2082,0.4053,0.4053,14.1303,0.2053,0.9711,0.9715,5651,5817
3,2.3527,3.551478,0.445,0.2089,0.4043,0.4038,14.3387,0.2045,0.9896,0.9897,5757,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET 9
TRAINING EPOCHS 2
LOADING MODEL ./models/7_epoch_set
cuda


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,1.6764,3.678091,0.4345,0.2077,0.3968,0.3962,14.7976,0.203,1.0,1.0198,5932,5817
2,1.8826,3.716435,0.4367,0.205,0.3974,0.3968,14.3287,0.2036,0.9762,0.9764,5680,5817


  return dynamo.is_compiling()


In [25]:
# Reload the epoch-set results saved by the training cell.
# NOTE: unpickling executes arbitrary code; only load files you produced yourself.
with open('test_results_df_drift.pickle', 'rb') as results_file:
    test_result_df = pickle.load(results_file)

In [12]:
# Train one model per cluster subset (always starting from the pretrained
# checkpoint), generate predictions for the test examples with each, and score
# them with per-example ROUGE-1.
results = {}
# Not read in this cell; kept so the global is reset exactly as before.
latest_run_epoch = 0
# Each entry lists the cluster ids whose training rows are used for one model.
cluster_models = [[1,2], [3,4], [5,6], [7,8], [4], [5], [1]]
# Every cluster model is fine-tuned for the same fixed number of epochs.
CLUSTER_TRAIN_EPOCHS = 2
cluster_frames = []  # one scored copy of test_df per cluster set

# Loop-invariant: the trainer writes below reports/, so create it once up front.
os.makedirs('reports/', exist_ok=True)

for cluster_i, cluster_set in enumerate(cluster_models):

    set_df = test_df.copy()
    # The label says "EPOCH SET" but the value printed is the cluster list.
    print(f"TRAINING EPOCH SET {cluster_set}")

    TRAIN_ARGS = copy.deepcopy(FULL_TRAIN_ARGS)
    # The list repr is part of the directory name, e.g. "./models/[1, 2]_cluster_set".
    MODEL_PATH = f"./models/{cluster_set}_cluster_set"

    results[cluster_i] = {}

    TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"] = CLUSTER_TRAIN_EPOCHS

    print(f'TRAINING EPOCHS {TRAIN_ARGS["SEQ_TRAINER_ARGS"]["num_train_epochs"]}')

    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    print(f"LOADING MODEL {model_name}")

    print(device)
    model.to(device)

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    compute_metrics = ev.compute_metric_with_params(tokenizer)

    # Keep only the training rows that belong to the current cluster subset.
    train_data_cl = train_data.filter(lambda example: example["cluster"] in cluster_set)

    training_args = Seq2SeqTrainingArguments(
        **TRAIN_ARGS["SEQ_TRAINER_ARGS"],
    )

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_data_cl,
        eval_dataset=test_data,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    text = list(test_df["input_sequence"].values)
    summaries = infer.generate_summary(text, model, tokenizer, TRAIN_ARGS["ENCODER_LENGTH"], TRAIN_ARGS["DECODER_LENGTH"])

    # Models are identified by their position in cluster_models, not by the cluster ids.
    set_df["model_id"] = "cluster_set_" + str(cluster_i)
    # The second element of generate_summary's result is stored as the prediction.
    set_df["prediction"] = summaries[1]
    # use_aggregator=False keeps one ROUGE-1 score per example.
    set_df["rouge"] = rouge.compute(
        predictions=set_df["prediction"],
        references=set_df["output_sequence"],
        use_stemmer=True,
        use_aggregator=False,
        rouge_types=["rouge1"],
    )["rouge1"]

    cluster_frames.append(set_df)

    ########## SAVE EPOCH SET MODEL
    # makedirs also creates ./models itself when it does not exist yet.
    os.makedirs(MODEL_PATH, exist_ok=True)
    trainer.save_model(MODEL_PATH)

# Stack all per-cluster-set frames once instead of concatenating inside the loop.
test_result_df_cluster = pd.concat(cluster_frames)

########## SAVE THE FILE

with open('test_results_df_drift_cluster.pickle', 'wb') as handle:
    pickle.dump(test_result_df_cluster, handle, protocol=pickle.HIGHEST_PROTOCOL)

TRAINING EPOCH SET [1, 2]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 33241.99 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.2877,3.907113,0.3602,0.1206,0.3251,0.3257,11.3988,0.082,0.7516,0.7779,4525,5817
2,3.1713,3.793663,0.3759,0.1375,0.3362,0.3359,12.3868,0.1171,0.8361,0.8482,4934,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [3, 4]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 33007.55 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.535,3.948033,0.3548,0.1069,0.313,0.3135,9.8838,0.0493,0.5755,0.6441,3747,5817
2,3.4569,3.746688,0.3843,0.1335,0.3436,0.3443,11.3226,0.0913,0.7175,0.7507,4367,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [5, 6]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 33208.69 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.3256,4.064366,0.3429,0.1078,0.3097,0.3096,9.8537,0.0588,0.5846,0.6507,3785,5817
2,3.2409,3.877703,0.3642,0.1261,0.3304,0.3313,10.8437,0.0893,0.6845,0.7251,4218,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [7, 8]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 33469.46 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,3.7737,3.763443,0.3714,0.1304,0.3368,0.3369,12.0721,0.1163,0.8166,0.8315,4837,5817
2,2.9715,3.701935,0.3844,0.1373,0.3477,0.348,12.2465,0.1258,0.8227,0.8367,4867,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [4]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 32637.24 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.2821,5.074299,0.3037,0.0858,0.2758,0.2771,9.5731,0.0256,0.5479,0.6244,3632,5817
2,4.8449,4.773075,0.3113,0.0864,0.2832,0.2835,9.4068,0.0248,0.5166,0.6022,3503,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [5]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 32828.05 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.7374,4.645225,0.3151,0.0888,0.2898,0.2901,8.3968,0.024,0.4042,0.5247,3052,5817
2,3.6883,4.156934,0.3347,0.1043,0.3023,0.3035,9.7936,0.0497,0.571,0.6409,3728,5817


  return dynamo.is_compiling()


TRAINING EPOCH SET [1]
TRAINING EPOCHS 2




LOADING MODEL Salesforce/codet5-base-multi-sum
cuda


Filter: 100%|██████████| 2379/2379 [00:00<00:00, 31185.05 examples/s]
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.4294,4.075424,0.3412,0.1095,0.3104,0.3108,11.1523,0.0657,0.734,0.7638,4443,5817
2,3.2357,3.867348,0.3526,0.1164,0.3181,0.3188,11.7114,0.0893,0.7776,0.799,4648,5817


  return dynamo.is_compiling()


In [13]:
# Append the cluster-model results to the epoch-model results.
# Rows whose model_id already belongs to the cluster run are dropped first, so
# re-running this cell replaces them instead of stacking duplicate rows.
already_merged = test_result_df["model_id"].isin(test_result_df_cluster["model_id"])
test_result_df = pd.concat([test_result_df[~already_merged], test_result_df_cluster])

In [22]:
########## ROUGE PER SETTING

# Per-example ROUGE-1 grouped by the model that produced the prediction;
# the grouping is built once and reused for both statistics.
rouge_by_model = test_result_df.groupby("model_id")["rouge"]

print("Mean")
print(rouge_by_model.mean())

print("STD")
print(rouge_by_model.std())

Mean
model_id
cluster_set_0    0.376832
cluster_set_1    0.385097
cluster_set_2    0.363805
cluster_set_3    0.385271
cluster_set_4    0.310305
cluster_set_5    0.334277
cluster_set_6    0.352585
epoch_set_0      0.298044
epoch_set_1      0.430842
epoch_set_2      0.442460
epoch_set_3      0.442200
epoch_set_4      0.446764
epoch_set_7      0.446796
epoch_set_9      0.438847
Name: rouge, dtype: float64
STD
model_id
cluster_set_0    0.198200
cluster_set_1    0.193605
cluster_set_2    0.189683
cluster_set_3    0.195546
cluster_set_4    0.185538
cluster_set_5    0.187413
cluster_set_6    0.197707
epoch_set_0      0.181991
epoch_set_1      0.210997
epoch_set_2      0.214399
epoch_set_3      0.216437
epoch_set_4      0.217598
epoch_set_7      0.225082
epoch_set_9      0.224611
Name: rouge, dtype: float64


In [23]:
### ENSEMBLE COMPUTE
# Tag every result row with the model id selected for that test example.
test_result_df["opt_es_id"] = test_result_df["id"].map(optimal_ensemble_map)

# Keep, for each example, only the row produced by its selected model.
picked_by_ensemble = test_result_df["model_id"].eq(test_result_df["opt_es_id"])
ensemble_preds = test_result_df[picked_by_ensemble]

# Mean ROUGE-1 of the resulting per-example model selection.
ensemble_preds["rouge"].mean()

0.44504511636607547

In [27]:
# Display the per-example selection: test-example id -> chosen model_id.
optimal_ensemble_map

{0: 'epoch_set_3',
 1: 'epoch_set_2',
 2: 'epoch_set_2',
 3: 'epoch_set_2',
 4: 'epoch_set_2',
 5: 'epoch_set_7',
 6: 'epoch_set_2',
 7: 'epoch_set_2',
 8: 'epoch_set_2',
 9: 'epoch_set_2',
 10: 'epoch_set_2',
 11: 'epoch_set_1',
 12: 'epoch_set_2',
 13: 'epoch_set_2',
 14: 'epoch_set_2',
 15: 'epoch_set_2',
 16: 'epoch_set_2',
 17: 'epoch_set_2',
 18: 'epoch_set_2',
 19: 'epoch_set_2',
 20: 'epoch_set_2',
 21: 'epoch_set_2',
 22: 'epoch_set_2',
 23: 'epoch_set_2',
 24: 'epoch_set_2',
 25: 'epoch_set_2',
 26: 'epoch_set_2',
 27: 'epoch_set_2',
 28: 'epoch_set_1',
 29: 'epoch_set_2',
 30: 'epoch_set_2',
 31: 'epoch_set_2',
 32: 'epoch_set_2',
 33: 'epoch_set_7',
 34: 'epoch_set_2',
 35: 'epoch_set_2',
 36: 'epoch_set_7',
 37: 'epoch_set_2',
 38: 'epoch_set_2',
 39: 'epoch_set_2',
 40: 'epoch_set_2',
 41: 'epoch_set_2',
 42: 'epoch_set_1',
 43: 'epoch_set_2',
 44: 'epoch_set_2',
 45: 'epoch_set_2',
 46: 'epoch_set_2',
 47: 'epoch_set_2',
 48: 'epoch_set_2',
 49: 'epoch_set_7',
 50: 'epoc

In [21]:
# Number of result rows tagged with each selected model id.
test_result_df["opt_es_id"].value_counts()

opt_es_id
epoch_set_2    5586
epoch_set_7     602
epoch_set_1     574
epoch_set_3     210
epoch_set_9      14
Name: count, dtype: int64

In [24]:
# Display the merged results frame (the notebook shows only the first and last rows).
test_result_df

Unnamed: 0,question_id,intent,output_sequence,input_sequence,idx,cluster,input_ids,attention_mask,labels,id,model_id,prediction,rouge,opt_es_id
0,38152389,Coalesce values from 2 columns into a single c...,combine values from column 'b' and column 'a' ...,"df['c'] = np.where(df['a'].isnull, df['b'], df...",2736,3,"[1, 2180, 3292, 71, 3546, 273, 1130, 18, 6051,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 14082, 924, 628, 1057, 296, 70, 11, 471, 1...",0,epoch_set_0,Update c column,0.200000,epoch_set_3
1,6407780,How to extract data from JSON Object in Python?,extract data field 'bar' from json object,"json.loads('{""foo"": 42, ""bar"": ""baz""}')['bar']",1143,4,"[1, 1977, 18, 17135, 2668, 16711, 11351, 6877,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 8004, 501, 652, 296, 3215, 11, 628, 1163, ...",1,epoch_set_0,JSON. parse returns 42,0.181818,epoch_set_2
2,6696027,split elements of a list in python,split strings in list `l` on the first occurri...,"[i.split('\t', 1)[0] for i in l]",1170,7,"[1, 63, 77, 18, 4939, 2668, 64, 88, 2187, 404,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 4939, 2064, 316, 666, 1375, 80, 68, 603, 3...",2,epoch_set_0,Split the string into a list of strings,0.333333,epoch_set_2
3,179369,How do I abort the execution of a Python script?,abort the execution of the script using messag...,sys.exit('aa! errors!'),81,4,"[1, 9499, 18, 8593, 2668, 7598, 5, 1334, 5124,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]","[1, 18623, 326, 4588, 434, 326, 2728, 1450, 88...",3,epoch_set_0,Exit with an error message,0.266667,epoch_set_2
4,2823472,Is there a method that tells my program to quit?,quit program,sys.exit(0),672,4,"[1, 9499, 18, 8593, 12, 20, 13, 2, 0, 0, 0, 0,...","[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]","[1, 27176, 5402, 2, -100, -100, -100, -100, -1...",4,epoch_set_0,Exit the program with status 0.,0.250000,epoch_set_2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,1093322,check what version of Python is running,check python version,sys.version_info,415,4,"[1, 9499, 18, 1589, 67, 1376, 2, 0, 0, 0, 0, 0...","[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[1, 1893, 5790, 1177, 2, -100, -100, -100, -10...",494,cluster_set_6,get the version of sys. version_info,0.200000,epoch_set_2
495,14406214,Moving x-axis to the top of a plot in matplotlib,move x-axis to the top of a plot `ax`,ax.xaxis.tick_top(),1830,4,"[1, 651, 18, 92, 4890, 18, 6470, 67, 3669, 143...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]","[1, 8501, 619, 17, 4890, 358, 326, 1760, 434, ...",495,cluster_set_6,tick the top of the axis,0.500000,epoch_set_2
496,17071871,Select rows from a DataFrame based on values i...,select rows from a dataframe `df` whose value ...,df.loc[~df['column_name'].isin(some_values)],2026,3,"[1, 2180, 18, 1829, 63, 98, 2180, 3292, 2827, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 4025, 2595, 628, 279, 12170, 1375, 2180, 6...",496,cluster_set_6,remove values from a dictionary `df` where the...,0.533333,epoch_set_2
497,5137497,Find current directory and file's directory,get the canonical path of file `path`,os.path.realpath(path),1035,9,"[1, 538, 18, 803, 18, 7688, 803, 12, 803, 13, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]","[1, 588, 326, 7378, 589, 434, 585, 1375, 803, ...",497,cluster_set_6,get the real path of a path,0.714286,epoch_set_2
