In [1]:
## All to folder
## generate prompt
# !ls data
# import time
# time.sleep(60*30)

# Try to do:
# torch.cuda.empty_cache()

In [2]:
import os
import json

import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM as LLaMAForCausalLM
from transformers import LlamaTokenizer as LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
from datasets import load_dataset
from EvaluateTestSet import EvaluateTestSet
from transformers.integrations import TensorBoardCallback
from transformers import GenerationConfig

def init_lora_model_and_tokenizer(default_model,
                             LORA_R,
                             LORA_ALPHA,
                             LORA_DROPOUT
                            ):


    """
        
    """
    model = LLaMAForCausalLM.from_pretrained(
    default_model,
    load_in_8bit=True,
    device_map="auto",
    )
    tokenizer = LLaMATokenizer.from_pretrained(
        default_model, add_eos_token=True
    )

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=LORA_DROPOUT,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, config)

    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    return model, tokenizer



class MyCustomCallback(TensorBoardCallback):
    #log_bleu_steps_factor = 5
    bleu_generation_max_new_tokens = 30
    bleu_fn_test_data = "temp/t2c_answers.json"
    bleu_fn_etalon = "temp/answers.json"
    log_step = 0
    
    def on_log(self, args, state, control, logs=None, **kwargs):
        super().on_log(args, state, control, logs=logs, **kwargs)
        #print("kwargs", len(kwargs), kwargs.keys())
        if self.tb_writer is not None:
            #print(state)
            #print(state.global_step)
            #print(self.log_step)
            if (self.log_step % self.log_bleu_steps_factor ==0):
                model = kwargs['model']
                tokenizer = kwargs['tokenizer']
                
                model.eval()
                assert not model.training
                generation_config = GenerationConfig(max_new_tokens = self.bleu_generation_max_new_tokens,
                                                     # min_new_tokens = 5,
                                                     temperature = 1.0
                                                    )
                print("generation_config:", generation_config)
                evaluator = EvaluateTestSet(generation_config = generation_config,
                                            fn_test_data = self.bleu_fn_test_data,
                                            fn_etalon = self.bleu_fn_etalon,
                                            batch_size = 1
                                       )

                metric_res = evaluator.evaluate(model=model, 
                                                tokenizer=tokenizer,
                                               )
                model.train()
                assert model.training
                print(metric_res)
                for key, val in metric_res.items():
                    #add "custom/something"
                    self.tb_writer.add_scalar(key, val, state.global_step)
                self.tb_writer.flush()
            self.log_step += 1


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [3]:
CONFIG_PATH = "/root/experiments_configs/"
EXPERIMENTS_PATH = "/root/experiments/"
experiment_name = "t2c_concode_220428_v22"
# t2c_concode_220428_v18.json

In [4]:
!ls /root/experiments_configs

t2c_concode_220428_v14_config.json  t2c_concode_220428_v18_config.json
t2c_concode_220428_v15_config.json  t2c_concode_220428_v19_config.json
t2c_concode_220428_v16_config.json  t2c_concode_220428_v20_config.json


In [6]:
current_config_path = os.path.join(CONFIG_PATH, experiment_name + "_config.json")
experiment_config = json.load(open(current_config_path, "r"))

assert experiment_config['experiment_name'] == experiment_name

In [24]:
experiment_config['resume_from_checkpoint'] = False

In [8]:
assert experiment_config['resume_from_checkpoint'] == True

In [9]:
assert experiment_config['experiment_name'] == experiment_name

In [10]:
current_experiment_path = os.path.join(EXPERIMENTS_PATH, experiment_name)

In [23]:
!mkdir {current_experiment_path}

mkdir: cannot create directory ‘/root/experiments/t2c_concode_220428_v22’: File exists


In [12]:
json.dump(experiment_config, open(current_experiment_path + \
                                  "/experiment_config.json", 
                                  "w+"
                                 )
         )

In [13]:
setattr(MyCustomCallback, "log_bleu_steps_factor", experiment_config['log_bleu_steps_factor'])

In [14]:
MyCustomCallback.log_bleu_steps_factor

50

In [15]:
model, tokenizer = init_lora_model_and_tokenizer(default_model = experiment_config["default_model"],
                                                 LORA_R = experiment_config["LORA_R"],
                                                 LORA_ALPHA = experiment_config["LORA_ALPHA"],
                                                 LORA_DROPOUT = experiment_config["LORA_DROPOUT"]
                                                )


data = load_dataset("json", 
                    data_files = {"train": experiment_config["fn_train_dataset"],
                                  "eval":  experiment_config["fn_eval_dataset"]
                                 }
                   )



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
Found cached dataset json (/root/.cache/huggingface/datasets/json/default-3ac2744fedc77f2f/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
tokenizer_val = LLaMATokenizer.from_pretrained(
    experiment_config['default_model'], add_eos_token=True
)
tokenizer_val.pad_token_id = 0  # unk. we want this to be different from the eos token


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [17]:
# experiment_config["logging_steps"] = 1
# experiment_config["eval_steps"] = 1

In [18]:
experiment_config
# {'experiment_name': 't2c_concode_220428_v19',
#  'fn_train_dataset': '/root/data/t2c_train.json',
#  'fn_eval_dataset': '/root/data/t2c_answers.json',
#  'default_model': 'decapoda-research/llama-7b-hf',
#  'MICRO_BATCH_SIZE': 2,
#  'BATCH_SIZE': 10,
#  'EPOCHS': 2,
#  'LEARNING_RATE': 0.0002,
#  'CUTOFF_LEN': 256,
#  'LORA_R': 16,
#  'LORA_ALPHA': 16,
#  'LORA_DROPOUT': 0.05,
#  'warmup_steps': 200,
#  'fp16': True,
#  'logging_steps': 10,
#  'eval_steps': 100,
#  'evaluation_strategy': 'steps',
#  'save_total_limit': 1,
#  'save_strategy': 'steps',
#  'save_steps': 500,
#  'seed': 42,
#  'logging_strategy': 'steps',
#  'report_to': 'tensorboard',
#  'mlm': False,
#  'truncation': True,
#  'padding': 'max_length',
#  'config_use_cache': False,
#  'resume_from_checkpoint': False,
#  'bleu_batch_size': 5,
#  'GRADIENT_ACCUMULATION_STEPS': 5,
#  'log_bleu_steps_factor': 50}

{'experiment_name': 't2c_concode_220428_v22',
 'fn_train_dataset': '/root/data/t2c_train.json',
 'fn_eval_dataset': '/root/data/t2c_answers.json',
 'default_model': 'decapoda-research/llama-7b-hf',
 'MICRO_BATCH_SIZE': 2,
 'BATCH_SIZE': 10,
 'EPOCHS': 2,
 'LEARNING_RATE': 0.0002,
 'CUTOFF_LEN': 256,
 'LORA_R': 64,
 'LORA_ALPHA': 64,
 'LORA_DROPOUT': 0.05,
 'warmup_steps': 200,
 'fp16': True,
 'logging_steps': 10,
 'eval_steps': 100,
 'evaluation_strategy': 'steps',
 'save_total_limit': 1,
 'save_strategy': 'steps',
 'save_steps': 500,
 'seed': 42,
 'logging_strategy': 'steps',
 'report_to': 'tensorboard',
 'mlm': False,
 'truncation': True,
 'padding': 'max_length',
 'config_use_cache': False,
 'resume_from_checkpoint': True,
 'bleu_batch_size': 5,
 'GRADIENT_ACCUMULATION_STEPS': 5,
 'log_bleu_steps_factor': 50}

In [19]:
# experiment_config["resume_from_checkpoint"]

In [20]:
from prompter import Prompter
prompter = Prompter()

def generate_prompt(data_point):
    if "input" in data_point and data_point["input"]:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        input = data_point["input"],
                                        label = data_point["output"]
                                       )
    else:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        #input = None,
                                        label = data_point["output"]
                                       )

/root/ipynb/prompter/templates/


In [21]:
# experiment_config["resume_from_checkpoint"] =

In [25]:
# def generate_prompt(data_point):
#     # sorry about the formatting disaster gotta move fast
#     if data_point["input"]:
#         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Input:
# {data_point["input"]}
# ### Response:
# {data_point["output"]}"""
#     else:
#         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Response:
# {data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=experiment_config["truncation"],
        max_length=experiment_config["CUTOFF_LEN"],
        padding=experiment_config["padding"]
    )
)

trainer = transformers.Trainer(
    model=model,
    tokenizer=tokenizer_val,
    train_dataset=data["train"],
    eval_dataset=data['eval'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=experiment_config["MICRO_BATCH_SIZE"],
        gradient_accumulation_steps=experiment_config["GRADIENT_ACCUMULATION_STEPS"],
        warmup_steps=experiment_config["warmup_steps"],
        num_train_epochs=experiment_config["EPOCHS"],
        learning_rate=experiment_config["LEARNING_RATE"],
        fp16=experiment_config["fp16"],
        logging_steps=experiment_config["logging_steps"],        
        evaluation_strategy = experiment_config['evaluation_strategy'],
        eval_steps=experiment_config["eval_steps"],
        output_dir=current_experiment_path,#"lora-alpaca",
        save_total_limit=experiment_config["save_total_limit"],
        save_strategy = experiment_config["save_strategy"],
        
        save_steps = experiment_config["save_steps"],
        seed=experiment_config["seed"],
        logging_dir=current_experiment_path,
        logging_strategy=experiment_config["logging_strategy"],
        report_to=experiment_config["report_to"]
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, 
                                                               mlm=experiment_config["mlm"]
                                                              ),
    callbacks = [MyCustomCallback]
)
model.config.use_cache = experiment_config["config_use_cache"]
# print(len(trainer.optimizer.state['found_inf_per_device']))


trainer.train(resume_from_checkpoint=experiment_config["resume_from_checkpoint"])

model.save_pretrained(current_experiment_path)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
100,1.0985,1.200303
200,1.038,1.140061
300,0.9879,1.125297
400,0.9988,1.120366
500,0.9473,1.11298
600,0.976,1.113665
700,0.9277,1.10895
800,0.953,1.108034
900,0.9414,1.107022
1000,0.9432,1.105373


generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



  0%|                                                    | 0/30 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|███████████████████████████████████████████| 30/30 [01:50<00:00,  3.70s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 45806.01it/s]


{'EM': 0.0, 'BLEU': 0.0028138222263932766, 'brevity_penalty': 0.30590154455896784, 'ratio': 0.45777233782129745, 'translation_length': 374, 'reference_length': 817, 'precisions_0': 0.056, 'precisions_1': 0.011594202898550725, 'precisions_2': 0.0031645569620253164, 'precisions_3': 0.003484320557491289}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.50s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 150874.24it/s]


{'EM': 0.0, 'BLEU': 0.20810517743045512, 'brevity_penalty': 0.7200351241985318, 'ratio': 0.7527539779681762, 'translation_length': 615, 'reference_length': 817, 'precisions_0': 0.5551948051948052, 'precisions_1': 0.36177474402730375, 'precisions_2': 0.22841726618705036, 'precisions_3': 0.1520912547528517}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.51s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 48960.75it/s]


{'EM': 0.0, 'BLEU': 0.23521150037436225, 'brevity_penalty': 0.7246934997419091, 'ratio': 0.7564259485924113, 'translation_length': 618, 'reference_length': 817, 'precisions_0': 0.5573505654281099, 'precisions_1': 0.3938879456706282, 'precisions_2': 0.26475849731663686, 'precisions_3': 0.19092627599243855}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:48<00:00,  3.62s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 155922.08it/s]


{'EM': 0.0, 'BLEU': 0.23638696232851494, 'brevity_penalty': 0.6645662613783451, 'ratio': 0.7099143206854345, 'translation_length': 580, 'reference_length': 817, 'precisions_0': 0.621342512908778, 'precisions_1': 0.426497277676951, 'precisions_2': 0.29366602687140114, 'precisions_3': 0.20570264765784113}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161942.24it/s]


{'EM': 0.0, 'BLEU': 0.2192294686296193, 'brevity_penalty': 0.6901782584838726, 'ratio': 0.7294981640146879, 'translation_length': 596, 'reference_length': 817, 'precisions_0': 0.5561139028475712, 'precisions_1': 0.37742504409171074, 'precisions_2': 0.25884543761638734, 'precisions_3': 0.1873767258382643}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 170039.35it/s]


{'EM': 0.0, 'BLEU': 0.22773001714820523, 'brevity_penalty': 0.6774254411055589, 'ratio': 0.7197062423500612, 'translation_length': 588, 'reference_length': 817, 'precisions_0': 0.5959252971137521, 'precisions_1': 0.407871198568873, 'precisions_2': 0.27599243856332706, 'precisions_3': 0.1903807615230461}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.52s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 168671.74it/s]


{'EM': 0.0, 'BLEU': 0.2093400464543299, 'brevity_penalty': 0.7075381305279058, 'ratio': 0.7429620563035496, 'translation_length': 607, 'reference_length': 817, 'precisions_0': 0.5444078947368421, 'precisions_1': 0.3546712802768166, 'precisions_2': 0.23905109489051096, 'precisions_3': 0.16602316602316602}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 55627.37it/s]


{'EM': 0.0, 'BLEU': 0.2125711326947317, 'brevity_penalty': 0.5953609846006505, 'ratio': 0.6585067319461444, 'translation_length': 538, 'reference_length': 817, 'precisions_0': 0.5974025974025974, 'precisions_1': 0.4204322200392927, 'precisions_2': 0.2964509394572025, 'precisions_3': 0.2182628062360802}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 162569.92it/s]


{'EM': 0.0, 'BLEU': 0.21909004786713776, 'brevity_penalty': 0.6203967900462553, 'ratio': 0.6768665850673194, 'translation_length': 553, 'reference_length': 817, 'precisions_0': 0.6010830324909747, 'precisions_1': 0.41603053435114506, 'precisions_2': 0.291497975708502, 'precisions_3': 0.21336206896551724}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 141859.21it/s]

{'EM': 0.0, 'BLEU': 0.22067874296130954, 'brevity_penalty': 0.63196038331477, 'ratio': 0.6854345165238678, 'translation_length': 560, 'reference_length': 817, 'precisions_0': 0.6007130124777184, 'precisions_1': 0.4124293785310734, 'precisions_2': 0.29141716566866266, 'precisions_3': 0.2059447983014862}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 42930.44it/s]


{'EM': 0.0, 'BLEU': 0.19696847356049732, 'brevity_penalty': 0.5869397994961546, 'ratio': 0.6523867809057528, 'translation_length': 533, 'reference_length': 817, 'precisions_0': 0.5917602996254682, 'precisions_1': 0.39880952380952384, 'precisions_2': 0.2742616033755274, 'precisions_3': 0.19594594594594594}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:49<00:00,  3.66s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 158875.15it/s]


{'EM': 0.0, 'BLEU': 0.220534521681984, 'brevity_penalty': 0.6629514358413311, 'ratio': 0.7086903304773562, 'translation_length': 579, 'reference_length': 817, 'precisions_0': 0.5655172413793104, 'precisions_1': 0.39636363636363636, 'precisions_2': 0.27884615384615385, 'precisions_3': 0.19591836734693877}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:48<00:00,  3.63s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 39482.00it/s]


{'EM': 0.0, 'BLEU': 0.2192560562388062, 'brevity_penalty': 0.7075381305279058, 'ratio': 0.7429620563035496, 'translation_length': 607, 'reference_length': 817, 'precisions_0': 0.5575657894736842, 'precisions_1': 0.3685121107266436, 'precisions_2': 0.25547445255474455, 'precisions_3': 0.17567567567567569}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 48770.98it/s]


{'EM': 0.0, 'BLEU': 0.2246996769490805, 'brevity_penalty': 0.6516017749168791, 'ratio': 0.7001223990208079, 'translation_length': 572, 'reference_length': 817, 'precisions_0': 0.5794066317626527, 'precisions_1': 0.40331491712707185, 'precisions_2': 0.28654970760233917, 'precisions_3': 0.2111801242236025}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 162991.09it/s]


{'EM': 0.0, 'BLEU': 0.19697319347217687, 'brevity_penalty': 0.6901782584838726, 'ratio': 0.7294981640146879, 'translation_length': 596, 'reference_length': 817, 'precisions_0': 0.5309882747068677, 'precisions_1': 0.3562610229276896, 'precisions_2': 0.2309124767225326, 'precisions_3': 0.15187376725838264}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 167772.16it/s]


{'EM': 0.0, 'BLEU': 0.21976307427592082, 'brevity_penalty': 0.6645662613783451, 'ratio': 0.7099143206854345, 'translation_length': 580, 'reference_length': 817, 'precisions_0': 0.5731497418244407, 'precisions_1': 0.3956442831215971, 'precisions_2': 0.272552783109405, 'precisions_3': 0.1934826883910387}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 169352.79it/s]


{'EM': 0.0, 'BLEU': 0.22567287376076783, 'brevity_penalty': 0.6580971081453522, 'ratio': 0.7050183598531212, 'translation_length': 576, 'reference_length': 817, 'precisions_0': 0.5823223570190641, 'precisions_1': 0.40950639853747717, 'precisions_2': 0.28239845261121854, 'precisions_3': 0.2053388090349076}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:48<00:00,  3.63s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 52494.42it/s]


{'EM': 0.0, 'BLEU': 0.22126883576346396, 'brevity_penalty': 0.6385332304644112, 'ratio': 0.6903304773561811, 'translation_length': 564, 'reference_length': 817, 'precisions_0': 0.6017699115044248, 'precisions_1': 0.41869158878504675, 'precisions_2': 0.28316831683168314, 'precisions_3': 0.20210526315789473}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 164482.51it/s]


{'EM': 0.0, 'BLEU': 0.21401714162504726, 'brevity_penalty': 0.6170787554426814, 'ratio': 0.6744186046511628, 'translation_length': 551, 'reference_length': 817, 'precisions_0': 0.5797101449275363, 'precisions_1': 0.4118773946360153, 'precisions_2': 0.2886178861788618, 'precisions_3': 0.20995670995670995}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 159479.24it/s]


{'EM': 0.0, 'BLEU': 0.2263903024035063, 'brevity_penalty': 0.6120900220656069, 'ratio': 0.6707466340269278, 'translation_length': 548, 'reference_length': 817, 'precisions_0': 0.6138433515482696, 'precisions_1': 0.4373795761078998, 'precisions_2': 0.3047034764826176, 'precisions_3': 0.22875816993464052}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 166220.77it/s]

{'EM': 0.0, 'BLEU': 0.20821557799189688, 'brevity_penalty': 0.6187385540544988, 'ratio': 0.6756425948592412, 'translation_length': 552, 'reference_length': 817, 'precisions_0': 0.5804701627486437, 'precisions_1': 0.39579349904397704, 'precisions_2': 0.2778904665314402, 'precisions_3': 0.20086393088552915}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 144134.16it/s]


{'EM': 0.0, 'BLEU': 0.22373485829470582, 'brevity_penalty': 0.6220534596927131, 'ratio': 0.6780905752753978, 'translation_length': 554, 'reference_length': 817, 'precisions_0': 0.6018018018018018, 'precisions_1': 0.42857142857142855, 'precisions_2': 0.29292929292929293, 'precisions_3': 0.221505376344086}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.59s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 50111.16it/s]


{'EM': 0.0, 'BLEU': 0.21588654931506668, 'brevity_penalty': 0.5987189659716076, 'ratio': 0.6609547123623011, 'translation_length': 540, 'reference_length': 817, 'precisions_0': 0.6025878003696857, 'precisions_1': 0.42857142857142855, 'precisions_2': 0.29521829521829523, 'precisions_3': 0.22172949002217296}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 170963.48it/s]


{'EM': 0.0, 'BLEU': 0.21502321174892258, 'brevity_penalty': 0.6368924160294538, 'ratio': 0.6891064871481029, 'translation_length': 563, 'reference_length': 817, 'precisions_0': 0.5797872340425532, 'precisions_1': 0.40823970037453183, 'precisions_2': 0.27976190476190477, 'precisions_3': 0.1962025316455696}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161526.47it/s]


{'EM': 0.0, 'BLEU': 0.22854686908576086, 'brevity_penalty': 0.6418100460327502, 'ratio': 0.6927784577723378, 'translation_length': 566, 'reference_length': 817, 'precisions_0': 0.5731922398589065, 'precisions_1': 0.4171322160148976, 'precisions_2': 0.29980276134122286, 'precisions_3': 0.22431865828092243}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.59s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 152705.24it/s]


{'EM': 0.0, 'BLEU': 0.21912348088566602, 'brevity_penalty': 0.6070873651895963, 'ratio': 0.6670746634026927, 'translation_length': 545, 'reference_length': 817, 'precisions_0': 0.5934065934065934, 'precisions_1': 0.42441860465116277, 'precisions_2': 0.29835390946502055, 'precisions_3': 0.22587719298245615}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 39882.45it/s]


{'EM': 0.0, 'BLEU': 0.21141494781180065, 'brevity_penalty': 0.6253620853529512, 'ratio': 0.6805385556915544, 'translation_length': 556, 'reference_length': 817, 'precisions_0': 0.5888689407540395, 'precisions_1': 0.3984819734345351, 'precisions_2': 0.27364185110663986, 'precisions_3': 0.20342612419700215}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 159681.62it/s]


{'EM': 0.0, 'BLEU': 0.2220270724249386, 'brevity_penalty': 0.6613349669059593, 'ratio': 0.7074663402692778, 'translation_length': 578, 'reference_length': 817, 'precisions_0': 0.5630397236614854, 'precisions_1': 0.3989071038251366, 'precisions_2': 0.279383429672447, 'precisions_3': 0.20245398773006135}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 48507.76it/s]


{'EM': 0.0, 'BLEU': 0.21296956795121147, 'brevity_penalty': 0.6237085593295825, 'ratio': 0.6793145654834761, 'translation_length': 555, 'reference_length': 817, 'precisions_0': 0.5809352517985612, 'precisions_1': 0.4011406844106464, 'precisions_2': 0.28024193548387094, 'precisions_3': 0.20815450643776823}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 160906.80it/s]


{'EM': 0.0, 'BLEU': 0.21854010176418737, 'brevity_penalty': 0.6303131865967199, 'ratio': 0.6842105263157895, 'translation_length': 559, 'reference_length': 817, 'precisions_0': 0.5857142857142857, 'precisions_1': 0.41509433962264153, 'precisions_2': 0.288, 'precisions_3': 0.20638297872340425}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:48<00:00,  3.60s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 55553.70it/s]


{'EM': 0.0, 'BLEU': 0.21364212372472507, 'brevity_penalty': 0.6037445798285037, 'ratio': 0.6646266829865362, 'translation_length': 543, 'reference_length': 817, 'precisions_0': 0.5900735294117647, 'precisions_1': 0.4182879377431907, 'precisions_2': 0.29132231404958675, 'precisions_3': 0.21806167400881057}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161734.09it/s]

{'EM': 0.0, 'BLEU': 0.21250228265532048, 'brevity_penalty': 0.6070873651895963, 'ratio': 0.6670746634026927, 'translation_length': 545, 'reference_length': 817, 'precisions_0': 0.5787545787545788, 'precisions_1': 0.40891472868217055, 'precisions_2': 0.29218106995884774, 'precisions_3': 0.21710526315789475}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161112.83it/s]


{'EM': 0.0, 'BLEU': 0.2029416192701593, 'brevity_penalty': 0.6253620853529512, 'ratio': 0.6805385556915544, 'translation_length': 556, 'reference_length': 817, 'precisions_0': 0.5691202872531418, 'precisions_1': 0.3908918406072106, 'precisions_2': 0.2676056338028169, 'precisions_3': 0.18629550321199143}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 157878.44it/s]


{'EM': 0.0, 'BLEU': 0.21555934933771081, 'brevity_penalty': 0.6286644024420278, 'ratio': 0.6829865361077111, 'translation_length': 558, 'reference_length': 817, 'precisions_0': 0.5867620751341681, 'precisions_1': 0.4102079395085066, 'precisions_2': 0.280561122244489, 'precisions_3': 0.2046908315565032}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163626.94it/s]


{'EM': 0.0, 'BLEU': 0.2075769588838348, 'brevity_penalty': 0.63196038331477, 'ratio': 0.6854345165238678, 'translation_length': 560, 'reference_length': 817, 'precisions_0': 0.5579322638146168, 'precisions_1': 0.3879472693032015, 'precisions_2': 0.2694610778443114, 'precisions_3': 0.19957537154989385}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 166661.09it/s]


{'EM': 0.0, 'BLEU': 0.2219371420704936, 'brevity_penalty': 0.6532280539448486, 'ratio': 0.7013463892288861, 'translation_length': 573, 'reference_length': 817, 'precisions_0': 0.5696864111498258, 'precisions_1': 0.4025735294117647, 'precisions_2': 0.2840466926070039, 'precisions_3': 0.20454545454545456}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 169581.02it/s]


{'EM': 0.0, 'BLEU': 0.2084547186531027, 'brevity_penalty': 0.6286644024420278, 'ratio': 0.6829865361077111, 'translation_length': 558, 'reference_length': 817, 'precisions_0': 0.5688729874776386, 'precisions_1': 0.3931947069943289, 'precisions_2': 0.2725450901803607, 'precisions_3': 0.19829424307036247}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 41174.45it/s]


{'EM': 0.0, 'BLEU': 0.202709812540591, 'brevity_penalty': 0.6237085593295825, 'ratio': 0.6793145654834761, 'translation_length': 555, 'reference_length': 817, 'precisions_0': 0.552158273381295, 'precisions_1': 0.3916349809885932, 'precisions_2': 0.2701612903225806, 'precisions_3': 0.19098712446351931}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163626.94it/s]


{'EM': 0.0, 'BLEU': 0.20994192552867763, 'brevity_penalty': 0.6580971081453522, 'ratio': 0.7050183598531212, 'translation_length': 576, 'reference_length': 817, 'precisions_0': 0.561525129982669, 'precisions_1': 0.38939670932358317, 'precisions_2': 0.25918762088974856, 'precisions_3': 0.18275154004106775}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 146312.93it/s]


{'EM': 0.0, 'BLEU': 0.20717945438972582, 'brevity_penalty': 0.6253620853529512, 'ratio': 0.6805385556915544, 'translation_length': 556, 'reference_length': 817, 'precisions_0': 0.5547576301615799, 'precisions_1': 0.3984819734345351, 'precisions_2': 0.27364185110663986, 'precisions_3': 0.19914346895074947}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 160496.33it/s]


{'EM': 0.0, 'BLEU': 0.20503232433829952, 'brevity_penalty': 0.6450804239336081, 'ratio': 0.6952264381884945, 'translation_length': 568, 'reference_length': 817, 'precisions_0': 0.5729349736379613, 'precisions_1': 0.3914656771799629, 'precisions_2': 0.25343811394891946, 'precisions_3': 0.17954070981210857}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.57s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163626.94it/s]


{'EM': 0.0, 'BLEU': 0.20205400586294964, 'brevity_penalty': 0.635250001256266, 'ratio': 0.6878824969400245, 'translation_length': 562, 'reference_length': 817, 'precisions_0': 0.5577264653641207, 'precisions_1': 0.38461538461538464, 'precisions_2': 0.25646123260437376, 'precisions_3': 0.18604651162790697}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 170269.45it/s]

{'EM': 0.0, 'BLEU': 0.21432548948692853, 'brevity_penalty': 0.6368924160294538, 'ratio': 0.6891064871481029, 'translation_length': 563, 'reference_length': 817, 'precisions_0': 0.5762411347517731, 'precisions_1': 0.40262172284644193, 'precisions_2': 0.2757936507936508, 'precisions_3': 0.20042194092827004}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.56s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161942.24it/s]


{'EM': 0.0, 'BLEU': 0.2164318416449623, 'brevity_penalty': 0.6434460411594718, 'ratio': 0.6940024479804161, 'translation_length': 567, 'reference_length': 817, 'precisions_0': 0.5757042253521126, 'precisions_1': 0.4033457249070632, 'precisions_2': 0.27165354330708663, 'precisions_3': 0.20292887029288703}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:47<00:00,  3.58s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 159479.24it/s]

{'EM': 0.0, 'BLEU': 0.2027390527718878, 'brevity_penalty': 0.6070873651895963, 'ratio': 0.6670746634026927, 'translation_length': 545, 'reference_length': 817, 'precisions_0': 0.5842490842490843, 'precisions_1': 0.40310077519379844, 'precisions_2': 0.2736625514403292, 'precisions_3': 0.19298245614035087}





In [28]:
experiment_config

{'experiment_name': 't2c_concode_220428_v22',
 'fn_train_dataset': '/root/data/t2c_train.json',
 'fn_eval_dataset': '/root/data/t2c_answers.json',
 'default_model': 'decapoda-research/llama-7b-hf',
 'MICRO_BATCH_SIZE': 2,
 'BATCH_SIZE': 10,
 'EPOCHS': 2,
 'LEARNING_RATE': 0.0002,
 'CUTOFF_LEN': 256,
 'LORA_R': 64,
 'LORA_ALPHA': 64,
 'LORA_DROPOUT': 0.05,
 'warmup_steps': 200,
 'fp16': True,
 'logging_steps': 10,
 'eval_steps': 100,
 'evaluation_strategy': 'steps',
 'save_total_limit': 1,
 'save_strategy': 'steps',
 'save_steps': 500,
 'seed': 42,
 'logging_strategy': 'steps',
 'report_to': 'tensorboard',
 'mlm': False,
 'truncation': True,
 'padding': 'max_length',
 'config_use_cache': False,
 'resume_from_checkpoint': False,
 'bleu_batch_size': 5,
 'GRADIENT_ACCUMULATION_STEPS': 5,
 'log_bleu_steps_factor': 50}

In [27]:
model.save_pretrained(current_experiment_path)

In [None]:
# !rm -rf {current_experiment_path}

In [None]:
!df -h .

In [None]:
!ls -lah {current_experiment_path}

In [None]:
current_experiment_path

In [None]:
!ls -lah /root/experiments/t2c_concode_220428_v20/checkpoint-500

In [None]:
!ls -lah /root/experiments/t2c_concode_220428_v20/checkpoint-500

In [None]:
# !rm -rf /root/experiments/t2c_concode_220428_v19/checkpoint-20000/
# !df -h .

In [None]:
# !rm -rf {current_experiment_path}/checkpoint-1000

In [None]:
print(123)

123


In [None]:
# import sys
# sys.exit()

In [None]:
0