In [1]:
## All to folder
## generate prompt
# !ls data
# import time
# time.sleep(60*30)

# Try to do:
# torch.cuda.empty_cache()

In [2]:
import os
import json

import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM as LLaMAForCausalLM
from transformers import LlamaTokenizer as LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
from datasets import load_dataset
from EvaluateTestSet import EvaluateTestSet
from transformers.integrations import TensorBoardCallback
from transformers import GenerationConfig

def init_lora_model_and_tokenizer(default_model,
                             LORA_R,
                             LORA_ALPHA,
                             LORA_DROPOUT
                            ):


    """
        
    """
    model = LLaMAForCausalLM.from_pretrained(
    default_model,
    load_in_8bit=True,
    device_map="auto",
    )
    tokenizer = LLaMATokenizer.from_pretrained(
        default_model, add_eos_token=True
    )

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=LORA_DROPOUT,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, config)

    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    return model, tokenizer



class MyCustomCallback(TensorBoardCallback):
    #log_bleu_steps_factor = 5
    bleu_generation_max_new_tokens = 30
    bleu_fn_test_data = "temp/t2c_answers.json"
    bleu_fn_etalon = "temp/answers.json"
    log_step = 0
    
    def on_log(self, args, state, control, logs=None, **kwargs):
        super().on_log(args, state, control, logs=logs, **kwargs)
        #print("kwargs", len(kwargs), kwargs.keys())
        if self.tb_writer is not None:
            #print(state)
            #print(state.global_step)
            #print(self.log_step)
            if (self.log_step % self.log_bleu_steps_factor ==0):
                model = kwargs['model']
                tokenizer = kwargs['tokenizer']
                
                model.eval()
                assert not model.training
                generation_config = GenerationConfig(max_new_tokens = self.bleu_generation_max_new_tokens,
                                                     # min_new_tokens = 5,
                                                     temperature = 1.0
                                                    )
                print("generation_config:", generation_config)
                evaluator = EvaluateTestSet(generation_config = generation_config,
                                            fn_test_data = self.bleu_fn_test_data,
                                            fn_etalon = self.bleu_fn_etalon,
                                            batch_size = 1
                                       )

                metric_res = evaluator.evaluate(model=model, 
                                                tokenizer=tokenizer,
                                               )
                model.train()
                assert model.training
                print(metric_res)
                for key, val in metric_res.items():
                    #add "custom/something"
                    self.tb_writer.add_scalar(key, val, state.global_step)
                self.tb_writer.flush()
            self.log_step += 1


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [3]:
CONFIG_PATH = "/root/experiments_config/"
EXPERIMENTS_PATH = "/root/experiments/"
experiment_name = "t2c_concode_220428_v17"

In [4]:
current_config_path = os.path.join(CONFIG_PATH, experiment_name + "_config.json")
experiment_config = json.load(open(current_config_path, "r"))

assert experiment_config['experiment_name'] == experiment_name

In [7]:
experiment_config['resume_from_checkpoint'] = True

In [8]:
assert experiment_config['resume_from_checkpoint'] == True

In [9]:
assert experiment_config['experiment_name'] == experiment_name

In [10]:
current_experiment_path = os.path.join(EXPERIMENTS_PATH, experiment_name)

In [11]:
!mkdir {current_experiment_path}

mkdir: cannot create directory ‘/root/experiments/t2c_concode_220428_v16’: File exists


In [12]:
json.dump(experiment_config, open(current_experiment_path + \
                                  "/experiment_config.json", 
                                  "w+"
                                 )
         )

In [13]:
setattr(MyCustomCallback, "log_bleu_steps_factor", experiment_config['log_bleu_steps_factor'])

In [14]:
MyCustomCallback.log_bleu_steps_factor

50

In [15]:
model, tokenizer = init_lora_model_and_tokenizer(default_model = experiment_config["default_model"],
                                                 LORA_R = experiment_config["LORA_R"],
                                                 LORA_ALPHA = experiment_config["LORA_ALPHA"],
                                                 LORA_DROPOUT = experiment_config["LORA_DROPOUT"]
                                                )


data = load_dataset("json", 
                    data_files = {"train": experiment_config["fn_train_dataset"],
                                  "eval":  experiment_config["fn_eval_dataset"]
                                 }
                   )



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
Found cached dataset json (/root/.cache/huggingface/datasets/json/default-3ac2744fedc77f2f/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
tokenizer_val = LLaMATokenizer.from_pretrained(
    experiment_config['default_model'], add_eos_token=True
)
tokenizer_val.pad_token_id = 0  # unk. we want this to be different from the eos token


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [17]:
# experiment_config["logging_steps"] = 1
# experiment_config["eval_steps"] = 1

In [18]:
experiment_config

{'experiment_name': 't2c_concode_220428_v16',
 'fn_train_dataset': '/root/data/t2c_train.json',
 'fn_eval_dataset': '/root/data/t2c_answers.json',
 'default_model': 'decapoda-research/llama-7b-hf',
 'MICRO_BATCH_SIZE': 2,
 'BATCH_SIZE': 10,
 'EPOCHS': 2,
 'LEARNING_RATE': 0.0002,
 'CUTOFF_LEN': 256,
 'LORA_R': 4,
 'LORA_ALPHA': 16,
 'LORA_DROPOUT': 0.05,
 'warmup_steps': 200,
 'fp16': True,
 'logging_steps': 10,
 'eval_steps': 100,
 'evaluation_strategy': 'steps',
 'save_total_limit': 1,
 'save_strategy': 'steps',
 'save_steps': 500,
 'seed': 42,
 'logging_strategy': 'steps',
 'report_to': 'tensorboard',
 'mlm': False,
 'truncation': True,
 'padding': 'max_length',
 'config_use_cache': False,
 'resume_from_checkpoint': True,
 'bleu_batch_size': 5,
 'GRADIENT_ACCUMULATION_STEPS': 5,
 'log_bleu_steps_factor': 50}

In [19]:
# experiment_config["resume_from_checkpoint"]

In [20]:
from prompter import Prompter
prompter = Prompter()

def generate_prompt(data_point):
    if "input" in data_point and data_point["input"]:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        input = data_point["input"],
                                        label = data_point["output"]
                                       )
    else:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        #input = None,
                                        label = data_point["output"]
                                       )

/root/ipynb/prompter/templates/


In [29]:
# def generate_prompt(data_point):
#     # sorry about the formatting disaster gotta move fast
#     if data_point["input"]:
#         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Input:
# {data_point["input"]}
# ### Response:
# {data_point["output"]}"""
#     else:
#         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Response:
# {data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=experiment_config["truncation"],
        max_length=experiment_config["CUTOFF_LEN"],
        padding=experiment_config["padding"]
    )
)

trainer = transformers.Trainer(
    model=model,
    tokenizer=tokenizer_val,
    train_dataset=data["train"],
    eval_dataset=data['eval'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=experiment_config["MICRO_BATCH_SIZE"],
        gradient_accumulation_steps=experiment_config["GRADIENT_ACCUMULATION_STEPS"],
        warmup_steps=experiment_config["warmup_steps"],
        num_train_epochs=experiment_config["EPOCHS"],
        learning_rate=experiment_config["LEARNING_RATE"],
        fp16=experiment_config["fp16"],
        logging_steps=experiment_config["logging_steps"],        
        evaluation_strategy = experiment_config['evaluation_strategy'],
        eval_steps=experiment_config["eval_steps"],
        output_dir=current_experiment_path,#"lora-alpaca",
        save_total_limit=experiment_config["save_total_limit"],
        save_strategy = experiment_config["save_strategy"],
        
        save_steps = experiment_config["save_steps"],
        seed=experiment_config["seed"],
        logging_dir=current_experiment_path,
        logging_strategy=experiment_config["logging_strategy"],
        report_to=experiment_config["report_to"]
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, 
                                                               mlm=experiment_config["mlm"]
                                                              ),
    callbacks = [MyCustomCallback]
)
model.config.use_cache = experiment_config["config_use_cache"]
# print(len(trainer.optimizer.state['found_inf_per_device']))


trainer.train(resume_from_checkpoint=experiment_config["resume_from_checkpoint"])

model.save_pretrained(current_experiment_path)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
600,0.9753,1.121514
700,0.9946,1.116789
800,0.952,1.115059
900,0.9411,1.1126
1000,0.9378,1.110446
1100,0.9356,1.105852
1200,0.9331,1.107501
1300,0.9106,1.106961
1400,0.9648,1.104875
1500,0.9706,1.099931


generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



  0%|                                                    | 0/30 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.51s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 40226.70it/s]


{'EM': 0.0, 'BLEU': 0.09874452435765424, 'brevity_penalty': 0.5477365891588007, 'ratio': 0.6242350061199511, 'translation_length': 510, 'reference_length': 817, 'precisions_0': 0.44422700587084146, 'precisions_1': 0.24324324324324326, 'precisions_2': 0.1286031042128603, 'precisions_3': 0.07600950118764846}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.51s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 168445.94it/s]


{'EM': 0.0, 'BLEU': 0.1647954059807245, 'brevity_penalty': 0.7215896154822341, 'ratio': 0.7539779681762546, 'translation_length': 616, 'reference_length': 817, 'precisions_0': 0.4959481361426256, 'precisions_1': 0.3049403747870528, 'precisions_2': 0.17235188509874327, 'precisions_3': 0.10436432637571158}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.51s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 149618.45it/s]


{'EM': 0.0, 'BLEU': 0.21339055267271648, 'brevity_penalty': 0.6901782584838726, 'ratio': 0.7294981640146879, 'translation_length': 596, 'reference_length': 817, 'precisions_0': 0.5879396984924623, 'precisions_1': 0.3880070546737213, 'precisions_2': 0.24767225325884543, 'precisions_3': 0.16173570019723865}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.53s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 164267.78it/s]


{'EM': 0.0, 'BLEU': 0.1636800065822104, 'brevity_penalty': 0.6516017749168791, 'ratio': 0.7001223990208079, 'translation_length': 572, 'reference_length': 817, 'precisions_0': 0.5218150087260035, 'precisions_1': 0.3259668508287293, 'precisions_2': 0.1949317738791423, 'precisions_3': 0.12008281573498965}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.48s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 54565.97it/s]


{'EM': 0.0, 'BLEU': 0.14813196820457947, 'brevity_penalty': 0.5391174748573048, 'ratio': 0.6181150550795593, 'translation_length': 505, 'reference_length': 817, 'precisions_0': 0.567193675889328, 'precisions_1': 0.35294117647058826, 'precisions_2': 0.21252796420581654, 'precisions_3': 0.1339712918660287}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]

100%|████████████████████████████████████████| 30/30 [00:00<00:00, 41432.04it/s]


{'EM': 0.0, 'BLEU': 0.18834971437959735, 'brevity_penalty': 0.6237085593295825, 'ratio': 0.6793145654834761, 'translation_length': 555, 'reference_length': 817, 'precisions_0': 0.5539568345323741, 'precisions_1': 0.37072243346007605, 'precisions_2': 0.24193548387096775, 'precisions_3': 0.16738197424892703}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:41<00:00,  3.40s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 186690.09it/s]


{'EM': 0.0, 'BLEU': 0.10572539713850716, 'brevity_penalty': 0.36878000238976066, 'ratio': 0.5006119951040392, 'translation_length': 409, 'reference_length': 817, 'precisions_0': 0.6, 'precisions_1': 0.37894736842105264, 'precisions_2': 0.20797720797720798, 'precisions_3': 0.14285714285714285}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:39<00:00,  3.33s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 169581.02it/s]


{'EM': 0.0, 'BLEU': 0.1583822269620903, 'brevity_penalty': 0.49730851122147096, 'ratio': 0.5887392900856793, 'translation_length': 481, 'reference_length': 817, 'precisions_0': 0.5767634854771784, 'precisions_1': 0.38495575221238937, 'precisions_2': 0.24881516587677724, 'precisions_3': 0.18622448979591838}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.53s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 168445.94it/s]


{'EM': 0.0, 'BLEU': 0.19817390458941755, 'brevity_penalty': 0.6901782584838726, 'ratio': 0.7294981640146879, 'translation_length': 596, 'reference_length': 817, 'precisions_0': 0.52428810720268, 'precisions_1': 0.3403880070546737, 'precisions_2': 0.2271880819366853, 'precisions_3': 0.16765285996055226}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.49s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 182361.04it/s]

{'EM': 0.0, 'BLEU': 0.17483226312341327, 'brevity_penalty': 0.5733892484737265, 'ratio': 0.642594859241126, 'translation_length': 525, 'reference_length': 817, 'precisions_0': 0.5437262357414449, 'precisions_1': 0.3629032258064516, 'precisions_2': 0.2398286937901499, 'precisions_3': 0.182648401826484}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.48s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 118706.72it/s]


{'EM': 0.0, 'BLEU': 0.14762765125661323, 'brevity_penalty': 0.6020708956175393, 'ratio': 0.6634026927784578, 'translation_length': 542, 'reference_length': 817, 'precisions_0': 0.5211786372007366, 'precisions_1': 0.30994152046783624, 'precisions_2': 0.18181818181818182, 'precisions_3': 0.12307692307692308}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 156309.47it/s]


{'EM': 0.0, 'BLEU': 0.12914933421099825, 'brevity_penalty': 0.5287314938218902, 'ratio': 0.6107711138310894, 'translation_length': 499, 'reference_length': 817, 'precisions_0': 0.47, 'precisions_1': 0.2957446808510638, 'precisions_2': 0.1873589164785553, 'precisions_3': 0.1366906474820144}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:39<00:00,  3.33s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 165130.08it/s]


{'EM': 0.0, 'BLEU': 0.17292199972923272, 'brevity_penalty': 0.558035145770047, 'ratio': 0.631578947368421, 'translation_length': 516, 'reference_length': 817, 'precisions_0': 0.5531914893617021, 'precisions_1': 0.3757700205338809, 'precisions_2': 0.2549019607843137, 'precisions_3': 0.1740139211136891}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:41<00:00,  3.38s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 190650.18it/s]


{'EM': 0.0, 'BLEU': 0.11084830985974978, 'brevity_penalty': 0.40656965974059905, 'ratio': 0.5263157894736842, 'translation_length': 430, 'reference_length': 817, 'precisions_0': 0.5081206496519721, 'precisions_1': 0.3266832917705736, 'precisions_2': 0.21390374331550802, 'precisions_3': 0.15561959654178675}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.50s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 184230.04it/s]


{'EM': 0.0, 'BLEU': 0.11918980126197876, 'brevity_penalty': 0.48498918271047725, 'ratio': 0.5801713586291309, 'translation_length': 474, 'reference_length': 817, 'precisions_0': 0.5389473684210526, 'precisions_1': 0.32808988764044944, 'precisions_2': 0.18854415274463007, 'precisions_3': 0.10941475826972011}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.53s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 56527.01it/s]


{'EM': 0.0, 'BLEU': 0.1246026964775031, 'brevity_penalty': 0.47615861075053517, 'ratio': 0.5740514075887393, 'translation_length': 469, 'reference_length': 817, 'precisions_0': 0.5319148936170213, 'precisions_1': 0.32954545454545453, 'precisions_2': 0.20194647201946472, 'precisions_3': 0.13246753246753246}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.49s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 174278.56it/s]


{'EM': 0.0, 'BLEU': 0.19768500077886683, 'brevity_penalty': 0.6806236541219175, 'ratio': 0.7221542227662179, 'translation_length': 590, 'reference_length': 817, 'precisions_0': 0.5329949238578681, 'precisions_1': 0.34581105169340465, 'precisions_2': 0.231203007518797, 'precisions_3': 0.16699801192842942}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.48s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 148910.20it/s]


{'EM': 0.0, 'BLEU': 0.19225404315019848, 'brevity_penalty': 0.635250001256266, 'ratio': 0.6878824969400245, 'translation_length': 562, 'reference_length': 817, 'precisions_0': 0.5612788632326821, 'precisions_1': 0.36585365853658536, 'precisions_2': 0.23856858846918488, 'precisions_3': 0.17124735729386892}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:38<00:00,  3.30s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 63550.06it/s]


{'EM': 0.0, 'BLEU': 0.18364435455706254, 'brevity_penalty': 0.6220534596927131, 'ratio': 0.6780905752753978, 'translation_length': 554, 'reference_length': 817, 'precisions_0': 0.5333333333333333, 'precisions_1': 0.35619047619047617, 'precisions_2': 0.2383838383838384, 'precisions_3': 0.16774193548387098}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.47s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 162780.23it/s]


{'EM': 0.0, 'BLEU': 0.21552423221013614, 'brevity_penalty': 0.6822202628155224, 'ratio': 0.7233782129742962, 'translation_length': 591, 'reference_length': 817, 'precisions_0': 0.5625, 'precisions_1': 0.37544483985765126, 'precisions_2': 0.2518796992481203, 'precisions_3': 0.18725099601593626}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 55309.50it/s]

{'EM': 0.0, 'BLEU': 0.1762947677167243, 'brevity_penalty': 0.6758238404571268, 'ratio': 0.7184822521419829, 'translation_length': 587, 'reference_length': 817, 'precisions_0': 0.5221088435374149, 'precisions_1': 0.32974910394265233, 'precisions_2': 0.19696969696969696, 'precisions_3': 0.13654618473895583}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:38<00:00,  3.28s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 138731.11it/s]


{'EM': 0.0, 'BLEU': 0.18643728686431643, 'brevity_penalty': 0.6237085593295825, 'ratio': 0.6793145654834761, 'translation_length': 555, 'reference_length': 817, 'precisions_0': 0.5611510791366906, 'precisions_1': 0.3650190114068441, 'precisions_2': 0.23588709677419356, 'precisions_3': 0.16523605150214593}


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:42<00:00,  3.42s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 168220.75it/s]


{'EM': 0.0, 'BLEU': 0.1953060133579098, 'brevity_penalty': 0.6499738697156315, 'ratio': 0.6988984088127295, 'translation_length': 571, 'reference_length': 817, 'precisions_0': 0.5384615384615384, 'precisions_1': 0.36162361623616235, 'precisions_2': 0.240234375, 'precisions_3': 0.17427385892116182}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.45s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 53025.34it/s]


{'EM': 0.0, 'BLEU': 0.205091515384775, 'brevity_penalty': 0.6253620853529512, 'ratio': 0.6805385556915544, 'translation_length': 556, 'reference_length': 817, 'precisions_0': 0.5727109515260324, 'precisions_1': 0.3889943074003795, 'precisions_2': 0.2635814889336016, 'precisions_3': 0.19700214132762311}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:42<00:00,  3.42s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 175249.47it/s]


{'EM': 0.0, 'BLEU': 0.18138687916464194, 'brevity_penalty': 0.6286644024420278, 'ratio': 0.6829865361077111, 'translation_length': 558, 'reference_length': 817, 'precisions_0': 0.5491949910554562, 'precisions_1': 0.3553875236294896, 'precisions_2': 0.226, 'precisions_3': 0.15711252653927812}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.46s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 167103.75it/s]


{'EM': 0.0, 'BLEU': 0.20419171319585533, 'brevity_penalty': 0.640172441475687, 'ratio': 0.6915544675642595, 'translation_length': 565, 'reference_length': 817, 'precisions_0': 0.549469964664311, 'precisions_1': 0.3805970149253731, 'precisions_2': 0.25889328063241107, 'precisions_3': 0.19117647058823528}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:42<00:00,  3.43s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 53159.75it/s]


{'EM': 0.0, 'BLEU': 0.17819869707799438, 'brevity_penalty': 0.6270140342187177, 'ratio': 0.6817625458996328, 'translation_length': 557, 'reference_length': 817, 'precisions_0': 0.532258064516129, 'precisions_1': 0.3484848484848485, 'precisions_2': 0.22645290581162325, 'precisions_3': 0.15531914893617021}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.46s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 41268.98it/s]


{'EM': 0.0, 'BLEU': 0.20079239045533986, 'brevity_penalty': 0.635250001256266, 'ratio': 0.6878824969400245, 'translation_length': 562, 'reference_length': 817, 'precisions_0': 0.5488454706927176, 'precisions_1': 0.3714821763602251, 'precisions_2': 0.2544731610337972, 'precisions_3': 0.19238900634249473}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



 13%|█████▊                                      | 4/30 [00:14<01:32,  3.55s/it]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [30]:
model.save_pretrained(current_experiment_path)

In [1]:
# !rm -rf {current_experiment_path}

In [22]:
!df -h .

The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.
Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   45G  136K 100% /


In [23]:
!ls -lah {current_experiment_path}

total 52K
drwxr-xr-x  6 root root 251 May  7 21:16 .
drwxr-xr-x 10 root root 245 May  7 20:04 ..
drwxr-xr-x  2 root root  65 May  7 20:07 1683490028.7076824
drwxr-xr-x  2 root root  65 May  7 20:07 1683490028.7192707
drwxr-xr-x  2 root root  31 May  7 21:16 checkpoint-1000
drwxr-xr-x  2 root root 243 May  7 20:42 checkpoint-500
-rw-r--r--  1 root root 22K May  7 21:16 events.out.tfevents.1683490028.8d048d63ed1a.12197.0
-rw-r--r--  1 root root 24K May  7 21:16 events.out.tfevents.1683490028.8d048d63ed1a.12197.2
-rw-r--r--  1 root root 792 May  7 21:27 experiment_config.json


In [28]:
# !rm -rf {current_experiment_path}/checkpoint-1000

In [7]:
# !rm -rf /root/experiments/t2c_concode_220428_v16/checkpoint-20000/

In [8]:
!df -h .

Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   26G   20G  58% /
