In [1]:
## All to folder
## generate prompt
# !ls data
# import time
# time.sleep(60*30)

# Try to do:
# torch.cuda.empty_cache()

In [2]:
import os
import json

import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM as LLaMAForCausalLM
from transformers import LlamaTokenizer as LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
from datasets import load_dataset
from EvaluateTestSet import EvaluateTestSet
from transformers.integrations import TensorBoardCallback
from transformers import GenerationConfig

def init_lora_model_and_tokenizer(default_model,
                             LORA_R,
                             LORA_ALPHA,
                             LORA_DROPOUT
                            ):


    """
        
    """
    model = LLaMAForCausalLM.from_pretrained(
    default_model,
    load_in_8bit=True,
    device_map="auto",
    )
    tokenizer = LLaMATokenizer.from_pretrained(
        default_model, add_eos_token=True
    )

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=LORA_DROPOUT,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, config)

    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    return model, tokenizer



class MyCustomCallback(TensorBoardCallback):
    #log_bleu_steps_factor = 5
    bleu_generation_max_new_tokens = 30
    bleu_fn_test_data = "temp/t2c_answers.json"
    bleu_fn_etalon = "temp/answers.json"
    log_step = 0
    
    def on_log(self, args, state, control, logs=None, **kwargs):
        super().on_log(args, state, control, logs=logs, **kwargs)
        #print("kwargs", len(kwargs), kwargs.keys())
        if self.tb_writer is not None:
            #print(state)
            #print(state.global_step)
            #print(self.log_step)
            if (self.log_step % self.log_bleu_steps_factor ==0):
                model = kwargs['model']
                tokenizer = kwargs['tokenizer']
                
                model.eval()
                assert not model.training
                generation_config = GenerationConfig(max_new_tokens = self.bleu_generation_max_new_tokens,
                                                     # min_new_tokens = 5,
                                                     temperature = 1.0
                                                    )
                print("generation_config:", generation_config)
                evaluator = EvaluateTestSet(generation_config = generation_config,
                                            fn_test_data = self.bleu_fn_test_data,
                                            fn_etalon = self.bleu_fn_etalon,
                                            batch_size = 1
                                       )

                metric_res = evaluator.evaluate(model=model, 
                                                tokenizer=tokenizer,
                                               )
                model.train()
                assert model.training
                print(metric_res)
                for key, val in metric_res.items():
                    #add "custom/something"
                    self.tb_writer.add_scalar(key, val, state.global_step)
                self.tb_writer.flush()
            self.log_step += 1


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [3]:
CONFIG_PATH = "/root/experiments_config/"
EXPERIMENTS_PATH = "/root/experiments/"
experiment_name = "t2c_concode_220428_v18"
# t2c_concode_220428_v18.json

In [4]:
current_config_path = os.path.join(CONFIG_PATH, experiment_name + "_config.json")
experiment_config = json.load(open(current_config_path, "r"))

assert experiment_config['experiment_name'] == experiment_name

In [5]:
experiment_config['resume_from_checkpoint'] = False

In [6]:
assert experiment_config['resume_from_checkpoint'] == False

In [7]:
assert experiment_config['experiment_name'] == experiment_name

In [8]:
current_experiment_path = os.path.join(EXPERIMENTS_PATH, experiment_name)

In [9]:
!mkdir {current_experiment_path}

mkdir: cannot create directory ‘/root/experiments/t2c_concode_220428_v18’: File exists


In [10]:
json.dump(experiment_config, open(current_experiment_path + \
                                  "/experiment_config.json", 
                                  "w+"
                                 )
         )

In [11]:
setattr(MyCustomCallback, "log_bleu_steps_factor", experiment_config['log_bleu_steps_factor'])

In [12]:
MyCustomCallback.log_bleu_steps_factor

50

In [13]:
model, tokenizer = init_lora_model_and_tokenizer(default_model = experiment_config["default_model"],
                                                 LORA_R = experiment_config["LORA_R"],
                                                 LORA_ALPHA = experiment_config["LORA_ALPHA"],
                                                 LORA_DROPOUT = experiment_config["LORA_DROPOUT"]
                                                )


data = load_dataset("json", 
                    data_files = {"train": experiment_config["fn_train_dataset"],
                                  "eval":  experiment_config["fn_eval_dataset"]
                                 }
                   )



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
Found cached dataset json (/root/.cache/huggingface/datasets/json/default-3ac2744fedc77f2f/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
tokenizer_val = LLaMATokenizer.from_pretrained(
    experiment_config['default_model'], add_eos_token=True
)
tokenizer_val.pad_token_id = 0  # unk. we want this to be different from the eos token


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [15]:
# experiment_config["logging_steps"] = 1
# experiment_config["eval_steps"] = 1

In [16]:
experiment_config

{'experiment_name': 't2c_concode_220428_v18',
 'fn_train_dataset': '/root/data/t2c_train.json',
 'fn_eval_dataset': '/root/data/t2c_answers.json',
 'default_model': 'decapoda-research/llama-7b-hf',
 'MICRO_BATCH_SIZE': 2,
 'BATCH_SIZE': 10,
 'EPOCHS': 2,
 'LEARNING_RATE': 0.0002,
 'CUTOFF_LEN': 256,
 'LORA_R': 8,
 'LORA_ALPHA': 16,
 'LORA_DROPOUT': 0.05,
 'warmup_steps': 200,
 'fp16': True,
 'logging_steps': 10,
 'eval_steps': 100,
 'evaluation_strategy': 'steps',
 'save_total_limit': 1,
 'save_strategy': 'steps',
 'save_steps': 500,
 'seed': 42,
 'logging_strategy': 'steps',
 'report_to': 'tensorboard',
 'mlm': False,
 'truncation': True,
 'padding': 'max_length',
 'config_use_cache': False,
 'resume_from_checkpoint': False,
 'bleu_batch_size': 5,
 'GRADIENT_ACCUMULATION_STEPS': 5,
 'log_bleu_steps_factor': 50}

In [17]:
# experiment_config["resume_from_checkpoint"]

In [18]:
from prompter import Prompter
prompter = Prompter()

def generate_prompt(data_point):
    if "input" in data_point and data_point["input"]:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        input = data_point["input"],
                                        label = data_point["output"]
                                       )
    else:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        #input = None,
                                        label = data_point["output"]
                                       )

/root/ipynb/prompter/templates/


In [19]:
# def generate_prompt(data_point):
#     # sorry about the formatting disaster gotta move fast
#     if data_point["input"]:
#         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Input:
# {data_point["input"]}
# ### Response:
# {data_point["output"]}"""
#     else:
#         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Response:
# {data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=experiment_config["truncation"],
        max_length=experiment_config["CUTOFF_LEN"],
        padding=experiment_config["padding"]
    )
)

trainer = transformers.Trainer(
    model=model,
    tokenizer=tokenizer_val,
    train_dataset=data["train"],
    eval_dataset=data['eval'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=experiment_config["MICRO_BATCH_SIZE"],
        gradient_accumulation_steps=experiment_config["GRADIENT_ACCUMULATION_STEPS"],
        warmup_steps=experiment_config["warmup_steps"],
        num_train_epochs=experiment_config["EPOCHS"],
        learning_rate=experiment_config["LEARNING_RATE"],
        fp16=experiment_config["fp16"],
        logging_steps=experiment_config["logging_steps"],        
        evaluation_strategy = experiment_config['evaluation_strategy'],
        eval_steps=experiment_config["eval_steps"],
        output_dir=current_experiment_path,#"lora-alpaca",
        save_total_limit=experiment_config["save_total_limit"],
        save_strategy = experiment_config["save_strategy"],
        
        save_steps = experiment_config["save_steps"],
        seed=experiment_config["seed"],
        logging_dir=current_experiment_path,
        logging_strategy=experiment_config["logging_strategy"],
        report_to=experiment_config["report_to"]
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, 
                                                               mlm=experiment_config["mlm"]
                                                              ),
    callbacks = [MyCustomCallback]
)
model.config.use_cache = experiment_config["config_use_cache"]
# print(len(trainer.optimizer.state['found_inf_per_device']))


trainer.train(resume_from_checkpoint=experiment_config["resume_from_checkpoint"])

model.save_pretrained(current_experiment_path)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
100,1.1819,1.284222
200,1.0579,1.161147
300,1.0455,1.136825
400,0.9636,1.130147
500,0.9401,1.126434
600,0.8889,1.123557
700,0.9467,1.119448
800,0.9823,1.117033
900,0.9241,1.114005
1000,0.9646,1.11316


generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



  0%|                                                    | 0/30 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|███████████████████████████████████████████| 30/30 [01:49<00:00,  3.66s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 154961.97it/s]


{'EM': 0.0, 'BLEU': 0.0023321942520815866, 'brevity_penalty': 0.32022114123756484, 'ratio': 0.4675642594859241, 'translation_length': 382, 'reference_length': 817, 'precisions_0': 0.04699738903394256, 'precisions_1': 0.0056657223796034, 'precisions_2': 0.0030959752321981426, 'precisions_3': 0.0034129692832764505}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.47s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 174037.51it/s]


{'EM': 1.0, 'BLEU': 0.18213854851068, 'brevity_penalty': 0.640172441475687, 'ratio': 0.6915544675642595, 'translation_length': 565, 'reference_length': 817, 'precisions_0': 0.5795053003533569, 'precisions_1': 0.3675373134328358, 'precisions_2': 0.22529644268774704, 'precisions_3': 0.13655462184873948}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.46s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 177724.75it/s]


{'EM': 0.0, 'BLEU': 0.17503732338697423, 'brevity_penalty': 0.5682839349897214, 'ratio': 0.6389228886168911, 'translation_length': 522, 'reference_length': 817, 'precisions_0': 0.5430210325047801, 'precisions_1': 0.3752535496957404, 'precisions_2': 0.24838012958963282, 'precisions_3': 0.17782909930715934}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.44s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 175738.99it/s]


{'EM': 0.0, 'BLEU': 0.12317903431305376, 'brevity_penalty': 0.4726194933254629, 'ratio': 0.5716034271725826, 'translation_length': 467, 'reference_length': 817, 'precisions_0': 0.5876068376068376, 'precisions_1': 0.363013698630137, 'precisions_2': 0.20048899755501223, 'precisions_3': 0.10789473684210527}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.44s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 55899.21it/s]


{'EM': 0.0, 'BLEU': 0.1420172454441818, 'brevity_penalty': 0.5200417647285798, 'ratio': 0.6046511627906976, 'translation_length': 494, 'reference_length': 817, 'precisions_0': 0.5616161616161616, 'precisions_1': 0.3655913978494624, 'precisions_2': 0.21149425287356322, 'precisions_3': 0.12807881773399016}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.48s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 54637.05it/s]


{'EM': 0.0, 'BLEU': 0.114198553813074, 'brevity_penalty': 0.43167163234100825, 'ratio': 0.543451652386781, 'translation_length': 444, 'reference_length': 817, 'precisions_0': 0.5932584269662922, 'precisions_1': 0.3686746987951807, 'precisions_2': 0.20671834625322996, 'precisions_3': 0.10833333333333334}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.47s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 187245.71it/s]


{'EM': 0.0, 'BLEU': 0.10469000806658504, 'brevity_penalty': 0.3777850289228291, 'ratio': 0.5067319461444308, 'translation_length': 414, 'reference_length': 817, 'precisions_0': 0.6144578313253012, 'precisions_1': 0.37662337662337664, 'precisions_2': 0.20448179271708683, 'precisions_3': 0.12462006079027356}


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 50392.12it/s]


{'EM': 0.0, 'BLEU': 0.2019963513958137, 'brevity_penalty': 0.6710090849121607, 'ratio': 0.7148102815177478, 'translation_length': 584, 'reference_length': 817, 'precisions_0': 0.5675213675213675, 'precisions_1': 0.36396396396396397, 'precisions_2': 0.2342857142857143, 'precisions_3': 0.1696969696969697}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163414.44it/s]


{'EM': 0.0, 'BLEU': 0.1806196557052586, 'brevity_penalty': 0.63196038331477, 'ratio': 0.6854345165238678, 'translation_length': 560, 'reference_length': 817, 'precisions_0': 0.5436720142602496, 'precisions_1': 0.3578154425612053, 'precisions_2': 0.2275449101796407, 'precisions_3': 0.15074309978768577}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 52406.96it/s]


{'EM': 0.0, 'BLEU': 0.2002638814096379, 'brevity_penalty': 0.6499738697156315, 'ratio': 0.6988984088127295, 'translation_length': 571, 'reference_length': 817, 'precisions_0': 0.5804195804195804, 'precisions_1': 0.38191881918819187, 'precisions_2': 0.248046875, 'precisions_3': 0.16390041493775934}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163202.49it/s]


{'EM': 0.0, 'BLEU': 0.21253830165181342, 'brevity_penalty': 0.6613349669059593, 'ratio': 0.7074663402692778, 'translation_length': 578, 'reference_length': 817, 'precisions_0': 0.5664939550949913, 'precisions_1': 0.3861566484517304, 'precisions_2': 0.26204238921001927, 'precisions_3': 0.18609406952965235}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163414.44it/s]


{'EM': 0.0, 'BLEU': 0.20839938981539924, 'brevity_penalty': 0.6467131914862885, 'ratio': 0.6964504283965728, 'translation_length': 569, 'reference_length': 817, 'precisions_0': 0.5894736842105263, 'precisions_1': 0.387037037037037, 'precisions_2': 0.2549019607843137, 'precisions_3': 0.18541666666666667}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.55s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 157286.40it/s]

{'EM': 0.0, 'BLEU': 0.19539415457520604, 'brevity_penalty': 0.6467131914862885, 'ratio': 0.6964504283965728, 'translation_length': 569, 'reference_length': 817, 'precisions_0': 0.5578947368421052, 'precisions_1': 0.3685185185185185, 'precisions_2': 0.2372549019607843, 'precisions_3': 0.17083333333333334}



IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [28]:
model.save_pretrained(current_experiment_path)

In [27]:
# !rm -rf {current_experiment_path}

In [22]:
!df -h .

Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   33G   13G  73% /


In [23]:
!ls -lah {current_experiment_path}

total 17M
drwxr-xr-x  5 root root  280 May  9 20:31 .
drwxr-xr-x 11 root root  275 May  8 21:39 ..
drwxr-xr-x  2 root root   65 May  8 21:48 1683582527.52914
drwxr-xr-x  2 root root   65 May  8 21:48 1683582527.5369353
-rw-r--r--  1 root root  370 May  9 20:31 adapter_config.json
-rw-r--r--  1 root root  17M May  9 20:31 adapter_model.bin
drwxr-xr-x  2 root root  243 May  9 20:30 checkpoint-20000
-rw-r--r--  1 root root 366K May  9 20:30 events.out.tfevents.1683582527.8d048d63ed1a.13365.0
-rw-r--r--  1 root root 388K May  9 20:31 events.out.tfevents.1683582527.8d048d63ed1a.13365.2
-rw-r--r--  1 root root  793 May  8 21:46 experiment_config.json


In [24]:
# !rm -rf {current_experiment_path}/checkpoint-1000

In [5]:
!rm -rf /root/experiments/t2c_concode_220428_v18/checkpoint-20000

In [6]:
!df -h .

Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   27G   19G  59% /
